From 30f712c9dd69348aa51351d5cb6d366bf4fae31d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 25 Jun 2014 14:57:53 +1000 Subject: libxfs: move source files Move all the source files that are shared with userspace into libxfs/. This is done as one big chunk simpy to get it done quickly Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/Makefile | 63 +- fs/xfs/libxfs/xfs_alloc.c | 2630 +++++++++++++++++ fs/xfs/libxfs/xfs_alloc_btree.c | 504 ++++ fs/xfs/libxfs/xfs_attr.c | 1459 ++++++++++ fs/xfs/libxfs/xfs_attr_leaf.c | 2697 +++++++++++++++++ fs/xfs/libxfs/xfs_attr_remote.c | 628 ++++ fs/xfs/libxfs/xfs_bmap.c | 5609 ++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_bmap_btree.c | 967 +++++++ fs/xfs/libxfs/xfs_btree.c | 3989 +++++++++++++++++++++++++ fs/xfs/libxfs/xfs_da_btree.c | 2665 +++++++++++++++++ fs/xfs/libxfs/xfs_da_format.c | 911 ++++++ fs/xfs/libxfs/xfs_dir2.c | 762 +++++ fs/xfs/libxfs/xfs_dir2_block.c | 1265 ++++++++ fs/xfs/libxfs/xfs_dir2_data.c | 1050 +++++++ fs/xfs/libxfs/xfs_dir2_leaf.c | 1831 ++++++++++++ fs/xfs/libxfs/xfs_dir2_node.c | 2284 +++++++++++++++ fs/xfs/libxfs/xfs_dir2_priv.h | 274 ++ fs/xfs/libxfs/xfs_dir2_sf.c | 1184 ++++++++ fs/xfs/libxfs/xfs_dquot_buf.c | 290 ++ fs/xfs/libxfs/xfs_ialloc.c | 2189 ++++++++++++++ fs/xfs/libxfs/xfs_ialloc_btree.c | 422 +++ fs/xfs/libxfs/xfs_inode_buf.c | 479 +++ fs/xfs/libxfs/xfs_inode_fork.c | 1906 ++++++++++++ fs/xfs/libxfs/xfs_log_rlimit.c | 150 + fs/xfs/libxfs/xfs_rtbitmap.c | 973 +++++++ fs/xfs/libxfs/xfs_symlink_remote.c | 201 ++ fs/xfs/libxfs/xfs_trans_resv.c | 894 ++++++ fs/xfs/xfs_alloc.c | 2630 ----------------- fs/xfs/xfs_alloc_btree.c | 504 ---- fs/xfs/xfs_attr.c | 1459 ---------- fs/xfs/xfs_attr_leaf.c | 2697 ----------------- fs/xfs/xfs_attr_remote.c | 628 ---- fs/xfs/xfs_bmap.c | 5609 ------------------------------------ fs/xfs/xfs_bmap_btree.c | 967 ------- fs/xfs/xfs_btree.c | 3989 ------------------------- fs/xfs/xfs_da_btree.c | 2665 ----------------- fs/xfs/xfs_da_format.c | 911 ------ fs/xfs/xfs_dir2.c | 762 ----- fs/xfs/xfs_dir2_block.c | 1265 -------- fs/xfs/xfs_dir2_data.c | 1050 ------- fs/xfs/xfs_dir2_leaf.c | 1831 ------------ fs/xfs/xfs_dir2_node.c | 2284 --------------- fs/xfs/xfs_dir2_priv.h | 274 -- fs/xfs/xfs_dir2_sf.c | 1184 -------- fs/xfs/xfs_dquot_buf.c | 290 -- fs/xfs/xfs_ialloc.c | 2189 -------------- fs/xfs/xfs_ialloc_btree.c | 422 --- fs/xfs/xfs_inode_buf.c | 479 --- fs/xfs/xfs_inode_fork.c | 1906 ------------ fs/xfs/xfs_log_rlimit.c | 150 - fs/xfs/xfs_rtbitmap.c | 973 ------- fs/xfs/xfs_symlink_remote.c | 201 -- fs/xfs/xfs_trans_resv.c | 894 ------ 53 files changed, 38245 insertions(+), 38244 deletions(-) create mode 100644 fs/xfs/libxfs/xfs_alloc.c create mode 100644 fs/xfs/libxfs/xfs_alloc_btree.c create mode 100644 fs/xfs/libxfs/xfs_attr.c create mode 100644 fs/xfs/libxfs/xfs_attr_leaf.c create mode 100644 fs/xfs/libxfs/xfs_attr_remote.c create mode 100644 fs/xfs/libxfs/xfs_bmap.c create mode 100644 fs/xfs/libxfs/xfs_bmap_btree.c create mode 100644 fs/xfs/libxfs/xfs_btree.c create mode 100644 fs/xfs/libxfs/xfs_da_btree.c create mode 100644 fs/xfs/libxfs/xfs_da_format.c create mode 100644 fs/xfs/libxfs/xfs_dir2.c create mode 100644 fs/xfs/libxfs/xfs_dir2_block.c create mode 100644 fs/xfs/libxfs/xfs_dir2_data.c create mode 100644 fs/xfs/libxfs/xfs_dir2_leaf.c create mode 100644 fs/xfs/libxfs/xfs_dir2_node.c create mode 100644 fs/xfs/libxfs/xfs_dir2_priv.h create mode 100644 fs/xfs/libxfs/xfs_dir2_sf.c create mode 100644 fs/xfs/libxfs/xfs_dquot_buf.c create mode 100644 fs/xfs/libxfs/xfs_ialloc.c create mode 100644 fs/xfs/libxfs/xfs_ialloc_btree.c create mode 100644 fs/xfs/libxfs/xfs_inode_buf.c create mode 100644 fs/xfs/libxfs/xfs_inode_fork.c create mode 100644 fs/xfs/libxfs/xfs_log_rlimit.c create mode 100644 fs/xfs/libxfs/xfs_rtbitmap.c create mode 100644 fs/xfs/libxfs/xfs_symlink_remote.c create mode 100644 fs/xfs/libxfs/xfs_trans_resv.c delete mode 100644 fs/xfs/xfs_alloc.c delete mode 100644 fs/xfs/xfs_alloc_btree.c delete mode 100644 fs/xfs/xfs_attr.c delete mode 100644 fs/xfs/xfs_attr_leaf.c delete mode 100644 fs/xfs/xfs_attr_remote.c delete mode 100644 fs/xfs/xfs_bmap.c delete mode 100644 fs/xfs/xfs_bmap_btree.c delete mode 100644 fs/xfs/xfs_btree.c delete mode 100644 fs/xfs/xfs_da_btree.c delete mode 100644 fs/xfs/xfs_da_format.c delete mode 100644 fs/xfs/xfs_dir2.c delete mode 100644 fs/xfs/xfs_dir2_block.c delete mode 100644 fs/xfs/xfs_dir2_data.c delete mode 100644 fs/xfs/xfs_dir2_leaf.c delete mode 100644 fs/xfs/xfs_dir2_node.c delete mode 100644 fs/xfs/xfs_dir2_priv.h delete mode 100644 fs/xfs/xfs_dir2_sf.c delete mode 100644 fs/xfs/xfs_dquot_buf.c delete mode 100644 fs/xfs/xfs_ialloc.c delete mode 100644 fs/xfs/xfs_ialloc_btree.c delete mode 100644 fs/xfs/xfs_inode_buf.c delete mode 100644 fs/xfs/xfs_inode_fork.c delete mode 100644 fs/xfs/xfs_log_rlimit.c delete mode 100644 fs/xfs/xfs_rtbitmap.c delete mode 100644 fs/xfs/xfs_symlink_remote.c delete mode 100644 fs/xfs/xfs_trans_resv.c diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 4c5edf0df9a3..0dfa26d626f5 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -28,7 +28,35 @@ xfs-y += xfs_trace.o # build the libxfs code first xfs-y += $(addprefix libxfs/, \ + xfs_alloc.o \ + xfs_alloc_btree.o \ + xfs_attr.o \ + xfs_attr_leaf.o \ + xfs_attr_remote.o \ + xfs_bmap.o \ + xfs_bmap_btree.o \ + xfs_btree.o \ + xfs_da_btree.o \ + xfs_da_format.o \ + xfs_dir2.o \ + xfs_dir2_block.o \ + xfs_dir2_data.o \ + xfs_dir2_leaf.o \ + xfs_dir2_node.o \ + xfs_dir2_sf.o \ + xfs_dquot_buf.o \ + xfs_ialloc.o \ + xfs_ialloc_btree.o \ + xfs_inode_fork.o \ + xfs_inode_buf.o \ + xfs_log_rlimit.o \ xfs_sb.o \ + xfs_symlink_remote.o \ + xfs_trans_resv.o \ + ) +# xfs_rtbitmap is shared with libxfs +xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \ + xfs_rtbitmap.o \ ) # highlevel code @@ -51,6 +79,7 @@ xfs-y += xfs_aops.o \ xfs_ioctl.o \ xfs_iomap.o \ xfs_iops.o \ + xfs_inode.o \ xfs_itable.o \ xfs_message.o \ xfs_mount.o \ @@ -62,41 +91,14 @@ xfs-y += xfs_aops.o \ kmem.o \ uuid.o -# code shared with libxfs -xfs-y += xfs_alloc.o \ - xfs_alloc_btree.o \ - xfs_attr.o \ - xfs_attr_leaf.o \ - xfs_attr_remote.o \ - xfs_bmap.o \ - xfs_bmap_btree.o \ - xfs_btree.o \ - xfs_da_btree.o \ - xfs_da_format.o \ - xfs_dir2.o \ - xfs_dir2_block.o \ - xfs_dir2_data.o \ - xfs_dir2_leaf.o \ - xfs_dir2_node.o \ - xfs_dir2_sf.o \ - xfs_dquot_buf.o \ - xfs_ialloc.o \ - xfs_ialloc_btree.o \ - xfs_icreate_item.o \ - xfs_inode.o \ - xfs_inode_fork.o \ - xfs_inode_buf.o \ - xfs_log_recover.o \ - xfs_log_rlimit.o \ - xfs_symlink_remote.o \ - xfs_trans_resv.o - # low-level transaction/log code xfs-y += xfs_log.o \ xfs_log_cil.o \ xfs_buf_item.o \ xfs_extfree_item.o \ + xfs_icreate_item.o \ xfs_inode_item.o \ + xfs_log_recover.o \ xfs_trans_ail.o \ xfs_trans_buf.o \ xfs_trans_extfree.o \ @@ -112,8 +114,7 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ xfs_quotaops.o # xfs_rtbitmap is shared with libxfs -xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o \ - xfs_rtbitmap.o +xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_PROC_FS) += xfs_stats.o diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c new file mode 100644 index 000000000000..d43813267a80 --- /dev/null +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -0,0 +1,2630 @@ +/* + * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_alloc.h" +#include "xfs_extent_busy.h" +#include "xfs_error.h" +#include "xfs_cksum.h" +#include "xfs_trace.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" +#include "xfs_log.h" + +struct workqueue_struct *xfs_alloc_wq; + +#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) + +#define XFSA_FIXUP_BNO_OK 1 +#define XFSA_FIXUP_CNT_OK 2 + +STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); +STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); +STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); +STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, + xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); + +/* + * Lookup the record equal to [bno, len] in the btree given by cur. + */ +STATIC int /* error */ +xfs_alloc_lookup_eq( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); +} + +/* + * Lookup the first record greater than or equal to [bno, len] + * in the btree given by cur. + */ +int /* error */ +xfs_alloc_lookup_ge( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); +} + +/* + * Lookup the first record less than or equal to [bno, len] + * in the btree given by cur. + */ +int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); +} + +/* + * Update the record referred to by cur to the value given + * by [bno, len]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +STATIC int /* error */ +xfs_alloc_update( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len) /* length of extent */ +{ + union xfs_btree_rec rec; + + rec.alloc.ar_startblock = cpu_to_be32(bno); + rec.alloc.ar_blockcount = cpu_to_be32(len); + return xfs_btree_update(cur, &rec); +} + +/* + * Get the data from the pointed-to record. + */ +int /* error */ +xfs_alloc_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t *bno, /* output: starting block of extent */ + xfs_extlen_t *len, /* output: length of extent */ + int *stat) /* output: success/failure */ +{ + union xfs_btree_rec *rec; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (!error && *stat == 1) { + *bno = be32_to_cpu(rec->alloc.ar_startblock); + *len = be32_to_cpu(rec->alloc.ar_blockcount); + } + return error; +} + +/* + * Compute aligned version of the found extent. + * Takes alignment and min length into account. + */ +STATIC void +xfs_alloc_compute_aligned( + xfs_alloc_arg_t *args, /* allocation argument structure */ + xfs_agblock_t foundbno, /* starting block in found extent */ + xfs_extlen_t foundlen, /* length in found extent */ + xfs_agblock_t *resbno, /* result block number */ + xfs_extlen_t *reslen) /* result length */ +{ + xfs_agblock_t bno; + xfs_extlen_t len; + + /* Trim busy sections out of found extent */ + xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); + + if (args->alignment > 1 && len >= args->minlen) { + xfs_agblock_t aligned_bno = roundup(bno, args->alignment); + xfs_extlen_t diff = aligned_bno - bno; + + *resbno = aligned_bno; + *reslen = diff >= len ? 0 : len - diff; + } else { + *resbno = bno; + *reslen = len; + } +} + +/* + * Compute best start block and diff for "near" allocations. + * freelen >= wantlen already checked by caller. + */ +STATIC xfs_extlen_t /* difference value (absolute) */ +xfs_alloc_compute_diff( + xfs_agblock_t wantbno, /* target starting block */ + xfs_extlen_t wantlen, /* target length */ + xfs_extlen_t alignment, /* target alignment */ + char userdata, /* are we allocating data? */ + xfs_agblock_t freebno, /* freespace's starting block */ + xfs_extlen_t freelen, /* freespace's length */ + xfs_agblock_t *newbnop) /* result: best start block from free */ +{ + xfs_agblock_t freeend; /* end of freespace extent */ + xfs_agblock_t newbno1; /* return block number */ + xfs_agblock_t newbno2; /* other new block number */ + xfs_extlen_t newlen1=0; /* length with newbno1 */ + xfs_extlen_t newlen2=0; /* length with newbno2 */ + xfs_agblock_t wantend; /* end of target extent */ + + ASSERT(freelen >= wantlen); + freeend = freebno + freelen; + wantend = wantbno + wantlen; + /* + * We want to allocate from the start of a free extent if it is past + * the desired block or if we are allocating user data and the free + * extent is before desired block. The second case is there to allow + * for contiguous allocation from the remaining free space if the file + * grows in the short term. + */ + if (freebno >= wantbno || (userdata && freeend < wantend)) { + if ((newbno1 = roundup(freebno, alignment)) >= freeend) + newbno1 = NULLAGBLOCK; + } else if (freeend >= wantend && alignment > 1) { + newbno1 = roundup(wantbno, alignment); + newbno2 = newbno1 - alignment; + if (newbno1 >= freeend) + newbno1 = NULLAGBLOCK; + else + newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1); + if (newbno2 < freebno) + newbno2 = NULLAGBLOCK; + else + newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2); + if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { + if (newlen1 < newlen2 || + (newlen1 == newlen2 && + XFS_ABSDIFF(newbno1, wantbno) > + XFS_ABSDIFF(newbno2, wantbno))) + newbno1 = newbno2; + } else if (newbno2 != NULLAGBLOCK) + newbno1 = newbno2; + } else if (freeend >= wantend) { + newbno1 = wantbno; + } else if (alignment > 1) { + newbno1 = roundup(freeend - wantlen, alignment); + if (newbno1 > freeend - wantlen && + newbno1 - alignment >= freebno) + newbno1 -= alignment; + else if (newbno1 >= freeend) + newbno1 = NULLAGBLOCK; + } else + newbno1 = freeend - wantlen; + *newbnop = newbno1; + return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); +} + +/* + * Fix up the length, based on mod and prod. + * len should be k * prod + mod for some k. + * If len is too small it is returned unchanged. + * If len hits maxlen it is left alone. + */ +STATIC void +xfs_alloc_fix_len( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_extlen_t k; + xfs_extlen_t rlen; + + ASSERT(args->mod < args->prod); + rlen = args->len; + ASSERT(rlen >= args->minlen); + ASSERT(rlen <= args->maxlen); + if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen || + (args->mod == 0 && rlen < args->prod)) + return; + k = rlen % args->prod; + if (k == args->mod) + return; + if (k > args->mod) + rlen = rlen - (k - args->mod); + else + rlen = rlen - args->prod + (args->mod - k); + if ((int)rlen < (int)args->minlen) + return; + ASSERT(rlen >= args->minlen && rlen <= args->maxlen); + ASSERT(rlen % args->prod == args->mod); + args->len = rlen; +} + +/* + * Fix up length if there is too little space left in the a.g. + * Return 1 if ok, 0 if too little, should give up. + */ +STATIC int +xfs_alloc_fix_minleft( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_agf_t *agf; /* a.g. freelist header */ + int diff; /* free space difference */ + + if (args->minleft == 0) + return 1; + agf = XFS_BUF_TO_AGF(args->agbp); + diff = be32_to_cpu(agf->agf_freeblks) + - args->len - args->minleft; + if (diff >= 0) + return 1; + args->len += diff; /* shrink the allocated space */ + if (args->len >= args->minlen) + return 1; + args->agbno = NULLAGBLOCK; + return 0; +} + +/* + * Update the two btrees, logically removing from freespace the extent + * starting at rbno, rlen blocks. The extent is contained within the + * actual (current) free extent fbno for flen blocks. + * Flags are passed in indicating whether the cursors are set to the + * relevant records. + */ +STATIC int /* error code */ +xfs_alloc_fixup_trees( + xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ + xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ + xfs_agblock_t fbno, /* starting block of free extent */ + xfs_extlen_t flen, /* length of free extent */ + xfs_agblock_t rbno, /* starting block of returned extent */ + xfs_extlen_t rlen, /* length of returned extent */ + int flags) /* flags, XFSA_FIXUP_... */ +{ + int error; /* error code */ + int i; /* operation results */ + xfs_agblock_t nfbno1; /* first new free startblock */ + xfs_agblock_t nfbno2; /* second new free startblock */ + xfs_extlen_t nflen1=0; /* first new free length */ + xfs_extlen_t nflen2=0; /* second new free length */ + + /* + * Look up the record in the by-size tree if necessary. + */ + if (flags & XFSA_FIXUP_CNT_OK) { +#ifdef DEBUG + if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN( + i == 1 && nfbno1 == fbno && nflen1 == flen); +#endif + } else { + if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + /* + * Look up the record in the by-block tree if necessary. + */ + if (flags & XFSA_FIXUP_BNO_OK) { +#ifdef DEBUG + if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN( + i == 1 && nfbno1 == fbno && nflen1 == flen); +#endif + } else { + if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + +#ifdef DEBUG + if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { + struct xfs_btree_block *bnoblock; + struct xfs_btree_block *cntblock; + + bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); + cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); + + XFS_WANT_CORRUPTED_RETURN( + bnoblock->bb_numrecs == cntblock->bb_numrecs); + } +#endif + + /* + * Deal with all four cases: the allocated record is contained + * within the freespace record, so we can have new freespace + * at either (or both) end, or no freespace remaining. + */ + if (rbno == fbno && rlen == flen) + nfbno1 = nfbno2 = NULLAGBLOCK; + else if (rbno == fbno) { + nfbno1 = rbno + rlen; + nflen1 = flen - rlen; + nfbno2 = NULLAGBLOCK; + } else if (rbno + rlen == fbno + flen) { + nfbno1 = fbno; + nflen1 = flen - rlen; + nfbno2 = NULLAGBLOCK; + } else { + nfbno1 = fbno; + nflen1 = rbno - fbno; + nfbno2 = rbno + rlen; + nflen2 = (fbno + flen) - nfbno2; + } + /* + * Delete the entry from the by-size btree. + */ + if ((error = xfs_btree_delete(cnt_cur, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + /* + * Add new by-size btree entry(s). + */ + if (nfbno1 != NULLAGBLOCK) { + if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 0); + if ((error = xfs_btree_insert(cnt_cur, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + if (nfbno2 != NULLAGBLOCK) { + if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 0); + if ((error = xfs_btree_insert(cnt_cur, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + /* + * Fix up the by-block btree entry(s). + */ + if (nfbno1 == NULLAGBLOCK) { + /* + * No remaining freespace, just delete the by-block tree entry. + */ + if ((error = xfs_btree_delete(bno_cur, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } else { + /* + * Update the by-block entry to start later|be shorter. + */ + if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) + return error; + } + if (nfbno2 != NULLAGBLOCK) { + /* + * 2 resulting free entries, need to add one. + */ + if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 0); + if ((error = xfs_btree_insert(bno_cur, &i))) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + return 0; +} + +static bool +xfs_agfl_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); + int i; + + if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid)) + return false; + if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) + return false; + /* + * during growfs operations, the perag is not fully initialised, + * so we can't use it for any useful checking. growfs ensures we can't + * use it by using uncached buffers that don't have the perag attached + * so we can detect and avoid this problem. + */ + if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) + return false; + + for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { + if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && + be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) + return false; + } + return true; +} + +static void +xfs_agfl_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + /* + * There is no verification of non-crc AGFLs because mkfs does not + * initialise the AGFL to zero or NULL. Hence the only valid part of the + * AGFL is what the AGF says is active. We can't get to the AGF, so we + * can't verify just those entries are valid. + */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_agfl_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +static void +xfs_agfl_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + + /* no verification of non-crc AGFLs */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (!xfs_agfl_verify(bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (bip) + XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); + + xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); +} + +const struct xfs_buf_ops xfs_agfl_buf_ops = { + .verify_read = xfs_agfl_read_verify, + .verify_write = xfs_agfl_write_verify, +}; + +/* + * Read in the allocation group free block array. + */ +STATIC int /* error */ +xfs_alloc_read_agfl( + xfs_mount_t *mp, /* mount point structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_buf_t **bpp) /* buffer for the ag free block array */ +{ + xfs_buf_t *bp; /* return value */ + int error; + + ASSERT(agno != NULLAGNUMBER); + error = xfs_trans_read_buf( + mp, tp, mp->m_ddev_targp, + XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); + if (error) + return error; + xfs_buf_set_ref(bp, XFS_AGFL_REF); + *bpp = bp; + return 0; +} + +STATIC int +xfs_alloc_update_counters( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_buf *agbp, + long len) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + + pag->pagf_freeblks += len; + be32_add_cpu(&agf->agf_freeblks, len); + + xfs_trans_agblocks_delta(tp, len); + if (unlikely(be32_to_cpu(agf->agf_freeblks) > + be32_to_cpu(agf->agf_length))) + return EFSCORRUPTED; + + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); + return 0; +} + +/* + * Allocation group level functions. + */ + +/* + * Allocate a variable extent in the allocation group agno. + * Type and bno are used to determine where in the allocation group the + * extent will start. + * Extent's length (returned in *len) will be between minlen and maxlen, + * and of the form k * prod + mod unless there's nothing that large. + * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent( + xfs_alloc_arg_t *args) /* argument structure for allocation */ +{ + int error=0; + + ASSERT(args->minlen > 0); + ASSERT(args->maxlen > 0); + ASSERT(args->minlen <= args->maxlen); + ASSERT(args->mod < args->prod); + ASSERT(args->alignment > 0); + /* + * Branch to correct routine based on the type. + */ + args->wasfromfl = 0; + switch (args->type) { + case XFS_ALLOCTYPE_THIS_AG: + error = xfs_alloc_ag_vextent_size(args); + break; + case XFS_ALLOCTYPE_NEAR_BNO: + error = xfs_alloc_ag_vextent_near(args); + break; + case XFS_ALLOCTYPE_THIS_BNO: + error = xfs_alloc_ag_vextent_exact(args); + break; + default: + ASSERT(0); + /* NOTREACHED */ + } + + if (error || args->agbno == NULLAGBLOCK) + return error; + + ASSERT(args->len >= args->minlen); + ASSERT(args->len <= args->maxlen); + ASSERT(!args->wasfromfl || !args->isfl); + ASSERT(args->agbno % args->alignment == 0); + + if (!args->wasfromfl) { + error = xfs_alloc_update_counters(args->tp, args->pag, + args->agbp, + -((long)(args->len))); + if (error) + return error; + + ASSERT(!xfs_extent_busy_search(args->mp, args->agno, + args->agbno, args->len)); + } + + if (!args->isfl) { + xfs_trans_mod_sb(args->tp, args->wasdel ? + XFS_TRANS_SB_RES_FDBLOCKS : + XFS_TRANS_SB_FDBLOCKS, + -((long)(args->len))); + } + + XFS_STATS_INC(xs_allocx); + XFS_STATS_ADD(xs_allocb, args->len); + return error; +} + +/* + * Allocate a variable extent at exactly agno/bno. + * Extent's length (returned in *len) will be between minlen and maxlen, + * and of the form k * prod + mod unless there's nothing that large. + * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent_exact( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ + xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ + int error; + xfs_agblock_t fbno; /* start block of found extent */ + xfs_extlen_t flen; /* length of found extent */ + xfs_agblock_t tbno; /* start block of trimmed extent */ + xfs_extlen_t tlen; /* length of trimmed extent */ + xfs_agblock_t tend; /* end block of trimmed extent */ + int i; /* success/failure of operation */ + + ASSERT(args->alignment == 1); + + /* + * Allocate/initialize a cursor for the by-number freespace btree. + */ + bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); + + /* + * Lookup bno and minlen in the btree (minlen is irrelevant, really). + * Look for the closest free block <= bno, it must contain bno + * if any free block does. + */ + error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); + if (error) + goto error0; + if (!i) + goto not_found; + + /* + * Grab the freespace record. + */ + error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + ASSERT(fbno <= args->agbno); + + /* + * Check for overlapping busy extents. + */ + xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen); + + /* + * Give up if the start of the extent is busy, or the freespace isn't + * long enough for the minimum request. + */ + if (tbno > args->agbno) + goto not_found; + if (tlen < args->minlen) + goto not_found; + tend = tbno + tlen; + if (tend < args->agbno + args->minlen) + goto not_found; + + /* + * End of extent will be smaller of the freespace end and the + * maximal requested end. + * + * Fix the length according to mod and prod if given. + */ + args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) + - args->agbno; + xfs_alloc_fix_len(args); + if (!xfs_alloc_fix_minleft(args)) + goto not_found; + + ASSERT(args->agbno + args->len <= tend); + + /* + * We are allocating agbno for args->len + * Allocate/initialize a cursor for the by-size btree. + */ + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); + ASSERT(args->agbno + args->len <= + be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, + args->len, XFSA_FIXUP_BNO_OK); + if (error) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); + goto error0; + } + + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + + args->wasfromfl = 0; + trace_xfs_alloc_exact_done(args); + return 0; + +not_found: + /* Didn't find it, return null. */ + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + args->agbno = NULLAGBLOCK; + trace_xfs_alloc_exact_notfound(args); + return 0; + +error0: + xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); + trace_xfs_alloc_exact_error(args); + return error; +} + +/* + * Search the btree in a given direction via the search cursor and compare + * the records found against the good extent we've already found. + */ +STATIC int +xfs_alloc_find_best_extent( + struct xfs_alloc_arg *args, /* allocation argument structure */ + struct xfs_btree_cur **gcur, /* good cursor */ + struct xfs_btree_cur **scur, /* searching cursor */ + xfs_agblock_t gdiff, /* difference for search comparison */ + xfs_agblock_t *sbno, /* extent found by search */ + xfs_extlen_t *slen, /* extent length */ + xfs_agblock_t *sbnoa, /* aligned extent found by search */ + xfs_extlen_t *slena, /* aligned extent length */ + int dir) /* 0 = search right, 1 = search left */ +{ + xfs_agblock_t new; + xfs_agblock_t sdiff; + int error; + int i; + + /* The good extent is perfect, no need to search. */ + if (!gdiff) + goto out_use_good; + + /* + * Look until we find a better one, run out of space or run off the end. + */ + do { + error = xfs_alloc_get_rec(*scur, sbno, slen, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); + + /* + * The good extent is closer than this one. + */ + if (!dir) { + if (*sbnoa >= args->agbno + gdiff) + goto out_use_good; + } else { + if (*sbnoa <= args->agbno - gdiff) + goto out_use_good; + } + + /* + * Same distance, compare length and pick the best. + */ + if (*slena >= args->minlen) { + args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); + xfs_alloc_fix_len(args); + + sdiff = xfs_alloc_compute_diff(args->agbno, args->len, + args->alignment, + args->userdata, *sbnoa, + *slena, &new); + + /* + * Choose closer size and invalidate other cursor. + */ + if (sdiff < gdiff) + goto out_use_search; + goto out_use_good; + } + + if (!dir) + error = xfs_btree_increment(*scur, 0, &i); + else + error = xfs_btree_decrement(*scur, 0, &i); + if (error) + goto error0; + } while (i); + +out_use_good: + xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); + *scur = NULL; + return 0; + +out_use_search: + xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); + *gcur = NULL; + return 0; + +error0: + /* caller invalidates cursors */ + return error; +} + +/* + * Allocate a variable extent near bno in the allocation group agno. + * Extent's length (returned in len) will be between minlen and maxlen, + * and of the form k * prod + mod unless there's nothing that large. + * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent_near( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ + xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ + xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ + xfs_agblock_t gtbno; /* start bno of right side entry */ + xfs_agblock_t gtbnoa; /* aligned ... */ + xfs_extlen_t gtdiff; /* difference to right side entry */ + xfs_extlen_t gtlen; /* length of right side entry */ + xfs_extlen_t gtlena; /* aligned ... */ + xfs_agblock_t gtnew; /* useful start bno of right side */ + int error; /* error code */ + int i; /* result code, temporary */ + int j; /* result code, temporary */ + xfs_agblock_t ltbno; /* start bno of left side entry */ + xfs_agblock_t ltbnoa; /* aligned ... */ + xfs_extlen_t ltdiff; /* difference to left side entry */ + xfs_extlen_t ltlen; /* length of left side entry */ + xfs_extlen_t ltlena; /* aligned ... */ + xfs_agblock_t ltnew; /* useful start bno of left side */ + xfs_extlen_t rlen; /* length of returned extent */ + int forced = 0; +#ifdef DEBUG + /* + * Randomly don't execute the first algorithm. + */ + int dofirst; /* set to do first algorithm */ + + dofirst = prandom_u32() & 1; +#endif + +restart: + bno_cur_lt = NULL; + bno_cur_gt = NULL; + ltlen = 0; + gtlena = 0; + ltlena = 0; + + /* + * Get a cursor for the by-size btree. + */ + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); + + /* + * See if there are any free extents as big as maxlen. + */ + if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))) + goto error0; + /* + * If none, then pick up the last entry in the tree unless the + * tree is empty. + */ + if (!i) { + if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, <bno, + <len, &i))) + goto error0; + if (i == 0 || ltlen == 0) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_near_noentry(args); + return 0; + } + ASSERT(i == 1); + } + args->wasfromfl = 0; + + /* + * First algorithm. + * If the requested extent is large wrt the freespaces available + * in this a.g., then the cursor will be pointing to a btree entry + * near the right edge of the tree. If it's in the last btree leaf + * block, then we just examine all the entries in that block + * that are big enough, and pick the best one. + * This is written as a while loop so we can break out of it, + * but we never loop back to the top. + */ + while (xfs_btree_islastblock(cnt_cur, 0)) { + xfs_extlen_t bdiff; + int besti=0; + xfs_extlen_t blen=0; + xfs_agblock_t bnew=0; + +#ifdef DEBUG + if (dofirst) + break; +#endif + /* + * Start from the entry that lookup found, sequence through + * all larger free blocks. If we're actually pointing at a + * record smaller than maxlen, go to the start of this block, + * and skip all those smaller than minlen. + */ + if (ltlen || args->alignment > 1) { + cnt_cur->bc_ptrs[0] = 1; + do { + if ((error = xfs_alloc_get_rec(cnt_cur, <bno, + <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (ltlen >= args->minlen) + break; + if ((error = xfs_btree_increment(cnt_cur, 0, &i))) + goto error0; + } while (i); + ASSERT(ltlen >= args->minlen); + if (!i) + break; + } + i = cnt_cur->bc_ptrs[0]; + for (j = 1, blen = 0, bdiff = 0; + !error && j && (blen < args->maxlen || bdiff > 0); + error = xfs_btree_increment(cnt_cur, 0, &j)) { + /* + * For each entry, decide if it's better than + * the previous best entry. + */ + if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_alloc_compute_aligned(args, ltbno, ltlen, + <bnoa, <lena); + if (ltlena < args->minlen) + continue; + args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); + xfs_alloc_fix_len(args); + ASSERT(args->len >= args->minlen); + if (args->len < blen) + continue; + ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, + args->alignment, args->userdata, ltbnoa, + ltlena, <new); + if (ltnew != NULLAGBLOCK && + (args->len > blen || ltdiff < bdiff)) { + bdiff = ltdiff; + bnew = ltnew; + blen = args->len; + besti = cnt_cur->bc_ptrs[0]; + } + } + /* + * It didn't work. We COULD be in a case where + * there's a good record somewhere, so try again. + */ + if (blen == 0) + break; + /* + * Point at the best entry, and retrieve it again. + */ + cnt_cur->bc_ptrs[0] = besti; + if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + args->len = blen; + if (!xfs_alloc_fix_minleft(args)) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_near_nominleft(args); + return 0; + } + blen = args->len; + /* + * We are allocating starting at bnew for blen blocks. + */ + args->agbno = bnew; + ASSERT(bnew >= ltbno); + ASSERT(bnew + blen <= ltbno + ltlen); + /* + * Set up a cursor for the by-bno tree. + */ + bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, + args->agbp, args->agno, XFS_BTNUM_BNO); + /* + * Fix up the btree entries. + */ + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, + ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) + goto error0; + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); + + trace_xfs_alloc_near_first(args); + return 0; + } + /* + * Second algorithm. + * Search in the by-bno tree to the left and to the right + * simultaneously, until in each case we find a space big enough, + * or run into the edge of the tree. When we run into the edge, + * we deallocate that cursor. + * If both searches succeed, we compare the two spaces and pick + * the better one. + * With alignment, it's possible for both to fail; the upper + * level algorithm that picks allocation groups for allocations + * is not supposed to do this. + */ + /* + * Allocate and initialize the cursor for the leftward search. + */ + bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); + /* + * Lookup <= bno to find the leftward search's starting point. + */ + if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i))) + goto error0; + if (!i) { + /* + * Didn't find anything; use this cursor for the rightward + * search. + */ + bno_cur_gt = bno_cur_lt; + bno_cur_lt = NULL; + } + /* + * Found something. Duplicate the cursor for the rightward search. + */ + else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))) + goto error0; + /* + * Increment the cursor, so we will point at the entry just right + * of the leftward entry if any, or to the leftmost entry. + */ + if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) + goto error0; + if (!i) { + /* + * It failed, there are no rightward entries. + */ + xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + } + /* + * Loop going left with the leftward cursor, right with the + * rightward cursor, until either both directions give up or + * we find an entry at least as big as minlen. + */ + do { + if (bno_cur_lt) { + if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_alloc_compute_aligned(args, ltbno, ltlen, + <bnoa, <lena); + if (ltlena >= args->minlen) + break; + if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) + goto error0; + if (!i) { + xfs_btree_del_cursor(bno_cur_lt, + XFS_BTREE_NOERROR); + bno_cur_lt = NULL; + } + } + if (bno_cur_gt) { + if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_alloc_compute_aligned(args, gtbno, gtlen, + >bnoa, >lena); + if (gtlena >= args->minlen) + break; + if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) + goto error0; + if (!i) { + xfs_btree_del_cursor(bno_cur_gt, + XFS_BTREE_NOERROR); + bno_cur_gt = NULL; + } + } + } while (bno_cur_lt || bno_cur_gt); + + /* + * Got both cursors still active, need to find better entry. + */ + if (bno_cur_lt && bno_cur_gt) { + if (ltlena >= args->minlen) { + /* + * Left side is good, look for a right side entry. + */ + args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); + xfs_alloc_fix_len(args); + ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, + args->alignment, args->userdata, ltbnoa, + ltlena, <new); + + error = xfs_alloc_find_best_extent(args, + &bno_cur_lt, &bno_cur_gt, + ltdiff, >bno, >len, + >bnoa, >lena, + 0 /* search right */); + } else { + ASSERT(gtlena >= args->minlen); + + /* + * Right side is good, look for a left side entry. + */ + args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); + xfs_alloc_fix_len(args); + gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, + args->alignment, args->userdata, gtbnoa, + gtlena, >new); + + error = xfs_alloc_find_best_extent(args, + &bno_cur_gt, &bno_cur_lt, + gtdiff, <bno, <len, + <bnoa, <lena, + 1 /* search left */); + } + + if (error) + goto error0; + } + + /* + * If we couldn't get anything, give up. + */ + if (bno_cur_lt == NULL && bno_cur_gt == NULL) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + + if (!forced++) { + trace_xfs_alloc_near_busy(args); + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + trace_xfs_alloc_size_neither(args); + args->agbno = NULLAGBLOCK; + return 0; + } + + /* + * At this point we have selected a freespace entry, either to the + * left or to the right. If it's on the right, copy all the + * useful variables to the "left" set so we only have one + * copy of this code. + */ + if (bno_cur_gt) { + bno_cur_lt = bno_cur_gt; + bno_cur_gt = NULL; + ltbno = gtbno; + ltbnoa = gtbnoa; + ltlen = gtlen; + ltlena = gtlena; + j = 1; + } else + j = 0; + + /* + * Fix up the length and compute the useful address. + */ + args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); + xfs_alloc_fix_len(args); + if (!xfs_alloc_fix_minleft(args)) { + trace_xfs_alloc_near_nominleft(args); + xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + return 0; + } + rlen = args->len; + (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, + args->userdata, ltbnoa, ltlena, <new); + ASSERT(ltnew >= ltbno); + ASSERT(ltnew + rlen <= ltbnoa + ltlena); + ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + args->agbno = ltnew; + + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, + ltnew, rlen, XFSA_FIXUP_BNO_OK))) + goto error0; + + if (j) + trace_xfs_alloc_near_greater(args); + else + trace_xfs_alloc_near_lesser(args); + + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); + return 0; + + error0: + trace_xfs_alloc_near_error(args); + if (cnt_cur != NULL) + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); + if (bno_cur_lt != NULL) + xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR); + if (bno_cur_gt != NULL) + xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR); + return error; +} + +/* + * Allocate a variable extent anywhere in the allocation group agno. + * Extent's length (returned in len) will be between minlen and maxlen, + * and of the form k * prod + mod unless there's nothing that large. + * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent_size( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ + xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ + int error; /* error result */ + xfs_agblock_t fbno; /* start of found freespace */ + xfs_extlen_t flen; /* length of found freespace */ + int i; /* temp status variable */ + xfs_agblock_t rbno; /* returned block number */ + xfs_extlen_t rlen; /* length of returned extent */ + int forced = 0; + +restart: + /* + * Allocate and initialize a cursor for the by-size btree. + */ + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); + bno_cur = NULL; + + /* + * Look for an entry >= maxlen+alignment-1 blocks. + */ + if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, + args->maxlen + args->alignment - 1, &i))) + goto error0; + + /* + * If none or we have busy extents that we cannot allocate from, then + * we have to settle for a smaller extent. In the case that there are + * no large extents, this will return the last entry in the tree unless + * the tree is empty. In the case that there are only busy large + * extents, this will return the largest small extent unless there + * are no smaller extents available. + */ + if (!i || forced > 1) { + error = xfs_alloc_ag_vextent_small(args, cnt_cur, + &fbno, &flen, &i); + if (error) + goto error0; + if (i == 0 || flen == 0) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_size_noentry(args); + return 0; + } + ASSERT(i == 1); + xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); + } else { + /* + * Search for a non-busy extent that is large enough. + * If we are at low space, don't check, or if we fall of + * the end of the btree, turn off the busy check and + * restart. + */ + for (;;) { + error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen); + + if (rlen >= args->maxlen) + break; + + error = xfs_btree_increment(cnt_cur, 0, &i); + if (error) + goto error0; + if (i == 0) { + /* + * Our only valid extents must have been busy. + * Make it unbusy by forcing the log out and + * retrying. If we've been here before, forcing + * the log isn't making the extents available, + * which means they have probably been freed in + * this transaction. In that case, we have to + * give up on them and we'll attempt a minlen + * allocation the next time around. + */ + xfs_btree_del_cursor(cnt_cur, + XFS_BTREE_NOERROR); + trace_xfs_alloc_size_busy(args); + if (!forced++) + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + } + } + + /* + * In the first case above, we got the last entry in the + * by-size btree. Now we check to see if the space hits maxlen + * once aligned; if not, we search left for something better. + * This can't happen in the second case above. + */ + rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); + XFS_WANT_CORRUPTED_GOTO(rlen == 0 || + (rlen <= flen && rbno + rlen <= fbno + flen), error0); + if (rlen < args->maxlen) { + xfs_agblock_t bestfbno; + xfs_extlen_t bestflen; + xfs_agblock_t bestrbno; + xfs_extlen_t bestrlen; + + bestrlen = rlen; + bestrbno = rbno; + bestflen = flen; + bestfbno = fbno; + for (;;) { + if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) + goto error0; + if (i == 0) + break; + if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, + &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (flen < bestrlen) + break; + xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen); + rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); + XFS_WANT_CORRUPTED_GOTO(rlen == 0 || + (rlen <= flen && rbno + rlen <= fbno + flen), + error0); + if (rlen > bestrlen) { + bestrlen = rlen; + bestrbno = rbno; + bestflen = flen; + bestfbno = fbno; + if (rlen == args->maxlen) + break; + } + } + if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, + &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + rlen = bestrlen; + rbno = bestrbno; + flen = bestflen; + fbno = bestfbno; + } + args->wasfromfl = 0; + /* + * Fix up the length. + */ + args->len = rlen; + if (rlen < args->minlen) { + if (!forced++) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_size_busy(args); + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + goto out_nominleft; + } + xfs_alloc_fix_len(args); + + if (!xfs_alloc_fix_minleft(args)) + goto out_nominleft; + rlen = args->len; + XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); + /* + * Allocate and initialize a cursor for the by-block tree. + */ + bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, + rbno, rlen, XFSA_FIXUP_CNT_OK))) + goto error0; + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + cnt_cur = bno_cur = NULL; + args->len = rlen; + args->agbno = rbno; + XFS_WANT_CORRUPTED_GOTO( + args->agbno + args->len <= + be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), + error0); + trace_xfs_alloc_size_done(args); + return 0; + +error0: + trace_xfs_alloc_size_error(args); + if (cnt_cur) + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); + if (bno_cur) + xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); + return error; + +out_nominleft: + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_size_nominleft(args); + args->agbno = NULLAGBLOCK; + return 0; +} + +/* + * Deal with the case where only small freespaces remain. + * Either return the contents of the last freespace record, + * or allocate space from the freelist if there is nothing in the tree. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent_small( + xfs_alloc_arg_t *args, /* allocation argument structure */ + xfs_btree_cur_t *ccur, /* by-size cursor */ + xfs_agblock_t *fbnop, /* result block number */ + xfs_extlen_t *flenp, /* result length */ + int *stat) /* status: 0-freelist, 1-normal/none */ +{ + int error; + xfs_agblock_t fbno; + xfs_extlen_t flen; + int i; + + if ((error = xfs_btree_decrement(ccur, 0, &i))) + goto error0; + if (i) { + if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + /* + * Nothing in the btree, try the freelist. Make sure + * to respect minleft even when pulling from the + * freelist. + */ + else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && + (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) + > args->minleft)) { + error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); + if (error) + goto error0; + if (fbno != NULLAGBLOCK) { + xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, + args->userdata); + + if (args->userdata) { + xfs_buf_t *bp; + + bp = xfs_btree_get_bufs(args->mp, args->tp, + args->agno, fbno, 0); + xfs_trans_binval(args->tp, bp); + } + args->len = 1; + args->agbno = fbno; + XFS_WANT_CORRUPTED_GOTO( + args->agbno + args->len <= + be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), + error0); + args->wasfromfl = 1; + trace_xfs_alloc_small_freelist(args); + *stat = 0; + return 0; + } + /* + * Nothing in the freelist. + */ + else + flen = 0; + } + /* + * Can't allocate from the freelist for some reason. + */ + else { + fbno = NULLAGBLOCK; + flen = 0; + } + /* + * Can't do the allocation, give up. + */ + if (flen < args->minlen) { + args->agbno = NULLAGBLOCK; + trace_xfs_alloc_small_notenough(args); + flen = 0; + } + *fbnop = fbno; + *flenp = flen; + *stat = 1; + trace_xfs_alloc_small_done(args); + return 0; + +error0: + trace_xfs_alloc_small_error(args); + return error; +} + +/* + * Free the extent starting at agno/bno for length. + */ +STATIC int /* error */ +xfs_free_ag_extent( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* buffer for a.g. freelist header */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t bno, /* starting block number */ + xfs_extlen_t len, /* length of extent */ + int isfl) /* set if is freelist blocks - no sb acctg */ +{ + xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ + xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ + int error; /* error return value */ + xfs_agblock_t gtbno; /* start of right neighbor block */ + xfs_extlen_t gtlen; /* length of right neighbor block */ + int haveleft; /* have a left neighbor block */ + int haveright; /* have a right neighbor block */ + int i; /* temp, result code */ + xfs_agblock_t ltbno; /* start of left neighbor block */ + xfs_extlen_t ltlen; /* length of left neighbor block */ + xfs_mount_t *mp; /* mount point struct for filesystem */ + xfs_agblock_t nbno; /* new starting block of freespace */ + xfs_extlen_t nlen; /* new length of freespace */ + xfs_perag_t *pag; /* per allocation group data */ + + mp = tp->t_mountp; + /* + * Allocate and initialize a cursor for the by-block btree. + */ + bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); + cnt_cur = NULL; + /* + * Look for a neighboring block on the left (lower block numbers) + * that is contiguous with this space. + */ + if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) + goto error0; + if (haveleft) { + /* + * There is a block to our left. + */ + if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * It's not contiguous, though. + */ + if (ltbno + ltlen < bno) + haveleft = 0; + else { + /* + * If this failure happens the request to free this + * space was invalid, it's (partly) already free. + * Very bad. + */ + XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0); + } + } + /* + * Look for a neighboring block on the right (higher block numbers) + * that is contiguous with this space. + */ + if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) + goto error0; + if (haveright) { + /* + * There is a block to our right. + */ + if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * It's not contiguous, though. + */ + if (bno + len < gtbno) + haveright = 0; + else { + /* + * If this failure happens the request to free this + * space was invalid, it's (partly) already free. + * Very bad. + */ + XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0); + } + } + /* + * Now allocate and initialize a cursor for the by-size tree. + */ + cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); + /* + * Have both left and right contiguous neighbors. + * Merge all three into a single free block. + */ + if (haveleft && haveright) { + /* + * Delete the old by-size entry on the left. + */ + if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_btree_delete(cnt_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Delete the old by-size entry on the right. + */ + if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_btree_delete(cnt_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Delete the old by-block entry for the right block. + */ + if ((error = xfs_btree_delete(bno_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Move the by-block cursor back to the left neighbor. + */ + if ((error = xfs_btree_decrement(bno_cur, 0, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); +#ifdef DEBUG + /* + * Check that this is the right record: delete didn't + * mangle the cursor. + */ + { + xfs_agblock_t xxbno; + xfs_extlen_t xxlen; + + if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, + &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO( + i == 1 && xxbno == ltbno && xxlen == ltlen, + error0); + } +#endif + /* + * Update remaining by-block entry to the new, joined block. + */ + nbno = ltbno; + nlen = len + ltlen + gtlen; + if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) + goto error0; + } + /* + * Have only a left contiguous neighbor. + * Merge it together with the new freespace. + */ + else if (haveleft) { + /* + * Delete the old by-size entry on the left. + */ + if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_btree_delete(cnt_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Back up the by-block cursor to the left neighbor, and + * update its length. + */ + if ((error = xfs_btree_decrement(bno_cur, 0, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + nbno = ltbno; + nlen = len + ltlen; + if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) + goto error0; + } + /* + * Have only a right contiguous neighbor. + * Merge it together with the new freespace. + */ + else if (haveright) { + /* + * Delete the old by-size entry on the right. + */ + if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_btree_delete(cnt_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Update the starting block and length of the right + * neighbor in the by-block tree. + */ + nbno = bno; + nlen = len + gtlen; + if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) + goto error0; + } + /* + * No contiguous neighbors. + * Insert the new freespace into the by-block tree. + */ + else { + nbno = bno; + nlen = len; + if ((error = xfs_btree_insert(bno_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + bno_cur = NULL; + /* + * In all cases we need to insert the new freespace in the by-size tree. + */ + if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 0, error0); + if ((error = xfs_btree_insert(cnt_cur, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + cnt_cur = NULL; + + /* + * Update the freespace totals in the ag and superblock. + */ + pag = xfs_perag_get(mp, agno); + error = xfs_alloc_update_counters(tp, pag, agbp, len); + xfs_perag_put(pag); + if (error) + goto error0; + + if (!isfl) + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); + XFS_STATS_INC(xs_freex); + XFS_STATS_ADD(xs_freeb, len); + + trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); + + return 0; + + error0: + trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1); + if (bno_cur) + xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); + if (cnt_cur) + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Visible (exported) allocation/free functions. + * Some of these are used just by xfs_alloc_btree.c and this file. + */ + +/* + * Compute and fill in value of m_ag_maxlevels. + */ +void +xfs_alloc_compute_maxlevels( + xfs_mount_t *mp) /* file system mount structure */ +{ + int level; + uint maxblocks; + uint maxleafents; + int minleafrecs; + int minnoderecs; + + maxleafents = (mp->m_sb.sb_agblocks + 1) / 2; + minleafrecs = mp->m_alloc_mnr[0]; + minnoderecs = mp->m_alloc_mnr[1]; + maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; + for (level = 1; maxblocks > 1; level++) + maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; + mp->m_ag_maxlevels = level; +} + +/* + * Find the length of the longest extent in an AG. + */ +xfs_extlen_t +xfs_alloc_longest_free_extent( + struct xfs_mount *mp, + struct xfs_perag *pag) +{ + xfs_extlen_t need, delta = 0; + + need = XFS_MIN_FREELIST_PAG(pag, mp); + if (need > pag->pagf_flcount) + delta = need - pag->pagf_flcount; + + if (pag->pagf_longest > delta) + return pag->pagf_longest - delta; + return pag->pagf_flcount > 0 || pag->pagf_longest > 0; +} + +/* + * Decide whether to use this allocation group for this allocation. + * If so, fix up the btree freelist's size. + */ +STATIC int /* error */ +xfs_alloc_fix_freelist( + xfs_alloc_arg_t *args, /* allocation argument structure */ + int flags) /* XFS_ALLOC_FLAG_... */ +{ + xfs_buf_t *agbp; /* agf buffer pointer */ + xfs_agf_t *agf; /* a.g. freespace structure pointer */ + xfs_buf_t *agflbp;/* agfl buffer pointer */ + xfs_agblock_t bno; /* freelist block */ + xfs_extlen_t delta; /* new blocks needed in freelist */ + int error; /* error result code */ + xfs_extlen_t longest;/* longest extent in allocation group */ + xfs_mount_t *mp; /* file system mount point structure */ + xfs_extlen_t need; /* total blocks needed in freelist */ + xfs_perag_t *pag; /* per-ag information structure */ + xfs_alloc_arg_t targs; /* local allocation arguments */ + xfs_trans_t *tp; /* transaction pointer */ + + mp = args->mp; + + pag = args->pag; + tp = args->tp; + if (!pag->pagf_init) { + if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, + &agbp))) + return error; + if (!pag->pagf_init) { + ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); + ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); + args->agbp = NULL; + return 0; + } + } else + agbp = NULL; + + /* + * If this is a metadata preferred pag and we are user data + * then try somewhere else if we are not being asked to + * try harder at this point + */ + if (pag->pagf_metadata && args->userdata && + (flags & XFS_ALLOC_FLAG_TRYLOCK)) { + ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); + args->agbp = NULL; + return 0; + } + + if (!(flags & XFS_ALLOC_FLAG_FREEING)) { + /* + * If it looks like there isn't a long enough extent, or enough + * total blocks, reject it. + */ + need = XFS_MIN_FREELIST_PAG(pag, mp); + longest = xfs_alloc_longest_free_extent(mp, pag); + if ((args->minlen + args->alignment + args->minalignslop - 1) > + longest || + ((int)(pag->pagf_freeblks + pag->pagf_flcount - + need - args->total) < (int)args->minleft)) { + if (agbp) + xfs_trans_brelse(tp, agbp); + args->agbp = NULL; + return 0; + } + } + + /* + * Get the a.g. freespace buffer. + * Can fail if we're not blocking on locks, and it's held. + */ + if (agbp == NULL) { + if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, + &agbp))) + return error; + if (agbp == NULL) { + ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); + ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); + args->agbp = NULL; + return 0; + } + } + /* + * Figure out how many blocks we should have in the freelist. + */ + agf = XFS_BUF_TO_AGF(agbp); + need = XFS_MIN_FREELIST(agf, mp); + /* + * If there isn't enough total or single-extent, reject it. + */ + if (!(flags & XFS_ALLOC_FLAG_FREEING)) { + delta = need > be32_to_cpu(agf->agf_flcount) ? + (need - be32_to_cpu(agf->agf_flcount)) : 0; + longest = be32_to_cpu(agf->agf_longest); + longest = (longest > delta) ? (longest - delta) : + (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); + if ((args->minlen + args->alignment + args->minalignslop - 1) > + longest || + ((int)(be32_to_cpu(agf->agf_freeblks) + + be32_to_cpu(agf->agf_flcount) - need - args->total) < + (int)args->minleft)) { + xfs_trans_brelse(tp, agbp); + args->agbp = NULL; + return 0; + } + } + /* + * Make the freelist shorter if it's too long. + */ + while (be32_to_cpu(agf->agf_flcount) > need) { + xfs_buf_t *bp; + + error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); + if (error) + return error; + if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) + return error; + bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); + xfs_trans_binval(tp, bp); + } + /* + * Initialize the args structure. + */ + memset(&targs, 0, sizeof(targs)); + targs.tp = tp; + targs.mp = mp; + targs.agbp = agbp; + targs.agno = args->agno; + targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; + targs.type = XFS_ALLOCTYPE_THIS_AG; + targs.pag = pag; + if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))) + return error; + /* + * Make the freelist longer if it's too short. + */ + while (be32_to_cpu(agf->agf_flcount) < need) { + targs.agbno = 0; + targs.maxlen = need - be32_to_cpu(agf->agf_flcount); + /* + * Allocate as many blocks as possible at once. + */ + if ((error = xfs_alloc_ag_vextent(&targs))) { + xfs_trans_brelse(tp, agflbp); + return error; + } + /* + * Stop if we run out. Won't happen if callers are obeying + * the restrictions correctly. Can happen for free calls + * on a completely full ag. + */ + if (targs.agbno == NULLAGBLOCK) { + if (flags & XFS_ALLOC_FLAG_FREEING) + break; + xfs_trans_brelse(tp, agflbp); + args->agbp = NULL; + return 0; + } + /* + * Put each allocated block on the list. + */ + for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { + error = xfs_alloc_put_freelist(tp, agbp, + agflbp, bno, 0); + if (error) + return error; + } + } + xfs_trans_brelse(tp, agflbp); + args->agbp = agbp; + return 0; +} + +/* + * Get a block from the freelist. + * Returns with the buffer for the block gotten. + */ +int /* error */ +xfs_alloc_get_freelist( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* buffer containing the agf structure */ + xfs_agblock_t *bnop, /* block address retrieved from freelist */ + int btreeblk) /* destination is a AGF btree */ +{ + xfs_agf_t *agf; /* a.g. freespace structure */ + xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ + xfs_agblock_t bno; /* block number returned */ + __be32 *agfl_bno; + int error; + int logflags; + xfs_mount_t *mp = tp->t_mountp; + xfs_perag_t *pag; /* per allocation group data */ + + /* + * Freelist is empty, give up. + */ + agf = XFS_BUF_TO_AGF(agbp); + if (!agf->agf_flcount) { + *bnop = NULLAGBLOCK; + return 0; + } + /* + * Read the array of free blocks. + */ + error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), + &agflbp); + if (error) + return error; + + + /* + * Get the block number and update the data structures. + */ + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); + bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); + be32_add_cpu(&agf->agf_flfirst, 1); + xfs_trans_brelse(tp, agflbp); + if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) + agf->agf_flfirst = 0; + + pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); + be32_add_cpu(&agf->agf_flcount, -1); + xfs_trans_agflist_delta(tp, -1); + pag->pagf_flcount--; + xfs_perag_put(pag); + + logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; + if (btreeblk) { + be32_add_cpu(&agf->agf_btreeblks, 1); + pag->pagf_btreeblks++; + logflags |= XFS_AGF_BTREEBLKS; + } + + xfs_alloc_log_agf(tp, agbp, logflags); + *bnop = bno; + + return 0; +} + +/* + * Log the given fields from the agf structure. + */ +void +xfs_alloc_log_agf( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *bp, /* buffer for a.g. freelist header */ + int fields) /* mask of fields to be logged (XFS_AGF_...) */ +{ + int first; /* first byte offset */ + int last; /* last byte offset */ + static const short offsets[] = { + offsetof(xfs_agf_t, agf_magicnum), + offsetof(xfs_agf_t, agf_versionnum), + offsetof(xfs_agf_t, agf_seqno), + offsetof(xfs_agf_t, agf_length), + offsetof(xfs_agf_t, agf_roots[0]), + offsetof(xfs_agf_t, agf_levels[0]), + offsetof(xfs_agf_t, agf_flfirst), + offsetof(xfs_agf_t, agf_fllast), + offsetof(xfs_agf_t, agf_flcount), + offsetof(xfs_agf_t, agf_freeblks), + offsetof(xfs_agf_t, agf_longest), + offsetof(xfs_agf_t, agf_btreeblks), + offsetof(xfs_agf_t, agf_uuid), + sizeof(xfs_agf_t) + }; + + trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); + + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF); + + xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); + xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); +} + +/* + * Interface for inode allocation to force the pag data to be initialized. + */ +int /* error */ +xfs_alloc_pagf_init( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags) /* XFS_ALLOC_FLAGS_... */ +{ + xfs_buf_t *bp; + int error; + + if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp))) + return error; + if (bp) + xfs_trans_brelse(tp, bp); + return 0; +} + +/* + * Put the block on the freelist for the allocation group. + */ +int /* error */ +xfs_alloc_put_freelist( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* buffer for a.g. freelist header */ + xfs_buf_t *agflbp,/* buffer for a.g. free block array */ + xfs_agblock_t bno, /* block being freed */ + int btreeblk) /* block came from a AGF btree */ +{ + xfs_agf_t *agf; /* a.g. freespace structure */ + __be32 *blockp;/* pointer to array entry */ + int error; + int logflags; + xfs_mount_t *mp; /* mount structure */ + xfs_perag_t *pag; /* per allocation group data */ + __be32 *agfl_bno; + int startoff; + + agf = XFS_BUF_TO_AGF(agbp); + mp = tp->t_mountp; + + if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, + be32_to_cpu(agf->agf_seqno), &agflbp))) + return error; + be32_add_cpu(&agf->agf_fllast, 1); + if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) + agf->agf_fllast = 0; + + pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); + be32_add_cpu(&agf->agf_flcount, 1); + xfs_trans_agflist_delta(tp, 1); + pag->pagf_flcount++; + + logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; + if (btreeblk) { + be32_add_cpu(&agf->agf_btreeblks, -1); + pag->pagf_btreeblks--; + logflags |= XFS_AGF_BTREEBLKS; + } + xfs_perag_put(pag); + + xfs_alloc_log_agf(tp, agbp, logflags); + + ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); + + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); + blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; + *blockp = cpu_to_be32(bno); + startoff = (char *)blockp - (char *)agflbp->b_addr; + + xfs_alloc_log_agf(tp, agbp, logflags); + + xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); + xfs_trans_log_buf(tp, agflbp, startoff, + startoff + sizeof(xfs_agblock_t) - 1); + return 0; +} + +static bool +xfs_agf_verify( + struct xfs_mount *mp, + struct xfs_buf *bp) + { + struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); + + if (xfs_sb_version_hascrc(&mp->m_sb) && + !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid)) + return false; + + if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && + XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && + be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && + be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && + be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && + be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) + return false; + + /* + * during growfs operations, the perag is not fully initialised, + * so we can't use it for any useful checking. growfs ensures we can't + * use it by using uncached buffers that don't have the perag attached + * so we can detect and avoid this problem. + */ + if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) + return false; + + if (xfs_sb_version_haslazysbcount(&mp->m_sb) && + be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) + return false; + + return true;; + +} + +static void +xfs_agf_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, + XFS_ERRTAG_ALLOC_READ_AGF, + XFS_RANDOM_ALLOC_READ_AGF)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +static void +xfs_agf_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + + if (!xfs_agf_verify(mp, bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (bip) + XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); + + xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); +} + +const struct xfs_buf_ops xfs_agf_buf_ops = { + .verify_read = xfs_agf_read_verify, + .verify_write = xfs_agf_write_verify, +}; + +/* + * Read in the allocation group header (free/alloc section). + */ +int /* error */ +xfs_read_agf( + struct xfs_mount *mp, /* mount point structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_BUF_ */ + struct xfs_buf **bpp) /* buffer for the ag freelist header */ +{ + int error; + + trace_xfs_read_agf(mp, agno); + + ASSERT(agno != NULLAGNUMBER); + error = xfs_trans_read_buf( + mp, tp, mp->m_ddev_targp, + XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); + if (error) + return error; + if (!*bpp) + return 0; + + ASSERT(!(*bpp)->b_error); + xfs_buf_set_ref(*bpp, XFS_AGF_REF); + return 0; +} + +/* + * Read in the allocation group header (free/alloc section). + */ +int /* error */ +xfs_alloc_read_agf( + struct xfs_mount *mp, /* mount point structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_ALLOC_FLAG_... */ + struct xfs_buf **bpp) /* buffer for the ag freelist header */ +{ + struct xfs_agf *agf; /* ag freelist header */ + struct xfs_perag *pag; /* per allocation group data */ + int error; + + trace_xfs_alloc_read_agf(mp, agno); + + ASSERT(agno != NULLAGNUMBER); + error = xfs_read_agf(mp, tp, agno, + (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, + bpp); + if (error) + return error; + if (!*bpp) + return 0; + ASSERT(!(*bpp)->b_error); + + agf = XFS_BUF_TO_AGF(*bpp); + pag = xfs_perag_get(mp, agno); + if (!pag->pagf_init) { + pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); + pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); + pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); + pag->pagf_longest = be32_to_cpu(agf->agf_longest); + pag->pagf_levels[XFS_BTNUM_BNOi] = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); + pag->pagf_levels[XFS_BTNUM_CNTi] = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); + spin_lock_init(&pag->pagb_lock); + pag->pagb_count = 0; + pag->pagb_tree = RB_ROOT; + pag->pagf_init = 1; + } +#ifdef DEBUG + else if (!XFS_FORCED_SHUTDOWN(mp)) { + ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); + ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); + ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); + ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); + ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi])); + ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] == + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); + } +#endif + xfs_perag_put(pag); + return 0; +} + +/* + * Allocate an extent (variable-size). + * Depending on the allocation type, we either look in a single allocation + * group or loop over the allocation groups to find the result. + */ +int /* error */ +xfs_alloc_vextent( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_agblock_t agsize; /* allocation group size */ + int error; + int flags; /* XFS_ALLOC_FLAG_... locking flags */ + xfs_extlen_t minleft;/* minimum left value, temp copy */ + xfs_mount_t *mp; /* mount structure pointer */ + xfs_agnumber_t sagno; /* starting allocation group number */ + xfs_alloctype_t type; /* input allocation type */ + int bump_rotor = 0; + int no_min = 0; + xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ + + mp = args->mp; + type = args->otype = args->type; + args->agbno = NULLAGBLOCK; + /* + * Just fix this up, for the case where the last a.g. is shorter + * (or there's only one a.g.) and the caller couldn't easily figure + * that out (xfs_bmap_alloc). + */ + agsize = mp->m_sb.sb_agblocks; + if (args->maxlen > agsize) + args->maxlen = agsize; + if (args->alignment == 0) + args->alignment = 1; + ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); + ASSERT(args->minlen <= args->maxlen); + ASSERT(args->minlen <= agsize); + ASSERT(args->mod < args->prod); + if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount || + XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || + args->minlen > args->maxlen || args->minlen > agsize || + args->mod >= args->prod) { + args->fsbno = NULLFSBLOCK; + trace_xfs_alloc_vextent_badargs(args); + return 0; + } + minleft = args->minleft; + + switch (type) { + case XFS_ALLOCTYPE_THIS_AG: + case XFS_ALLOCTYPE_NEAR_BNO: + case XFS_ALLOCTYPE_THIS_BNO: + /* + * These three force us into a single a.g. + */ + args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); + args->pag = xfs_perag_get(mp, args->agno); + args->minleft = 0; + error = xfs_alloc_fix_freelist(args, 0); + args->minleft = minleft; + if (error) { + trace_xfs_alloc_vextent_nofix(args); + goto error0; + } + if (!args->agbp) { + trace_xfs_alloc_vextent_noagbp(args); + break; + } + args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); + if ((error = xfs_alloc_ag_vextent(args))) + goto error0; + break; + case XFS_ALLOCTYPE_START_BNO: + /* + * Try near allocation first, then anywhere-in-ag after + * the first a.g. fails. + */ + if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) && + (mp->m_flags & XFS_MOUNT_32BITINODES)) { + args->fsbno = XFS_AGB_TO_FSB(mp, + ((mp->m_agfrotor / rotorstep) % + mp->m_sb.sb_agcount), 0); + bump_rotor = 1; + } + args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); + args->type = XFS_ALLOCTYPE_NEAR_BNO; + /* FALLTHROUGH */ + case XFS_ALLOCTYPE_ANY_AG: + case XFS_ALLOCTYPE_START_AG: + case XFS_ALLOCTYPE_FIRST_AG: + /* + * Rotate through the allocation groups looking for a winner. + */ + if (type == XFS_ALLOCTYPE_ANY_AG) { + /* + * Start with the last place we left off. + */ + args->agno = sagno = (mp->m_agfrotor / rotorstep) % + mp->m_sb.sb_agcount; + args->type = XFS_ALLOCTYPE_THIS_AG; + flags = XFS_ALLOC_FLAG_TRYLOCK; + } else if (type == XFS_ALLOCTYPE_FIRST_AG) { + /* + * Start with allocation group given by bno. + */ + args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); + args->type = XFS_ALLOCTYPE_THIS_AG; + sagno = 0; + flags = 0; + } else { + if (type == XFS_ALLOCTYPE_START_AG) + args->type = XFS_ALLOCTYPE_THIS_AG; + /* + * Start with the given allocation group. + */ + args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); + flags = XFS_ALLOC_FLAG_TRYLOCK; + } + /* + * Loop over allocation groups twice; first time with + * trylock set, second time without. + */ + for (;;) { + args->pag = xfs_perag_get(mp, args->agno); + if (no_min) args->minleft = 0; + error = xfs_alloc_fix_freelist(args, flags); + args->minleft = minleft; + if (error) { + trace_xfs_alloc_vextent_nofix(args); + goto error0; + } + /* + * If we get a buffer back then the allocation will fly. + */ + if (args->agbp) { + if ((error = xfs_alloc_ag_vextent(args))) + goto error0; + break; + } + + trace_xfs_alloc_vextent_loopfailed(args); + + /* + * Didn't work, figure out the next iteration. + */ + if (args->agno == sagno && + type == XFS_ALLOCTYPE_START_BNO) + args->type = XFS_ALLOCTYPE_THIS_AG; + /* + * For the first allocation, we can try any AG to get + * space. However, if we already have allocated a + * block, we don't want to try AGs whose number is below + * sagno. Otherwise, we may end up with out-of-order + * locking of AGF, which might cause deadlock. + */ + if (++(args->agno) == mp->m_sb.sb_agcount) { + if (args->firstblock != NULLFSBLOCK) + args->agno = sagno; + else + args->agno = 0; + } + /* + * Reached the starting a.g., must either be done + * or switch to non-trylock mode. + */ + if (args->agno == sagno) { + if (no_min == 1) { + args->agbno = NULLAGBLOCK; + trace_xfs_alloc_vextent_allfailed(args); + break; + } + if (flags == 0) { + no_min = 1; + } else { + flags = 0; + if (type == XFS_ALLOCTYPE_START_BNO) { + args->agbno = XFS_FSB_TO_AGBNO(mp, + args->fsbno); + args->type = XFS_ALLOCTYPE_NEAR_BNO; + } + } + } + xfs_perag_put(args->pag); + } + if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { + if (args->agno == sagno) + mp->m_agfrotor = (mp->m_agfrotor + 1) % + (mp->m_sb.sb_agcount * rotorstep); + else + mp->m_agfrotor = (args->agno * rotorstep + 1) % + (mp->m_sb.sb_agcount * rotorstep); + } + break; + default: + ASSERT(0); + /* NOTREACHED */ + } + if (args->agbno == NULLAGBLOCK) + args->fsbno = NULLFSBLOCK; + else { + args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); +#ifdef DEBUG + ASSERT(args->len >= args->minlen); + ASSERT(args->len <= args->maxlen); + ASSERT(args->agbno % args->alignment == 0); + XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), + args->len); +#endif + } + xfs_perag_put(args->pag); + return 0; +error0: + xfs_perag_put(args->pag); + return error; +} + +/* + * Free an extent. + * Just break up the extent address and hand off to xfs_free_ag_extent + * after fixing up the freelist. + */ +int /* error */ +xfs_free_extent( + xfs_trans_t *tp, /* transaction pointer */ + xfs_fsblock_t bno, /* starting block number of extent */ + xfs_extlen_t len) /* length of extent */ +{ + xfs_alloc_arg_t args; + int error; + + ASSERT(len != 0); + memset(&args, 0, sizeof(xfs_alloc_arg_t)); + args.tp = tp; + args.mp = tp->t_mountp; + + /* + * validate that the block number is legal - the enables us to detect + * and handle a silent filesystem corruption rather than crashing. + */ + args.agno = XFS_FSB_TO_AGNO(args.mp, bno); + if (args.agno >= args.mp->m_sb.sb_agcount) + return EFSCORRUPTED; + + args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); + if (args.agbno >= args.mp->m_sb.sb_agblocks) + return EFSCORRUPTED; + + args.pag = xfs_perag_get(args.mp, args.agno); + ASSERT(args.pag); + + error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); + if (error) + goto error0; + + /* validate the extent size is legal now we have the agf locked */ + if (args.agbno + len > + be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { + error = EFSCORRUPTED; + goto error0; + } + + error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); + if (!error) + xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0); +error0: + xfs_perag_put(args.pag); + return error; +} diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c new file mode 100644 index 000000000000..8358f1ded94d --- /dev/null +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_alloc.h" +#include "xfs_extent_busy.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_cksum.h" +#include "xfs_trans.h" + + +STATIC struct xfs_btree_cur * +xfs_allocbt_dup_cursor( + struct xfs_btree_cur *cur) +{ + return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_btnum); +} + +STATIC void +xfs_allocbt_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int inc) +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + int btnum = cur->bc_btnum; + struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); + + ASSERT(ptr->s != 0); + + agf->agf_roots[btnum] = ptr->s; + be32_add_cpu(&agf->agf_levels[btnum], inc); + pag->pagf_levels[btnum] += inc; + xfs_perag_put(pag); + + xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); +} + +STATIC int +xfs_allocbt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + int error; + xfs_agblock_t bno; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + /* Allocate the new block from the freelist. If we can't, give up. */ + error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, + &bno, 1); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + + if (bno == NULLAGBLOCK) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false); + + xfs_trans_agbtree_delta(cur->bc_tp, 1); + new->s = cpu_to_be32(bno); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +} + +STATIC int +xfs_allocbt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + xfs_agblock_t bno; + int error; + + bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); + error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); + if (error) + return error; + + xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, + XFS_EXTENT_BUSY_SKIP_DISCARD); + xfs_trans_agbtree_delta(cur->bc_tp, -1); + + xfs_trans_binval(cur->bc_tp, bp); + return 0; +} + +/* + * Update the longest extent in the AGF + */ +STATIC void +xfs_allocbt_update_lastrec( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_rec *rec, + int ptr, + int reason) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + struct xfs_perag *pag; + __be32 len; + int numrecs; + + ASSERT(cur->bc_btnum == XFS_BTNUM_CNT); + + switch (reason) { + case LASTREC_UPDATE: + /* + * If this is the last leaf block and it's the last record, + * then update the size of the longest extent in the AG. + */ + if (ptr != xfs_btree_get_numrecs(block)) + return; + len = rec->alloc.ar_blockcount; + break; + case LASTREC_INSREC: + if (be32_to_cpu(rec->alloc.ar_blockcount) <= + be32_to_cpu(agf->agf_longest)) + return; + len = rec->alloc.ar_blockcount; + break; + case LASTREC_DELREC: + numrecs = xfs_btree_get_numrecs(block); + if (ptr <= numrecs) + return; + ASSERT(ptr == numrecs + 1); + + if (numrecs) { + xfs_alloc_rec_t *rrp; + + rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs); + len = rrp->ar_blockcount; + } else { + len = 0; + } + + break; + default: + ASSERT(0); + return; + } + + agf->agf_longest = len; + pag = xfs_perag_get(cur->bc_mp, seqno); + pag->pagf_longest = be32_to_cpu(len); + xfs_perag_put(pag); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); +} + +STATIC int +xfs_allocbt_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_alloc_mnr[level != 0]; +} + +STATIC int +xfs_allocbt_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_alloc_mxr[level != 0]; +} + +STATIC void +xfs_allocbt_init_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + ASSERT(rec->alloc.ar_startblock != 0); + + key->alloc.ar_startblock = rec->alloc.ar_startblock; + key->alloc.ar_blockcount = rec->alloc.ar_blockcount; +} + +STATIC void +xfs_allocbt_init_rec_from_key( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + ASSERT(key->alloc.ar_startblock != 0); + + rec->alloc.ar_startblock = key->alloc.ar_startblock; + rec->alloc.ar_blockcount = key->alloc.ar_blockcount; +} + +STATIC void +xfs_allocbt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + ASSERT(cur->bc_rec.a.ar_startblock != 0); + + rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); + rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); +} + +STATIC void +xfs_allocbt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + + ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(agf->agf_roots[cur->bc_btnum] != 0); + + ptr->s = agf->agf_roots[cur->bc_btnum]; +} + +STATIC __int64_t +xfs_allocbt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a; + xfs_alloc_key_t *kp = &key->alloc; + __int64_t diff; + + if (cur->bc_btnum == XFS_BTNUM_BNO) { + return (__int64_t)be32_to_cpu(kp->ar_startblock) - + rec->ar_startblock; + } + + diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount; + if (diff) + return diff; + + return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; +} + +static bool +xfs_allocbt_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_perag *pag = bp->b_pag; + unsigned int level; + + /* + * magic number and level verification + * + * During growfs operations, we can't verify the exact level or owner as + * the perag is not fully initialised and hence not attached to the + * buffer. In this case, check against the maximum tree depth. + * + * Similarly, during log recovery we will have a perag structure + * attached, but the agf information will not yet have been initialised + * from the on disk AGF. Again, we can only check against maximum limits + * in this case. + */ + level = be16_to_cpu(block->bb_level); + switch (block->bb_magic) { + case cpu_to_be32(XFS_ABTB_CRC_MAGIC): + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) + return false; + if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) + return false; + if (pag && + be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) + return false; + /* fall through */ + case cpu_to_be32(XFS_ABTB_MAGIC): + if (pag && pag->pagf_init) { + if (level >= pag->pagf_levels[XFS_BTNUM_BNOi]) + return false; + } else if (level >= mp->m_ag_maxlevels) + return false; + break; + case cpu_to_be32(XFS_ABTC_CRC_MAGIC): + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) + return false; + if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) + return false; + if (pag && + be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) + return false; + /* fall through */ + case cpu_to_be32(XFS_ABTC_MAGIC): + if (pag && pag->pagf_init) { + if (level >= pag->pagf_levels[XFS_BTNUM_CNTi]) + return false; + } else if (level >= mp->m_ag_maxlevels) + return false; + break; + default: + return false; + } + + /* numrecs verification */ + if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0]) + return false; + + /* sibling pointer verification */ + if (!block->bb_u.s.bb_leftsib || + (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && + block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) + return false; + if (!block->bb_u.s.bb_rightsib || + (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && + block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) + return false; + + return true; +} + +static void +xfs_allocbt_read_verify( + struct xfs_buf *bp) +{ + if (!xfs_btree_sblock_verify_crc(bp)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_allocbt_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_verifier_error(bp); + } +} + +static void +xfs_allocbt_write_verify( + struct xfs_buf *bp) +{ + if (!xfs_allocbt_verify(bp)) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + xfs_btree_sblock_calc_crc(bp); + +} + +const struct xfs_buf_ops xfs_allocbt_buf_ops = { + .verify_read = xfs_allocbt_read_verify, + .verify_write = xfs_allocbt_write_verify, +}; + + +#if defined(DEBUG) || defined(XFS_WARN) +STATIC int +xfs_allocbt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + if (cur->bc_btnum == XFS_BTNUM_BNO) { + return be32_to_cpu(k1->alloc.ar_startblock) < + be32_to_cpu(k2->alloc.ar_startblock); + } else { + return be32_to_cpu(k1->alloc.ar_blockcount) < + be32_to_cpu(k2->alloc.ar_blockcount) || + (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount && + be32_to_cpu(k1->alloc.ar_startblock) < + be32_to_cpu(k2->alloc.ar_startblock)); + } +} + +STATIC int +xfs_allocbt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + if (cur->bc_btnum == XFS_BTNUM_BNO) { + return be32_to_cpu(r1->alloc.ar_startblock) + + be32_to_cpu(r1->alloc.ar_blockcount) <= + be32_to_cpu(r2->alloc.ar_startblock); + } else { + return be32_to_cpu(r1->alloc.ar_blockcount) < + be32_to_cpu(r2->alloc.ar_blockcount) || + (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && + be32_to_cpu(r1->alloc.ar_startblock) < + be32_to_cpu(r2->alloc.ar_startblock)); + } +} +#endif /* DEBUG */ + +static const struct xfs_btree_ops xfs_allocbt_ops = { + .rec_len = sizeof(xfs_alloc_rec_t), + .key_len = sizeof(xfs_alloc_key_t), + + .dup_cursor = xfs_allocbt_dup_cursor, + .set_root = xfs_allocbt_set_root, + .alloc_block = xfs_allocbt_alloc_block, + .free_block = xfs_allocbt_free_block, + .update_lastrec = xfs_allocbt_update_lastrec, + .get_minrecs = xfs_allocbt_get_minrecs, + .get_maxrecs = xfs_allocbt_get_maxrecs, + .init_key_from_rec = xfs_allocbt_init_key_from_rec, + .init_rec_from_key = xfs_allocbt_init_rec_from_key, + .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, + .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, + .key_diff = xfs_allocbt_key_diff, + .buf_ops = &xfs_allocbt_buf_ops, +#if defined(DEBUG) || defined(XFS_WARN) + .keys_inorder = xfs_allocbt_keys_inorder, + .recs_inorder = xfs_allocbt_recs_inorder, +#endif +}; + +/* + * Allocate a new allocation btree cursor. + */ +struct xfs_btree_cur * /* new alloc btree cursor */ +xfs_allocbt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer for agf structure */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_btnum_t btnum) /* btree identifier */ +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_btree_cur *cur; + + ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); + + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_btnum = btnum; + cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur->bc_ops = &xfs_allocbt_ops; + + if (btnum == XFS_BTNUM_CNT) { + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); + cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; + } else { + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); + } + + cur->bc_private.a.agbp = agbp; + cur->bc_private.a.agno = agno; + + if (xfs_sb_version_hascrc(&mp->m_sb)) + cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; + + return cur; +} + +/* + * Calculate number of records in an alloc btree block. + */ +int +xfs_allocbt_maxrecs( + struct xfs_mount *mp, + int blocklen, + int leaf) +{ + blocklen -= XFS_ALLOC_BLOCK_LEN(mp); + + if (leaf) + return blocklen / sizeof(xfs_alloc_rec_t); + return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); +} diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c new file mode 100644 index 000000000000..7d95b16f0919 --- /dev/null +++ b/fs/xfs/libxfs/xfs_attr.c @@ -0,0 +1,1459 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_attr_remote.h" +#include "xfs_error.h" +#include "xfs_quota.h" +#include "xfs_trans_space.h" +#include "xfs_trace.h" +#include "xfs_dinode.h" + +/* + * xfs_attr.c + * + * Provide the external interfaces to manage attribute lists. + */ + +/*======================================================================== + * Function prototypes for the kernel. + *========================================================================*/ + +/* + * Internal routines when attribute list fits inside the inode. + */ +STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); + +/* + * Internal routines when attribute list is one block. + */ +STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); +STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args); +STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); + +/* + * Internal routines when attribute list is more than one block. + */ +STATIC int xfs_attr_node_get(xfs_da_args_t *args); +STATIC int xfs_attr_node_addname(xfs_da_args_t *args); +STATIC int xfs_attr_node_removename(xfs_da_args_t *args); +STATIC int xfs_attr_fillstate(xfs_da_state_t *state); +STATIC int xfs_attr_refillstate(xfs_da_state_t *state); + + +STATIC int +xfs_attr_args_init( + struct xfs_da_args *args, + struct xfs_inode *dp, + const unsigned char *name, + int flags) +{ + + if (!name) + return EINVAL; + + memset(args, 0, sizeof(*args)); + args->geo = dp->i_mount->m_attr_geo; + args->whichfork = XFS_ATTR_FORK; + args->dp = dp; + args->flags = flags; + args->name = name; + args->namelen = strlen((const char *)name); + if (args->namelen >= MAXNAMELEN) + return EFAULT; /* match IRIX behaviour */ + + args->hashval = xfs_da_hashname(args->name, args->namelen); + return 0; +} + +int +xfs_inode_hasattr( + struct xfs_inode *ip) +{ + if (!XFS_IFORK_Q(ip) || + (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && + ip->i_d.di_anextents == 0)) + return 0; + return 1; +} + +/*======================================================================== + * Overall external interface routines. + *========================================================================*/ + +int +xfs_attr_get( + struct xfs_inode *ip, + const unsigned char *name, + unsigned char *value, + int *valuelenp, + int flags) +{ + struct xfs_da_args args; + uint lock_mode; + int error; + + XFS_STATS_INC(xs_attr_get); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return EIO; + + if (!xfs_inode_hasattr(ip)) + return ENOATTR; + + error = xfs_attr_args_init(&args, ip, name, flags); + if (error) + return error; + + args.value = value; + args.valuelen = *valuelenp; + + lock_mode = xfs_ilock_attr_map_shared(ip); + if (!xfs_inode_hasattr(ip)) + error = ENOATTR; + else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) + error = xfs_attr_shortform_getvalue(&args); + else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) + error = xfs_attr_leaf_get(&args); + else + error = xfs_attr_node_get(&args); + xfs_iunlock(ip, lock_mode); + + *valuelenp = args.valuelen; + return error == EEXIST ? 0 : error; +} + +/* + * Calculate how many blocks we need for the new attribute, + */ +STATIC int +xfs_attr_calc_size( + struct xfs_da_args *args, + int *local) +{ + struct xfs_mount *mp = args->dp->i_mount; + int size; + int nblks; + + /* + * Determine space new attribute will use, and if it would be + * "local" or "remote" (note: local != inline). + */ + size = xfs_attr_leaf_newentsize(args, local); + nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); + if (*local) { + if (size > (args->geo->blksize / 2)) { + /* Double split possible */ + nblks *= 2; + } + } else { + /* + * Out of line attribute, cannot double split, but + * make room for the attribute value itself. + */ + uint dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen); + nblks += dblocks; + nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); + } + + return nblks; +} + +int +xfs_attr_set( + struct xfs_inode *dp, + const unsigned char *name, + unsigned char *value, + int valuelen, + int flags) +{ + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_args args; + struct xfs_bmap_free flist; + struct xfs_trans_res tres; + xfs_fsblock_t firstblock; + int rsvd = (flags & ATTR_ROOT) != 0; + int error, err2, committed, local; + + XFS_STATS_INC(xs_attr_set); + + if (XFS_FORCED_SHUTDOWN(dp->i_mount)) + return EIO; + + error = xfs_attr_args_init(&args, dp, name, flags); + if (error) + return error; + + args.value = value; + args.valuelen = valuelen; + args.firstblock = &firstblock; + args.flist = &flist; + args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; + args.total = xfs_attr_calc_size(&args, &local); + + error = xfs_qm_dqattach(dp, 0); + if (error) + return error; + + /* + * If the inode doesn't have an attribute fork, add one. + * (inode must not be locked when we call this routine) + */ + if (XFS_IFORK_Q(dp) == 0) { + int sf_size = sizeof(xfs_attr_sf_hdr_t) + + XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); + + error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); + if (error) + return error; + } + + /* + * Start our first transaction of the day. + * + * All future transactions during this code must be "chained" off + * this one via the trans_dup() call. All transactions will contain + * the inode, and the inode will always be marked with trans_ihold(). + * Since the inode will be locked in all transactions, we must log + * the inode in every transaction to let it float upward through + * the log. + */ + args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET); + + /* + * Root fork attributes can use reserved data blocks for this + * operation if necessary + */ + + if (rsvd) + args.trans->t_flags |= XFS_TRANS_RESERVE; + + tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + + M_RES(mp)->tr_attrsetrt.tr_logres * args.total; + tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; + tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; + error = xfs_trans_reserve(args.trans, &tres, args.total, 0); + if (error) { + xfs_trans_cancel(args.trans, 0); + return error; + } + xfs_ilock(dp, XFS_ILOCK_EXCL); + + error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, + rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : + XFS_QMOPT_RES_REGBLKS); + if (error) { + xfs_iunlock(dp, XFS_ILOCK_EXCL); + xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); + return error; + } + + xfs_trans_ijoin(args.trans, dp, 0); + + /* + * If the attribute list is non-existent or a shortform list, + * upgrade it to a single-leaf-block attribute list. + */ + if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || + (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && + dp->i_d.di_anextents == 0)) { + + /* + * Build initial attribute list (if required). + */ + if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) + xfs_attr_shortform_create(&args); + + /* + * Try to add the attr to the attribute list in + * the inode. + */ + error = xfs_attr_shortform_addname(&args); + if (error != ENOSPC) { + /* + * Commit the shortform mods, and we're done. + * NOTE: this is also the error path (EEXIST, etc). + */ + ASSERT(args.trans != NULL); + + /* + * If this is a synchronous mount, make sure that + * the transaction goes to disk before returning + * to the user. + */ + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(args.trans); + + if (!error && (flags & ATTR_KERNOTIME) == 0) { + xfs_trans_ichgtime(args.trans, dp, + XFS_ICHGTIME_CHG); + } + err2 = xfs_trans_commit(args.trans, + XFS_TRANS_RELEASE_LOG_RES); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + + return error ? error : err2; + } + + /* + * It won't fit in the shortform, transform to a leaf block. + * GROT: another possible req'mt for a double-split btree op. + */ + xfs_bmap_init(args.flist, args.firstblock); + error = xfs_attr_shortform_to_leaf(&args); + if (!error) { + error = xfs_bmap_finish(&args.trans, args.flist, + &committed); + } + if (error) { + ASSERT(committed); + args.trans = NULL; + xfs_bmap_cancel(&flist); + goto out; + } + + /* + * bmap_finish() may have committed the last trans and started + * a new one. We need the inode to be in all transactions. + */ + if (committed) + xfs_trans_ijoin(args.trans, dp, 0); + + /* + * Commit the leaf transformation. We'll need another (linked) + * transaction to add the new attribute to the leaf. + */ + + error = xfs_trans_roll(&args.trans, dp); + if (error) + goto out; + + } + + if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) + error = xfs_attr_leaf_addname(&args); + else + error = xfs_attr_node_addname(&args); + if (error) + goto out; + + /* + * If this is a synchronous mount, make sure that the + * transaction goes to disk before returning to the user. + */ + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(args.trans); + + if ((flags & ATTR_KERNOTIME) == 0) + xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); + + /* + * Commit the last in the sequence of transactions. + */ + xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); + error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + + return error; + +out: + if (args.trans) { + xfs_trans_cancel(args.trans, + XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + } + xfs_iunlock(dp, XFS_ILOCK_EXCL); + return error; +} + +/* + * Generic handler routine to remove a name from an attribute list. + * Transitions attribute list from Btree to shortform as necessary. + */ +int +xfs_attr_remove( + struct xfs_inode *dp, + const unsigned char *name, + int flags) +{ + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_args args; + struct xfs_bmap_free flist; + xfs_fsblock_t firstblock; + int error; + + XFS_STATS_INC(xs_attr_remove); + + if (XFS_FORCED_SHUTDOWN(dp->i_mount)) + return EIO; + + if (!xfs_inode_hasattr(dp)) + return ENOATTR; + + error = xfs_attr_args_init(&args, dp, name, flags); + if (error) + return error; + + args.firstblock = &firstblock; + args.flist = &flist; + + /* + * we have no control over the attribute names that userspace passes us + * to remove, so we have to allow the name lookup prior to attribute + * removal to fail. + */ + args.op_flags = XFS_DA_OP_OKNOENT; + + error = xfs_qm_dqattach(dp, 0); + if (error) + return error; + + /* + * Start our first transaction of the day. + * + * All future transactions during this code must be "chained" off + * this one via the trans_dup() call. All transactions will contain + * the inode, and the inode will always be marked with trans_ihold(). + * Since the inode will be locked in all transactions, we must log + * the inode in every transaction to let it float upward through + * the log. + */ + args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM); + + /* + * Root fork attributes can use reserved data blocks for this + * operation if necessary + */ + + if (flags & ATTR_ROOT) + args.trans->t_flags |= XFS_TRANS_RESERVE; + + error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm, + XFS_ATTRRM_SPACE_RES(mp), 0); + if (error) { + xfs_trans_cancel(args.trans, 0); + return error; + } + + xfs_ilock(dp, XFS_ILOCK_EXCL); + /* + * No need to make quota reservations here. We expect to release some + * blocks not allocate in the common case. + */ + xfs_trans_ijoin(args.trans, dp, 0); + + if (!xfs_inode_hasattr(dp)) { + error = ENOATTR; + } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { + ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); + error = xfs_attr_shortform_remove(&args); + } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { + error = xfs_attr_leaf_removename(&args); + } else { + error = xfs_attr_node_removename(&args); + } + + if (error) + goto out; + + /* + * If this is a synchronous mount, make sure that the + * transaction goes to disk before returning to the user. + */ + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(args.trans); + + if ((flags & ATTR_KERNOTIME) == 0) + xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); + + /* + * Commit the last in the sequence of transactions. + */ + xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); + error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + + return error; + +out: + if (args.trans) { + xfs_trans_cancel(args.trans, + XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + } + xfs_iunlock(dp, XFS_ILOCK_EXCL); + return error; +} + +/*======================================================================== + * External routines when attribute list is inside the inode + *========================================================================*/ + +/* + * Add a name to the shortform attribute list structure + * This is the external routine. + */ +STATIC int +xfs_attr_shortform_addname(xfs_da_args_t *args) +{ + int newsize, forkoff, retval; + + trace_xfs_attr_sf_addname(args); + + retval = xfs_attr_shortform_lookup(args); + if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { + return retval; + } else if (retval == EEXIST) { + if (args->flags & ATTR_CREATE) + return retval; + retval = xfs_attr_shortform_remove(args); + ASSERT(retval == 0); + } + + if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || + args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX) + return ENOSPC; + + newsize = XFS_ATTR_SF_TOTSIZE(args->dp); + newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); + + forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize); + if (!forkoff) + return ENOSPC; + + xfs_attr_shortform_add(args, forkoff); + return 0; +} + + +/*======================================================================== + * External routines when attribute list is one block + *========================================================================*/ + +/* + * Add a name to the leaf attribute list structure + * + * This leaf block cannot have a "remote" value, we only call this routine + * if bmap_one_block() says there is only one block (ie: no remote blks). + */ +STATIC int +xfs_attr_leaf_addname(xfs_da_args_t *args) +{ + xfs_inode_t *dp; + struct xfs_buf *bp; + int retval, error, committed, forkoff; + + trace_xfs_attr_leaf_addname(args); + + /* + * Read the (only) block in the attribute list in. + */ + dp = args->dp; + args->blkno = 0; + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + if (error) + return error; + + /* + * Look up the given attribute in the leaf block. Figure out if + * the given flags produce an error or call for an atomic rename. + */ + retval = xfs_attr3_leaf_lookup_int(bp, args); + if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { + xfs_trans_brelse(args->trans, bp); + return retval; + } else if (retval == EEXIST) { + if (args->flags & ATTR_CREATE) { /* pure create op */ + xfs_trans_brelse(args->trans, bp); + return retval; + } + + trace_xfs_attr_leaf_replace(args); + + /* save the attribute state for later removal*/ + args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ + args->blkno2 = args->blkno; /* set 2nd entry info*/ + args->index2 = args->index; + args->rmtblkno2 = args->rmtblkno; + args->rmtblkcnt2 = args->rmtblkcnt; + args->rmtvaluelen2 = args->rmtvaluelen; + + /* + * clear the remote attr state now that it is saved so that the + * values reflect the state of the attribute we are about to + * add, not the attribute we just found and will remove later. + */ + args->rmtblkno = 0; + args->rmtblkcnt = 0; + args->rmtvaluelen = 0; + } + + /* + * Add the attribute to the leaf block, transitioning to a Btree + * if required. + */ + retval = xfs_attr3_leaf_add(bp, args); + if (retval == ENOSPC) { + /* + * Promote the attribute list to the Btree format, then + * Commit that transaction so that the node_addname() call + * can manage its own transactions. + */ + xfs_bmap_init(args->flist, args->firstblock); + error = xfs_attr3_leaf_to_node(args); + if (!error) { + error = xfs_bmap_finish(&args->trans, args->flist, + &committed); + } + if (error) { + ASSERT(committed); + args->trans = NULL; + xfs_bmap_cancel(args->flist); + return error; + } + + /* + * bmap_finish() may have committed the last trans and started + * a new one. We need the inode to be in all transactions. + */ + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); + + /* + * Commit the current trans (including the inode) and start + * a new one. + */ + error = xfs_trans_roll(&args->trans, dp); + if (error) + return error; + + /* + * Fob the whole rest of the problem off on the Btree code. + */ + error = xfs_attr_node_addname(args); + return error; + } + + /* + * Commit the transaction that added the attr name so that + * later routines can manage their own transactions. + */ + error = xfs_trans_roll(&args->trans, dp); + if (error) + return error; + + /* + * If there was an out-of-line value, allocate the blocks we + * identified for its storage and copy the value. This is done + * after we create the attribute so that we don't overflow the + * maximum size of a transaction and/or hit a deadlock. + */ + if (args->rmtblkno > 0) { + error = xfs_attr_rmtval_set(args); + if (error) + return error; + } + + /* + * If this is an atomic rename operation, we must "flip" the + * incomplete flags on the "new" and "old" attribute/value pairs + * so that one disappears and one appears atomically. Then we + * must remove the "old" attribute/value pair. + */ + if (args->op_flags & XFS_DA_OP_RENAME) { + /* + * In a separate transaction, set the incomplete flag on the + * "old" attr and clear the incomplete flag on the "new" attr. + */ + error = xfs_attr3_leaf_flipflags(args); + if (error) + return error; + + /* + * Dismantle the "old" attribute/value pair by removing + * a "remote" value (if it exists). + */ + args->index = args->index2; + args->blkno = args->blkno2; + args->rmtblkno = args->rmtblkno2; + args->rmtblkcnt = args->rmtblkcnt2; + args->rmtvaluelen = args->rmtvaluelen2; + if (args->rmtblkno) { + error = xfs_attr_rmtval_remove(args); + if (error) + return error; + } + + /* + * Read in the block containing the "old" attr, then + * remove the "old" attr from that block (neat, huh!) + */ + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, + -1, &bp); + if (error) + return error; + + xfs_attr3_leaf_remove(bp, args); + + /* + * If the result is small enough, shrink it all into the inode. + */ + if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { + xfs_bmap_init(args->flist, args->firstblock); + error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); + /* bp is gone due to xfs_da_shrink_inode */ + if (!error) { + error = xfs_bmap_finish(&args->trans, + args->flist, + &committed); + } + if (error) { + ASSERT(committed); + args->trans = NULL; + xfs_bmap_cancel(args->flist); + return error; + } + + /* + * bmap_finish() may have committed the last trans + * and started a new one. We need the inode to be + * in all transactions. + */ + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); + } + + /* + * Commit the remove and start the next trans in series. + */ + error = xfs_trans_roll(&args->trans, dp); + + } else if (args->rmtblkno > 0) { + /* + * Added a "remote" value, just clear the incomplete flag. + */ + error = xfs_attr3_leaf_clearflag(args); + } + return error; +} + +/* + * Remove a name from the leaf attribute list structure + * + * This leaf block cannot have a "remote" value, we only call this routine + * if bmap_one_block() says there is only one block (ie: no remote blks). + */ +STATIC int +xfs_attr_leaf_removename(xfs_da_args_t *args) +{ + xfs_inode_t *dp; + struct xfs_buf *bp; + int error, committed, forkoff; + + trace_xfs_attr_leaf_removename(args); + + /* + * Remove the attribute. + */ + dp = args->dp; + args->blkno = 0; + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + if (error) + return error; + + error = xfs_attr3_leaf_lookup_int(bp, args); + if (error == ENOATTR) { + xfs_trans_brelse(args->trans, bp); + return error; + } + + xfs_attr3_leaf_remove(bp, args); + + /* + * If the result is small enough, shrink it all into the inode. + */ + if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { + xfs_bmap_init(args->flist, args->firstblock); + error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); + /* bp is gone due to xfs_da_shrink_inode */ + if (!error) { + error = xfs_bmap_finish(&args->trans, args->flist, + &committed); + } + if (error) { + ASSERT(committed); + args->trans = NULL; + xfs_bmap_cancel(args->flist); + return error; + } + + /* + * bmap_finish() may have committed the last trans and started + * a new one. We need the inode to be in all transactions. + */ + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); + } + return 0; +} + +/* + * Look up a name in a leaf attribute list structure. + * + * This leaf block cannot have a "remote" value, we only call this routine + * if bmap_one_block() says there is only one block (ie: no remote blks). + */ +STATIC int +xfs_attr_leaf_get(xfs_da_args_t *args) +{ + struct xfs_buf *bp; + int error; + + trace_xfs_attr_leaf_get(args); + + args->blkno = 0; + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + if (error) + return error; + + error = xfs_attr3_leaf_lookup_int(bp, args); + if (error != EEXIST) { + xfs_trans_brelse(args->trans, bp); + return error; + } + error = xfs_attr3_leaf_getvalue(bp, args); + xfs_trans_brelse(args->trans, bp); + if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) { + error = xfs_attr_rmtval_get(args); + } + return error; +} + +/*======================================================================== + * External routines when attribute list size > geo->blksize + *========================================================================*/ + +/* + * Add a name to a Btree-format attribute list. + * + * This will involve walking down the Btree, and may involve splitting + * leaf nodes and even splitting intermediate nodes up to and including + * the root node (a special case of an intermediate node). + * + * "Remote" attribute values confuse the issue and atomic rename operations + * add a whole extra layer of confusion on top of that. + */ +STATIC int +xfs_attr_node_addname(xfs_da_args_t *args) +{ + xfs_da_state_t *state; + xfs_da_state_blk_t *blk; + xfs_inode_t *dp; + xfs_mount_t *mp; + int committed, retval, error; + + trace_xfs_attr_node_addname(args); + + /* + * Fill in bucket of arguments/results/context to carry around. + */ + dp = args->dp; + mp = dp->i_mount; +restart: + state = xfs_da_state_alloc(); + state->args = args; + state->mp = mp; + + /* + * Search to see if name already exists, and get back a pointer + * to where it should go. + */ + error = xfs_da3_node_lookup_int(state, &retval); + if (error) + goto out; + blk = &state->path.blk[ state->path.active-1 ]; + ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); + if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { + goto out; + } else if (retval == EEXIST) { + if (args->flags & ATTR_CREATE) + goto out; + + trace_xfs_attr_node_replace(args); + + /* save the attribute state for later removal*/ + args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ + args->blkno2 = args->blkno; /* set 2nd entry info*/ + args->index2 = args->index; + args->rmtblkno2 = args->rmtblkno; + args->rmtblkcnt2 = args->rmtblkcnt; + args->rmtvaluelen2 = args->rmtvaluelen; + + /* + * clear the remote attr state now that it is saved so that the + * values reflect the state of the attribute we are about to + * add, not the attribute we just found and will remove later. + */ + args->rmtblkno = 0; + args->rmtblkcnt = 0; + args->rmtvaluelen = 0; + } + + retval = xfs_attr3_leaf_add(blk->bp, state->args); + if (retval == ENOSPC) { + if (state->path.active == 1) { + /* + * Its really a single leaf node, but it had + * out-of-line values so it looked like it *might* + * have been a b-tree. + */ + xfs_da_state_free(state); + state = NULL; + xfs_bmap_init(args->flist, args->firstblock); + error = xfs_attr3_leaf_to_node(args); + if (!error) { + error = xfs_bmap_finish(&args->trans, + args->flist, + &committed); + } + if (error) { + ASSERT(committed); + args->trans = NULL; + xfs_bmap_cancel(args->flist); + goto out; + } + + /* + * bmap_finish() may have committed the last trans + * and started a new one. We need the inode to be + * in all transactions. + */ + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); + + /* + * Commit the node conversion and start the next + * trans in the chain. + */ + error = xfs_trans_roll(&args->trans, dp); + if (error) + goto out; + + goto restart; + } + + /* + * Split as many Btree elements as required. + * This code tracks the new and old attr's location + * in the index/blkno/rmtblkno/rmtblkcnt fields and + * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields. + */ + xfs_bmap_init(args->flist, args->firstblock); + error = xfs_da3_split(state); + if (!error) { + error = xfs_bmap_finish(&args->trans, args->flist, + &committed); + } + if (error) { + ASSERT(committed); + args->trans = NULL; + xfs_bmap_cancel(args->flist); + goto out; + } + + /* + * bmap_finish() may have committed the last trans and started + * a new one. We need the inode to be in all transactions. + */ + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); + } else { + /* + * Addition succeeded, update Btree hashvals. + */ + xfs_da3_fixhashpath(state, &state->path); + } + + /* + * Kill the state structure, we're done with it and need to + * allow the buffers to come back later. + */ + xfs_da_state_free(state); + state = NULL; + + /* + * Commit the leaf addition or btree split and start the next + * trans in the chain. + */ + error = xfs_trans_roll(&args->trans, dp); + if (error) + goto out; + + /* + * If there was an out-of-line value, allocate the blocks we + * identified for its storage and copy the value. This is done + * after we create the attribute so that we don't overflow the + * maximum size of a transaction and/or hit a deadlock. + */ + if (args->rmtblkno > 0) { + error = xfs_attr_rmtval_set(args); + if (error) + return error; + } + + /* + * If this is an atomic rename operation, we must "flip" the + * incomplete flags on the "new" and "old" attribute/value pairs + * so that one disappears and one appears atomically. Then we + * must remove the "old" attribute/value pair. + */ + if (args->op_flags & XFS_DA_OP_RENAME) { + /* + * In a separate transaction, set the incomplete flag on the + * "old" attr and clear the incomplete flag on the "new" attr. + */ + error = xfs_attr3_leaf_flipflags(args); + if (error) + goto out; + + /* + * Dismantle the "old" attribute/value pair by removing + * a "remote" value (if it exists). + */ + args->index = args->index2; + args->blkno = args->blkno2; + args->rmtblkno = args->rmtblkno2; + args->rmtblkcnt = args->rmtblkcnt2; + args->rmtvaluelen = args->rmtvaluelen2; + if (args->rmtblkno) { + error = xfs_attr_rmtval_remove(args); + if (error) + return error; + } + + /* + * Re-find the "old" attribute entry after any split ops. + * The INCOMPLETE flag means that we will find the "old" + * attr, not the "new" one. + */ + args->flags |= XFS_ATTR_INCOMPLETE; + state = xfs_da_state_alloc(); + state->args = args; + state->mp = mp; + state->inleaf = 0; + error = xfs_da3_node_lookup_int(state, &retval); + if (error) + goto out; + + /* + * Remove the name and update the hashvals in the tree. + */ + blk = &state->path.blk[ state->path.active-1 ]; + ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); + error = xfs_attr3_leaf_remove(blk->bp, args); + xfs_da3_fixhashpath(state, &state->path); + + /* + * Check to see if the tree needs to be collapsed. + */ + if (retval && (state->path.active > 1)) { + xfs_bmap_init(args->flist, args->firstblock); + error = xfs_da3_join(state); + if (!error) { + error = xfs_bmap_finish(&args->trans, + args->flist, + &committed); + } + if (error) { + ASSERT(committed); + args->trans = NULL; + xfs_bmap_cancel(args->flist); + goto out; + } + + /* + * bmap_finish() may have committed the last trans + * and started a new one. We need the inode to be + * in all transactions. + */ + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); + } + + /* + * Commit and start the next trans in the chain. + */ + error = xfs_trans_roll(&args->trans, dp); + if (error) + goto out; + + } else if (args->rmtblkno > 0) { + /* + * Added a "remote" value, just clear the incomplete flag. + */ + error = xfs_attr3_leaf_clearflag(args); + if (error) + goto out; + } + retval = error = 0; + +out: + if (state) + xfs_da_state_free(state); + if (error) + return error; + return retval; +} + +/* + * Remove a name from a B-tree attribute list. + * + * This will involve walking down the Btree, and may involve joining + * leaf nodes and even joining intermediate nodes up to and including + * the root node (a special case of an intermediate node). + */ +STATIC int +xfs_attr_node_removename(xfs_da_args_t *args) +{ + xfs_da_state_t *state; + xfs_da_state_blk_t *blk; + xfs_inode_t *dp; + struct xfs_buf *bp; + int retval, error, committed, forkoff; + + trace_xfs_attr_node_removename(args); + + /* + * Tie a string around our finger to remind us where we are. + */ + dp = args->dp; + state = xfs_da_state_alloc(); + state->args = args; + state->mp = dp->i_mount; + + /* + * Search to see if name exists, and get back a pointer to it. + */ + error = xfs_da3_node_lookup_int(state, &retval); + if (error || (retval != EEXIST)) { + if (error == 0) + error = retval; + goto out; + } + + /* + * If there is an out-of-line value, de-allocate the blocks. + * This is done before we remove the attribute so that we don't + * overflow the maximum size of a transaction and/or hit a deadlock. + */ + blk = &state->path.blk[ state->path.active-1 ]; + ASSERT(blk->bp != NULL); + ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); + if (args->rmtblkno > 0) { + /* + * Fill in disk block numbers in the state structure + * so that we can get the buffers back after we commit + * several transactions in the following calls. + */ + error = xfs_attr_fillstate(state); + if (error) + goto out; + + /* + * Mark the attribute as INCOMPLETE, then bunmapi() the + * remote value. + */ + error = xfs_attr3_leaf_setflag(args); + if (error) + goto out; + error = xfs_attr_rmtval_remove(args); + if (error) + goto out; + + /* + * Refill the state structure with buffers, the prior calls + * released our buffers. + */ + error = xfs_attr_refillstate(state); + if (error) + goto out; + } + + /* + * Remove the name and update the hashvals in the tree. + */ + blk = &state->path.blk[ state->path.active-1 ]; + ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); + retval = xfs_attr3_leaf_remove(blk->bp, args); + xfs_da3_fixhashpath(state, &state->path); + + /* + * Check to see if the tree needs to be collapsed. + */ + if (retval && (state->path.active > 1)) { + xfs_bmap_init(args->flist, args->firstblock); + error = xfs_da3_join(state); + if (!error) { + error = xfs_bmap_finish(&args->trans, args->flist, + &committed); + } + if (error) { + ASSERT(committed); + args->trans = NULL; + xfs_bmap_cancel(args->flist); + goto out; + } + + /* + * bmap_finish() may have committed the last trans and started + * a new one. We need the inode to be in all transactions. + */ + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); + + /* + * Commit the Btree join operation and start a new trans. + */ + error = xfs_trans_roll(&args->trans, dp); + if (error) + goto out; + } + + /* + * If the result is small enough, push it all into the inode. + */ + if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { + /* + * Have to get rid of the copy of this dabuf in the state. + */ + ASSERT(state->path.active == 1); + ASSERT(state->path.blk[0].bp); + state->path.blk[0].bp = NULL; + + error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp); + if (error) + goto out; + + if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { + xfs_bmap_init(args->flist, args->firstblock); + error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); + /* bp is gone due to xfs_da_shrink_inode */ + if (!error) { + error = xfs_bmap_finish(&args->trans, + args->flist, + &committed); + } + if (error) { + ASSERT(committed); + args->trans = NULL; + xfs_bmap_cancel(args->flist); + goto out; + } + + /* + * bmap_finish() may have committed the last trans + * and started a new one. We need the inode to be + * in all transactions. + */ + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); + } else + xfs_trans_brelse(args->trans, bp); + } + error = 0; + +out: + xfs_da_state_free(state); + return error; +} + +/* + * Fill in the disk block numbers in the state structure for the buffers + * that are attached to the state structure. + * This is done so that we can quickly reattach ourselves to those buffers + * after some set of transaction commits have released these buffers. + */ +STATIC int +xfs_attr_fillstate(xfs_da_state_t *state) +{ + xfs_da_state_path_t *path; + xfs_da_state_blk_t *blk; + int level; + + trace_xfs_attr_fillstate(state->args); + + /* + * Roll down the "path" in the state structure, storing the on-disk + * block number for those buffers in the "path". + */ + path = &state->path; + ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); + for (blk = path->blk, level = 0; level < path->active; blk++, level++) { + if (blk->bp) { + blk->disk_blkno = XFS_BUF_ADDR(blk->bp); + blk->bp = NULL; + } else { + blk->disk_blkno = 0; + } + } + + /* + * Roll down the "altpath" in the state structure, storing the on-disk + * block number for those buffers in the "altpath". + */ + path = &state->altpath; + ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); + for (blk = path->blk, level = 0; level < path->active; blk++, level++) { + if (blk->bp) { + blk->disk_blkno = XFS_BUF_ADDR(blk->bp); + blk->bp = NULL; + } else { + blk->disk_blkno = 0; + } + } + + return 0; +} + +/* + * Reattach the buffers to the state structure based on the disk block + * numbers stored in the state structure. + * This is done after some set of transaction commits have released those + * buffers from our grip. + */ +STATIC int +xfs_attr_refillstate(xfs_da_state_t *state) +{ + xfs_da_state_path_t *path; + xfs_da_state_blk_t *blk; + int level, error; + + trace_xfs_attr_refillstate(state->args); + + /* + * Roll down the "path" in the state structure, storing the on-disk + * block number for those buffers in the "path". + */ + path = &state->path; + ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); + for (blk = path->blk, level = 0; level < path->active; blk++, level++) { + if (blk->disk_blkno) { + error = xfs_da3_node_read(state->args->trans, + state->args->dp, + blk->blkno, blk->disk_blkno, + &blk->bp, XFS_ATTR_FORK); + if (error) + return error; + } else { + blk->bp = NULL; + } + } + + /* + * Roll down the "altpath" in the state structure, storing the on-disk + * block number for those buffers in the "altpath". + */ + path = &state->altpath; + ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); + for (blk = path->blk, level = 0; level < path->active; blk++, level++) { + if (blk->disk_blkno) { + error = xfs_da3_node_read(state->args->trans, + state->args->dp, + blk->blkno, blk->disk_blkno, + &blk->bp, XFS_ATTR_FORK); + if (error) + return error; + } else { + blk->bp = NULL; + } + } + + return 0; +} + +/* + * Look up a filename in a node attribute list. + * + * This routine gets called for any attribute fork that has more than one + * block, ie: both true Btree attr lists and for single-leaf-blocks with + * "remote" values taking up more blocks. + */ +STATIC int +xfs_attr_node_get(xfs_da_args_t *args) +{ + xfs_da_state_t *state; + xfs_da_state_blk_t *blk; + int error, retval; + int i; + + trace_xfs_attr_node_get(args); + + state = xfs_da_state_alloc(); + state->args = args; + state->mp = args->dp->i_mount; + + /* + * Search to see if name exists, and get back a pointer to it. + */ + error = xfs_da3_node_lookup_int(state, &retval); + if (error) { + retval = error; + } else if (retval == EEXIST) { + blk = &state->path.blk[ state->path.active-1 ]; + ASSERT(blk->bp != NULL); + ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); + + /* + * Get the value, local or "remote" + */ + retval = xfs_attr3_leaf_getvalue(blk->bp, args); + if (!retval && (args->rmtblkno > 0) + && !(args->flags & ATTR_KERNOVAL)) { + retval = xfs_attr_rmtval_get(args); + } + } + + /* + * If not in a transaction, we have to release all the buffers. + */ + for (i = 0; i < state->path.active; i++) { + xfs_trans_brelse(args->trans, state->path.blk[i].bp); + state->path.blk[i].bp = NULL; + } + + xfs_da_state_free(state); + return retval; +} diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c new file mode 100644 index 000000000000..127d96aba845 --- /dev/null +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -0,0 +1,2697 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_bmap_btree.h" +#include "xfs_bmap.h" +#include "xfs_attr_sf.h" +#include "xfs_attr_remote.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_buf_item.h" +#include "xfs_cksum.h" +#include "xfs_dinode.h" +#include "xfs_dir2.h" + + +/* + * xfs_attr_leaf.c + * + * Routines to implement leaf blocks of attributes as Btrees of hashed names. + */ + +/*======================================================================== + * Function prototypes for the kernel. + *========================================================================*/ + +/* + * Routines used for growing the Btree. + */ +STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args, + xfs_dablk_t which_block, struct xfs_buf **bpp); +STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer, + struct xfs_attr3_icleaf_hdr *ichdr, + struct xfs_da_args *args, int freemap_index); +STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args, + struct xfs_attr3_icleaf_hdr *ichdr, + struct xfs_buf *leaf_buffer); +STATIC void xfs_attr3_leaf_rebalance(xfs_da_state_t *state, + xfs_da_state_blk_t *blk1, + xfs_da_state_blk_t *blk2); +STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state, + xfs_da_state_blk_t *leaf_blk_1, + struct xfs_attr3_icleaf_hdr *ichdr1, + xfs_da_state_blk_t *leaf_blk_2, + struct xfs_attr3_icleaf_hdr *ichdr2, + int *number_entries_in_blk1, + int *number_usedbytes_in_blk1); + +/* + * Utility routines. + */ +STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args, + struct xfs_attr_leafblock *src_leaf, + struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start, + struct xfs_attr_leafblock *dst_leaf, + struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start, + int move_count); +STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); + +void +xfs_attr3_leaf_hdr_from_disk( + struct xfs_attr3_icleaf_hdr *to, + struct xfs_attr_leafblock *from) +{ + int i; + + ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || + from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); + + if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) { + struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)from; + + to->forw = be32_to_cpu(hdr3->info.hdr.forw); + to->back = be32_to_cpu(hdr3->info.hdr.back); + to->magic = be16_to_cpu(hdr3->info.hdr.magic); + to->count = be16_to_cpu(hdr3->count); + to->usedbytes = be16_to_cpu(hdr3->usedbytes); + to->firstused = be16_to_cpu(hdr3->firstused); + to->holes = hdr3->holes; + + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + to->freemap[i].base = be16_to_cpu(hdr3->freemap[i].base); + to->freemap[i].size = be16_to_cpu(hdr3->freemap[i].size); + } + return; + } + to->forw = be32_to_cpu(from->hdr.info.forw); + to->back = be32_to_cpu(from->hdr.info.back); + to->magic = be16_to_cpu(from->hdr.info.magic); + to->count = be16_to_cpu(from->hdr.count); + to->usedbytes = be16_to_cpu(from->hdr.usedbytes); + to->firstused = be16_to_cpu(from->hdr.firstused); + to->holes = from->hdr.holes; + + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + to->freemap[i].base = be16_to_cpu(from->hdr.freemap[i].base); + to->freemap[i].size = be16_to_cpu(from->hdr.freemap[i].size); + } +} + +void +xfs_attr3_leaf_hdr_to_disk( + struct xfs_attr_leafblock *to, + struct xfs_attr3_icleaf_hdr *from) +{ + int i; + + ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC || + from->magic == XFS_ATTR3_LEAF_MAGIC); + + if (from->magic == XFS_ATTR3_LEAF_MAGIC) { + struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)to; + + hdr3->info.hdr.forw = cpu_to_be32(from->forw); + hdr3->info.hdr.back = cpu_to_be32(from->back); + hdr3->info.hdr.magic = cpu_to_be16(from->magic); + hdr3->count = cpu_to_be16(from->count); + hdr3->usedbytes = cpu_to_be16(from->usedbytes); + hdr3->firstused = cpu_to_be16(from->firstused); + hdr3->holes = from->holes; + hdr3->pad1 = 0; + + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base); + hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size); + } + return; + } + to->hdr.info.forw = cpu_to_be32(from->forw); + to->hdr.info.back = cpu_to_be32(from->back); + to->hdr.info.magic = cpu_to_be16(from->magic); + to->hdr.count = cpu_to_be16(from->count); + to->hdr.usedbytes = cpu_to_be16(from->usedbytes); + to->hdr.firstused = cpu_to_be16(from->firstused); + to->hdr.holes = from->holes; + to->hdr.pad1 = 0; + + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base); + to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size); + } +} + +static bool +xfs_attr3_leaf_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_attr3_icleaf_hdr ichdr; + + xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + + if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) + return false; + + if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid)) + return false; + if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) + return false; + } else { + if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) + return false; + } + if (ichdr.count == 0) + return false; + + /* XXX: need to range check rest of attr header values */ + /* XXX: hash order check? */ + + return true; +} + +static void +xfs_attr3_leaf_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; + + if (!xfs_attr3_leaf_verify(bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (bip) + hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); + + xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF); +} + +/* + * leaf/node format detection on trees is sketchy, so a node read can be done on + * leaf level blocks when detection identifies the tree as a node format tree + * incorrectly. In this case, we need to swap the verifier to match the correct + * format of the block being read. + */ +static void +xfs_attr3_leaf_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_attr3_leaf_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { + .verify_read = xfs_attr3_leaf_read_verify, + .verify_write = xfs_attr3_leaf_write_verify, +}; + +int +xfs_attr3_leaf_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp) +{ + int err; + + err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, + XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops); + if (!err && tp) + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF); + return err; +} + +/*======================================================================== + * Namespace helper routines + *========================================================================*/ + +/* + * If namespace bits don't match return 0. + * If all match then return 1. + */ +STATIC int +xfs_attr_namesp_match(int arg_flags, int ondisk_flags) +{ + return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); +} + + +/*======================================================================== + * External routines when attribute fork size < XFS_LITINO(mp). + *========================================================================*/ + +/* + * Query whether the requested number of additional bytes of extended + * attribute space will be able to fit inline. + * + * Returns zero if not, else the di_forkoff fork offset to be used in the + * literal area for attribute data once the new bytes have been added. + * + * di_forkoff must be 8 byte aligned, hence is stored as a >>3 value; + * special case for dev/uuid inodes, they have fixed size data forks. + */ +int +xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) +{ + int offset; + int minforkoff; /* lower limit on valid forkoff locations */ + int maxforkoff; /* upper limit on valid forkoff locations */ + int dsize; + xfs_mount_t *mp = dp->i_mount; + + /* rounded down */ + offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3; + + switch (dp->i_d.di_format) { + case XFS_DINODE_FMT_DEV: + minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; + return (offset >= minforkoff) ? minforkoff : 0; + case XFS_DINODE_FMT_UUID: + minforkoff = roundup(sizeof(uuid_t), 8) >> 3; + return (offset >= minforkoff) ? minforkoff : 0; + } + + /* + * If the requested numbers of bytes is smaller or equal to the + * current attribute fork size we can always proceed. + * + * Note that if_bytes in the data fork might actually be larger than + * the current data fork size is due to delalloc extents. In that + * case either the extent count will go down when they are converted + * to real extents, or the delalloc conversion will take care of the + * literal area rebalancing. + */ + if (bytes <= XFS_IFORK_ASIZE(dp)) + return dp->i_d.di_forkoff; + + /* + * For attr2 we can try to move the forkoff if there is space in the + * literal area, but for the old format we are done if there is no + * space in the fixed attribute fork. + */ + if (!(mp->m_flags & XFS_MOUNT_ATTR2)) + return 0; + + dsize = dp->i_df.if_bytes; + + switch (dp->i_d.di_format) { + case XFS_DINODE_FMT_EXTENTS: + /* + * If there is no attr fork and the data fork is extents, + * determine if creating the default attr fork will result + * in the extents form migrating to btree. If so, the + * minimum offset only needs to be the space required for + * the btree root. + */ + if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > + xfs_default_attroffset(dp)) + dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); + break; + case XFS_DINODE_FMT_BTREE: + /* + * If we have a data btree then keep forkoff if we have one, + * otherwise we are adding a new attr, so then we set + * minforkoff to where the btree root can finish so we have + * plenty of room for attrs + */ + if (dp->i_d.di_forkoff) { + if (offset < dp->i_d.di_forkoff) + return 0; + return dp->i_d.di_forkoff; + } + dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot); + break; + } + + /* + * A data fork btree root must have space for at least + * MINDBTPTRS key/ptr pairs if the data fork is small or empty. + */ + minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); + minforkoff = roundup(minforkoff, 8) >> 3; + + /* attr fork btree root can have at least this many key/ptr pairs */ + maxforkoff = XFS_LITINO(mp, dp->i_d.di_version) - + XFS_BMDR_SPACE_CALC(MINABTPTRS); + maxforkoff = maxforkoff >> 3; /* rounded down */ + + if (offset >= maxforkoff) + return maxforkoff; + if (offset >= minforkoff) + return offset; + return 0; +} + +/* + * Switch on the ATTR2 superblock bit (implies also FEATURES2) + */ +STATIC void +xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp) +{ + if ((mp->m_flags & XFS_MOUNT_ATTR2) && + !(xfs_sb_version_hasattr2(&mp->m_sb))) { + spin_lock(&mp->m_sb_lock); + if (!xfs_sb_version_hasattr2(&mp->m_sb)) { + xfs_sb_version_addattr2(&mp->m_sb); + spin_unlock(&mp->m_sb_lock); + xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); + } else + spin_unlock(&mp->m_sb_lock); + } +} + +/* + * Create the initial contents of a shortform attribute list. + */ +void +xfs_attr_shortform_create(xfs_da_args_t *args) +{ + xfs_attr_sf_hdr_t *hdr; + xfs_inode_t *dp; + xfs_ifork_t *ifp; + + trace_xfs_attr_sf_create(args); + + dp = args->dp; + ASSERT(dp != NULL); + ifp = dp->i_afp; + ASSERT(ifp != NULL); + ASSERT(ifp->if_bytes == 0); + if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) { + ifp->if_flags &= ~XFS_IFEXTENTS; /* just in case */ + dp->i_d.di_aformat = XFS_DINODE_FMT_LOCAL; + ifp->if_flags |= XFS_IFINLINE; + } else { + ASSERT(ifp->if_flags & XFS_IFINLINE); + } + xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK); + hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data; + hdr->count = 0; + hdr->totsize = cpu_to_be16(sizeof(*hdr)); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); +} + +/* + * Add a name/value pair to the shortform attribute list. + * Overflow from the inode has already been checked for. + */ +void +xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) +{ + xfs_attr_shortform_t *sf; + xfs_attr_sf_entry_t *sfe; + int i, offset, size; + xfs_mount_t *mp; + xfs_inode_t *dp; + xfs_ifork_t *ifp; + + trace_xfs_attr_sf_add(args); + + dp = args->dp; + mp = dp->i_mount; + dp->i_d.di_forkoff = forkoff; + + ifp = dp->i_afp; + ASSERT(ifp->if_flags & XFS_IFINLINE); + sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; + sfe = &sf->list[0]; + for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { +#ifdef DEBUG + if (sfe->namelen != args->namelen) + continue; + if (memcmp(args->name, sfe->nameval, args->namelen) != 0) + continue; + if (!xfs_attr_namesp_match(args->flags, sfe->flags)) + continue; + ASSERT(0); +#endif + } + + offset = (char *)sfe - (char *)sf; + size = XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); + xfs_idata_realloc(dp, size, XFS_ATTR_FORK); + sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; + sfe = (xfs_attr_sf_entry_t *)((char *)sf + offset); + + sfe->namelen = args->namelen; + sfe->valuelen = args->valuelen; + sfe->flags = XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); + memcpy(sfe->nameval, args->name, args->namelen); + memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); + sf->hdr.count++; + be16_add_cpu(&sf->hdr.totsize, size); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); + + xfs_sbversion_add_attr2(mp, args->trans); +} + +/* + * After the last attribute is removed revert to original inode format, + * making all literal area available to the data fork once more. + */ +STATIC void +xfs_attr_fork_reset( + struct xfs_inode *ip, + struct xfs_trans *tp) +{ + xfs_idestroy_fork(ip, XFS_ATTR_FORK); + ip->i_d.di_forkoff = 0; + ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; + + ASSERT(ip->i_d.di_anextents == 0); + ASSERT(ip->i_afp == NULL); + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +} + +/* + * Remove an attribute from the shortform attribute list structure. + */ +int +xfs_attr_shortform_remove(xfs_da_args_t *args) +{ + xfs_attr_shortform_t *sf; + xfs_attr_sf_entry_t *sfe; + int base, size=0, end, totsize, i; + xfs_mount_t *mp; + xfs_inode_t *dp; + + trace_xfs_attr_sf_remove(args); + + dp = args->dp; + mp = dp->i_mount; + base = sizeof(xfs_attr_sf_hdr_t); + sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; + sfe = &sf->list[0]; + end = sf->hdr.count; + for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), + base += size, i++) { + size = XFS_ATTR_SF_ENTSIZE(sfe); + if (sfe->namelen != args->namelen) + continue; + if (memcmp(sfe->nameval, args->name, args->namelen) != 0) + continue; + if (!xfs_attr_namesp_match(args->flags, sfe->flags)) + continue; + break; + } + if (i == end) + return ENOATTR; + + /* + * Fix up the attribute fork data, covering the hole + */ + end = base + size; + totsize = be16_to_cpu(sf->hdr.totsize); + if (end != totsize) + memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end); + sf->hdr.count--; + be16_add_cpu(&sf->hdr.totsize, -size); + + /* + * Fix up the start offset of the attribute fork + */ + totsize -= size; + if (totsize == sizeof(xfs_attr_sf_hdr_t) && + (mp->m_flags & XFS_MOUNT_ATTR2) && + (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && + !(args->op_flags & XFS_DA_OP_ADDNAME)) { + xfs_attr_fork_reset(dp, args->trans); + } else { + xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); + dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); + ASSERT(dp->i_d.di_forkoff); + ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || + (args->op_flags & XFS_DA_OP_ADDNAME) || + !(mp->m_flags & XFS_MOUNT_ATTR2) || + dp->i_d.di_format == XFS_DINODE_FMT_BTREE); + xfs_trans_log_inode(args->trans, dp, + XFS_ILOG_CORE | XFS_ILOG_ADATA); + } + + xfs_sbversion_add_attr2(mp, args->trans); + + return 0; +} + +/* + * Look up a name in a shortform attribute list structure. + */ +/*ARGSUSED*/ +int +xfs_attr_shortform_lookup(xfs_da_args_t *args) +{ + xfs_attr_shortform_t *sf; + xfs_attr_sf_entry_t *sfe; + int i; + xfs_ifork_t *ifp; + + trace_xfs_attr_sf_lookup(args); + + ifp = args->dp->i_afp; + ASSERT(ifp->if_flags & XFS_IFINLINE); + sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; + sfe = &sf->list[0]; + for (i = 0; i < sf->hdr.count; + sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { + if (sfe->namelen != args->namelen) + continue; + if (memcmp(args->name, sfe->nameval, args->namelen) != 0) + continue; + if (!xfs_attr_namesp_match(args->flags, sfe->flags)) + continue; + return EEXIST; + } + return ENOATTR; +} + +/* + * Look up a name in a shortform attribute list structure. + */ +/*ARGSUSED*/ +int +xfs_attr_shortform_getvalue(xfs_da_args_t *args) +{ + xfs_attr_shortform_t *sf; + xfs_attr_sf_entry_t *sfe; + int i; + + ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE); + sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data; + sfe = &sf->list[0]; + for (i = 0; i < sf->hdr.count; + sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { + if (sfe->namelen != args->namelen) + continue; + if (memcmp(args->name, sfe->nameval, args->namelen) != 0) + continue; + if (!xfs_attr_namesp_match(args->flags, sfe->flags)) + continue; + if (args->flags & ATTR_KERNOVAL) { + args->valuelen = sfe->valuelen; + return EEXIST; + } + if (args->valuelen < sfe->valuelen) { + args->valuelen = sfe->valuelen; + return ERANGE; + } + args->valuelen = sfe->valuelen; + memcpy(args->value, &sfe->nameval[args->namelen], + args->valuelen); + return EEXIST; + } + return ENOATTR; +} + +/* + * Convert from using the shortform to the leaf. + */ +int +xfs_attr_shortform_to_leaf(xfs_da_args_t *args) +{ + xfs_inode_t *dp; + xfs_attr_shortform_t *sf; + xfs_attr_sf_entry_t *sfe; + xfs_da_args_t nargs; + char *tmpbuffer; + int error, i, size; + xfs_dablk_t blkno; + struct xfs_buf *bp; + xfs_ifork_t *ifp; + + trace_xfs_attr_sf_to_leaf(args); + + dp = args->dp; + ifp = dp->i_afp; + sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; + size = be16_to_cpu(sf->hdr.totsize); + tmpbuffer = kmem_alloc(size, KM_SLEEP); + ASSERT(tmpbuffer != NULL); + memcpy(tmpbuffer, ifp->if_u1.if_data, size); + sf = (xfs_attr_shortform_t *)tmpbuffer; + + xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); + xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK); + + bp = NULL; + error = xfs_da_grow_inode(args, &blkno); + if (error) { + /* + * If we hit an IO error middle of the transaction inside + * grow_inode(), we may have inconsistent data. Bail out. + */ + if (error == EIO) + goto out; + xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ + memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ + goto out; + } + + ASSERT(blkno == 0); + error = xfs_attr3_leaf_create(args, blkno, &bp); + if (error) { + error = xfs_da_shrink_inode(args, 0, bp); + bp = NULL; + if (error) + goto out; + xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ + memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ + goto out; + } + + memset((char *)&nargs, 0, sizeof(nargs)); + nargs.dp = dp; + nargs.geo = args->geo; + nargs.firstblock = args->firstblock; + nargs.flist = args->flist; + nargs.total = args->total; + nargs.whichfork = XFS_ATTR_FORK; + nargs.trans = args->trans; + nargs.op_flags = XFS_DA_OP_OKNOENT; + + sfe = &sf->list[0]; + for (i = 0; i < sf->hdr.count; i++) { + nargs.name = sfe->nameval; + nargs.namelen = sfe->namelen; + nargs.value = &sfe->nameval[nargs.namelen]; + nargs.valuelen = sfe->valuelen; + nargs.hashval = xfs_da_hashname(sfe->nameval, + sfe->namelen); + nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); + error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ + ASSERT(error == ENOATTR); + error = xfs_attr3_leaf_add(bp, &nargs); + ASSERT(error != ENOSPC); + if (error) + goto out; + sfe = XFS_ATTR_SF_NEXTENTRY(sfe); + } + error = 0; + +out: + kmem_free(tmpbuffer); + return error; +} + +/* + * Check a leaf attribute block to see if all the entries would fit into + * a shortform attribute list. + */ +int +xfs_attr_shortform_allfit( + struct xfs_buf *bp, + struct xfs_inode *dp) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_attr_leaf_entry *entry; + xfs_attr_leaf_name_local_t *name_loc; + struct xfs_attr3_icleaf_hdr leafhdr; + int bytes; + int i; + + leaf = bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf); + entry = xfs_attr3_leaf_entryp(leaf); + + bytes = sizeof(struct xfs_attr_sf_hdr); + for (i = 0; i < leafhdr.count; entry++, i++) { + if (entry->flags & XFS_ATTR_INCOMPLETE) + continue; /* don't copy partial entries */ + if (!(entry->flags & XFS_ATTR_LOCAL)) + return 0; + name_loc = xfs_attr3_leaf_name_local(leaf, i); + if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX) + return 0; + if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) + return 0; + bytes += sizeof(struct xfs_attr_sf_entry) - 1 + + name_loc->namelen + + be16_to_cpu(name_loc->valuelen); + } + if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) && + (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && + (bytes == sizeof(struct xfs_attr_sf_hdr))) + return -1; + return xfs_attr_shortform_bytesfit(dp, bytes); +} + +/* + * Convert a leaf attribute list to shortform attribute list + */ +int +xfs_attr3_leaf_to_shortform( + struct xfs_buf *bp, + struct xfs_da_args *args, + int forkoff) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_attr_leaf_entry *entry; + struct xfs_attr_leaf_name_local *name_loc; + struct xfs_da_args nargs; + struct xfs_inode *dp = args->dp; + char *tmpbuffer; + int error; + int i; + + trace_xfs_attr_leaf_to_sf(args); + + tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); + if (!tmpbuffer) + return ENOMEM; + + memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); + + leaf = (xfs_attr_leafblock_t *)tmpbuffer; + xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); + entry = xfs_attr3_leaf_entryp(leaf); + + /* XXX (dgc): buffer is about to be marked stale - why zero it? */ + memset(bp->b_addr, 0, args->geo->blksize); + + /* + * Clean out the prior contents of the attribute list. + */ + error = xfs_da_shrink_inode(args, 0, bp); + if (error) + goto out; + + if (forkoff == -1) { + ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); + ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE); + xfs_attr_fork_reset(dp, args->trans); + goto out; + } + + xfs_attr_shortform_create(args); + + /* + * Copy the attributes + */ + memset((char *)&nargs, 0, sizeof(nargs)); + nargs.geo = args->geo; + nargs.dp = dp; + nargs.firstblock = args->firstblock; + nargs.flist = args->flist; + nargs.total = args->total; + nargs.whichfork = XFS_ATTR_FORK; + nargs.trans = args->trans; + nargs.op_flags = XFS_DA_OP_OKNOENT; + + for (i = 0; i < ichdr.count; entry++, i++) { + if (entry->flags & XFS_ATTR_INCOMPLETE) + continue; /* don't copy partial entries */ + if (!entry->nameidx) + continue; + ASSERT(entry->flags & XFS_ATTR_LOCAL); + name_loc = xfs_attr3_leaf_name_local(leaf, i); + nargs.name = name_loc->nameval; + nargs.namelen = name_loc->namelen; + nargs.value = &name_loc->nameval[nargs.namelen]; + nargs.valuelen = be16_to_cpu(name_loc->valuelen); + nargs.hashval = be32_to_cpu(entry->hashval); + nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags); + xfs_attr_shortform_add(&nargs, forkoff); + } + error = 0; + +out: + kmem_free(tmpbuffer); + return error; +} + +/* + * Convert from using a single leaf to a root node and a leaf. + */ +int +xfs_attr3_leaf_to_node( + struct xfs_da_args *args) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_attr3_icleaf_hdr icleafhdr; + struct xfs_attr_leaf_entry *entries; + struct xfs_da_node_entry *btree; + struct xfs_da3_icnode_hdr icnodehdr; + struct xfs_da_intnode *node; + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + struct xfs_buf *bp1 = NULL; + struct xfs_buf *bp2 = NULL; + xfs_dablk_t blkno; + int error; + + trace_xfs_attr_leaf_to_node(args); + + error = xfs_da_grow_inode(args, &blkno); + if (error) + goto out; + error = xfs_attr3_leaf_read(args->trans, dp, 0, -1, &bp1); + if (error) + goto out; + + error = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp2, XFS_ATTR_FORK); + if (error) + goto out; + + /* copy leaf to new buffer, update identifiers */ + xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF); + bp2->b_ops = bp1->b_ops; + memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize); + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_da3_blkinfo *hdr3 = bp2->b_addr; + hdr3->blkno = cpu_to_be64(bp2->b_bn); + } + xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1); + + /* + * Set up the new root node. + */ + error = xfs_da3_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK); + if (error) + goto out; + node = bp1->b_addr; + dp->d_ops->node_hdr_from_disk(&icnodehdr, node); + btree = dp->d_ops->node_tree_p(node); + + leaf = bp2->b_addr; + xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf); + entries = xfs_attr3_leaf_entryp(leaf); + + /* both on-disk, don't endian-flip twice */ + btree[0].hashval = entries[icleafhdr.count - 1].hashval; + btree[0].before = cpu_to_be32(blkno); + icnodehdr.count = 1; + dp->d_ops->node_hdr_to_disk(node, &icnodehdr); + xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1); + error = 0; +out: + return error; +} + +/*======================================================================== + * Routines used for growing the Btree. + *========================================================================*/ + +/* + * Create the initial contents of a leaf attribute list + * or a leaf in a node attribute list. + */ +STATIC int +xfs_attr3_leaf_create( + struct xfs_da_args *args, + xfs_dablk_t blkno, + struct xfs_buf **bpp) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + struct xfs_buf *bp; + int error; + + trace_xfs_attr_leaf_create(args); + + error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, + XFS_ATTR_FORK); + if (error) + return error; + bp->b_ops = &xfs_attr3_leaf_buf_ops; + xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF); + leaf = bp->b_addr; + memset(leaf, 0, args->geo->blksize); + + memset(&ichdr, 0, sizeof(ichdr)); + ichdr.firstused = args->geo->blksize; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_da3_blkinfo *hdr3 = bp->b_addr; + + ichdr.magic = XFS_ATTR3_LEAF_MAGIC; + + hdr3->blkno = cpu_to_be64(bp->b_bn); + hdr3->owner = cpu_to_be64(dp->i_ino); + uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); + + ichdr.freemap[0].base = sizeof(struct xfs_attr3_leaf_hdr); + } else { + ichdr.magic = XFS_ATTR_LEAF_MAGIC; + ichdr.freemap[0].base = sizeof(struct xfs_attr_leaf_hdr); + } + ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base; + + xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); + xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1); + + *bpp = bp; + return 0; +} + +/* + * Split the leaf node, rebalance, then add the new entry. + */ +int +xfs_attr3_leaf_split( + struct xfs_da_state *state, + struct xfs_da_state_blk *oldblk, + struct xfs_da_state_blk *newblk) +{ + xfs_dablk_t blkno; + int error; + + trace_xfs_attr_leaf_split(state->args); + + /* + * Allocate space for a new leaf node. + */ + ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC); + error = xfs_da_grow_inode(state->args, &blkno); + if (error) + return error; + error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp); + if (error) + return error; + newblk->blkno = blkno; + newblk->magic = XFS_ATTR_LEAF_MAGIC; + + /* + * Rebalance the entries across the two leaves. + * NOTE: rebalance() currently depends on the 2nd block being empty. + */ + xfs_attr3_leaf_rebalance(state, oldblk, newblk); + error = xfs_da3_blk_link(state, oldblk, newblk); + if (error) + return error; + + /* + * Save info on "old" attribute for "atomic rename" ops, leaf_add() + * modifies the index/blkno/rmtblk/rmtblkcnt fields to show the + * "new" attrs info. Will need the "old" info to remove it later. + * + * Insert the "new" entry in the correct block. + */ + if (state->inleaf) { + trace_xfs_attr_leaf_add_old(state->args); + error = xfs_attr3_leaf_add(oldblk->bp, state->args); + } else { + trace_xfs_attr_leaf_add_new(state->args); + error = xfs_attr3_leaf_add(newblk->bp, state->args); + } + + /* + * Update last hashval in each block since we added the name. + */ + oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL); + newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL); + return error; +} + +/* + * Add a name to the leaf attribute list structure. + */ +int +xfs_attr3_leaf_add( + struct xfs_buf *bp, + struct xfs_da_args *args) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_attr3_icleaf_hdr ichdr; + int tablesize; + int entsize; + int sum; + int tmp; + int i; + + trace_xfs_attr_leaf_add(args); + + leaf = bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); + ASSERT(args->index >= 0 && args->index <= ichdr.count); + entsize = xfs_attr_leaf_newentsize(args, NULL); + + /* + * Search through freemap for first-fit on new name length. + * (may need to figure in size of entry struct too) + */ + tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t) + + xfs_attr3_leaf_hdr_size(leaf); + for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) { + if (tablesize > ichdr.firstused) { + sum += ichdr.freemap[i].size; + continue; + } + if (!ichdr.freemap[i].size) + continue; /* no space in this map */ + tmp = entsize; + if (ichdr.freemap[i].base < ichdr.firstused) + tmp += sizeof(xfs_attr_leaf_entry_t); + if (ichdr.freemap[i].size >= tmp) { + tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i); + goto out_log_hdr; + } + sum += ichdr.freemap[i].size; + } + + /* + * If there are no holes in the address space of the block, + * and we don't have enough freespace, then compaction will do us + * no good and we should just give up. + */ + if (!ichdr.holes && sum < entsize) + return ENOSPC; + + /* + * Compact the entries to coalesce free space. + * This may change the hdr->count via dropping INCOMPLETE entries. + */ + xfs_attr3_leaf_compact(args, &ichdr, bp); + + /* + * After compaction, the block is guaranteed to have only one + * free region, in freemap[0]. If it is not big enough, give up. + */ + if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) { + tmp = ENOSPC; + goto out_log_hdr; + } + + tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0); + +out_log_hdr: + xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); + xfs_trans_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, &leaf->hdr, + xfs_attr3_leaf_hdr_size(leaf))); + return tmp; +} + +/* + * Add a name to a leaf attribute list structure. + */ +STATIC int +xfs_attr3_leaf_add_work( + struct xfs_buf *bp, + struct xfs_attr3_icleaf_hdr *ichdr, + struct xfs_da_args *args, + int mapindex) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_attr_leaf_entry *entry; + struct xfs_attr_leaf_name_local *name_loc; + struct xfs_attr_leaf_name_remote *name_rmt; + struct xfs_mount *mp; + int tmp; + int i; + + trace_xfs_attr_leaf_add_work(args); + + leaf = bp->b_addr; + ASSERT(mapindex >= 0 && mapindex < XFS_ATTR_LEAF_MAPSIZE); + ASSERT(args->index >= 0 && args->index <= ichdr->count); + + /* + * Force open some space in the entry array and fill it in. + */ + entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; + if (args->index < ichdr->count) { + tmp = ichdr->count - args->index; + tmp *= sizeof(xfs_attr_leaf_entry_t); + memmove(entry + 1, entry, tmp); + xfs_trans_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); + } + ichdr->count++; + + /* + * Allocate space for the new string (at the end of the run). + */ + mp = args->trans->t_mountp; + ASSERT(ichdr->freemap[mapindex].base < args->geo->blksize); + ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0); + ASSERT(ichdr->freemap[mapindex].size >= + xfs_attr_leaf_newentsize(args, NULL)); + ASSERT(ichdr->freemap[mapindex].size < args->geo->blksize); + ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0); + + ichdr->freemap[mapindex].size -= xfs_attr_leaf_newentsize(args, &tmp); + + entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base + + ichdr->freemap[mapindex].size); + entry->hashval = cpu_to_be32(args->hashval); + entry->flags = tmp ? XFS_ATTR_LOCAL : 0; + entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); + if (args->op_flags & XFS_DA_OP_RENAME) { + entry->flags |= XFS_ATTR_INCOMPLETE; + if ((args->blkno2 == args->blkno) && + (args->index2 <= args->index)) { + args->index2++; + } + } + xfs_trans_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); + ASSERT((args->index == 0) || + (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval))); + ASSERT((args->index == ichdr->count - 1) || + (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval))); + + /* + * For "remote" attribute values, simply note that we need to + * allocate space for the "remote" value. We can't actually + * allocate the extents in this transaction, and we can't decide + * which blocks they should be as we might allocate more blocks + * as part of this transaction (a split operation for example). + */ + if (entry->flags & XFS_ATTR_LOCAL) { + name_loc = xfs_attr3_leaf_name_local(leaf, args->index); + name_loc->namelen = args->namelen; + name_loc->valuelen = cpu_to_be16(args->valuelen); + memcpy((char *)name_loc->nameval, args->name, args->namelen); + memcpy((char *)&name_loc->nameval[args->namelen], args->value, + be16_to_cpu(name_loc->valuelen)); + } else { + name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); + name_rmt->namelen = args->namelen; + memcpy((char *)name_rmt->name, args->name, args->namelen); + entry->flags |= XFS_ATTR_INCOMPLETE; + /* just in case */ + name_rmt->valuelen = 0; + name_rmt->valueblk = 0; + args->rmtblkno = 1; + args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); + args->rmtvaluelen = args->valuelen; + } + xfs_trans_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), + xfs_attr_leaf_entsize(leaf, args->index))); + + /* + * Update the control info for this leaf node + */ + if (be16_to_cpu(entry->nameidx) < ichdr->firstused) + ichdr->firstused = be16_to_cpu(entry->nameidx); + + ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t) + + xfs_attr3_leaf_hdr_size(leaf)); + tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t) + + xfs_attr3_leaf_hdr_size(leaf); + + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + if (ichdr->freemap[i].base == tmp) { + ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t); + ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t); + } + } + ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index); + return 0; +} + +/* + * Garbage collect a leaf attribute list block by copying it to a new buffer. + */ +STATIC void +xfs_attr3_leaf_compact( + struct xfs_da_args *args, + struct xfs_attr3_icleaf_hdr *ichdr_dst, + struct xfs_buf *bp) +{ + struct xfs_attr_leafblock *leaf_src; + struct xfs_attr_leafblock *leaf_dst; + struct xfs_attr3_icleaf_hdr ichdr_src; + struct xfs_trans *trans = args->trans; + char *tmpbuffer; + + trace_xfs_attr_leaf_compact(args); + + tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); + memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); + memset(bp->b_addr, 0, args->geo->blksize); + leaf_src = (xfs_attr_leafblock_t *)tmpbuffer; + leaf_dst = bp->b_addr; + + /* + * Copy the on-disk header back into the destination buffer to ensure + * all the information in the header that is not part of the incore + * header structure is preserved. + */ + memcpy(bp->b_addr, tmpbuffer, xfs_attr3_leaf_hdr_size(leaf_src)); + + /* Initialise the incore headers */ + ichdr_src = *ichdr_dst; /* struct copy */ + ichdr_dst->firstused = args->geo->blksize; + ichdr_dst->usedbytes = 0; + ichdr_dst->count = 0; + ichdr_dst->holes = 0; + ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src); + ichdr_dst->freemap[0].size = ichdr_dst->firstused - + ichdr_dst->freemap[0].base; + + /* write the header back to initialise the underlying buffer */ + xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst); + + /* + * Copy all entry's in the same (sorted) order, + * but allocate name/value pairs packed and in sequence. + */ + xfs_attr3_leaf_moveents(args, leaf_src, &ichdr_src, 0, + leaf_dst, ichdr_dst, 0, ichdr_src.count); + /* + * this logs the entire buffer, but the caller must write the header + * back to the buffer when it is finished modifying it. + */ + xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1); + + kmem_free(tmpbuffer); +} + +/* + * Compare two leaf blocks "order". + * Return 0 unless leaf2 should go before leaf1. + */ +static int +xfs_attr3_leaf_order( + struct xfs_buf *leaf1_bp, + struct xfs_attr3_icleaf_hdr *leaf1hdr, + struct xfs_buf *leaf2_bp, + struct xfs_attr3_icleaf_hdr *leaf2hdr) +{ + struct xfs_attr_leaf_entry *entries1; + struct xfs_attr_leaf_entry *entries2; + + entries1 = xfs_attr3_leaf_entryp(leaf1_bp->b_addr); + entries2 = xfs_attr3_leaf_entryp(leaf2_bp->b_addr); + if (leaf1hdr->count > 0 && leaf2hdr->count > 0 && + ((be32_to_cpu(entries2[0].hashval) < + be32_to_cpu(entries1[0].hashval)) || + (be32_to_cpu(entries2[leaf2hdr->count - 1].hashval) < + be32_to_cpu(entries1[leaf1hdr->count - 1].hashval)))) { + return 1; + } + return 0; +} + +int +xfs_attr_leaf_order( + struct xfs_buf *leaf1_bp, + struct xfs_buf *leaf2_bp) +{ + struct xfs_attr3_icleaf_hdr ichdr1; + struct xfs_attr3_icleaf_hdr ichdr2; + + xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr); + xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr); + return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2); +} + +/* + * Redistribute the attribute list entries between two leaf nodes, + * taking into account the size of the new entry. + * + * NOTE: if new block is empty, then it will get the upper half of the + * old block. At present, all (one) callers pass in an empty second block. + * + * This code adjusts the args->index/blkno and args->index2/blkno2 fields + * to match what it is doing in splitting the attribute leaf block. Those + * values are used in "atomic rename" operations on attributes. Note that + * the "new" and "old" values can end up in different blocks. + */ +STATIC void +xfs_attr3_leaf_rebalance( + struct xfs_da_state *state, + struct xfs_da_state_blk *blk1, + struct xfs_da_state_blk *blk2) +{ + struct xfs_da_args *args; + struct xfs_attr_leafblock *leaf1; + struct xfs_attr_leafblock *leaf2; + struct xfs_attr3_icleaf_hdr ichdr1; + struct xfs_attr3_icleaf_hdr ichdr2; + struct xfs_attr_leaf_entry *entries1; + struct xfs_attr_leaf_entry *entries2; + int count; + int totallen; + int max; + int space; + int swap; + + /* + * Set up environment. + */ + ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC); + ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); + leaf1 = blk1->bp->b_addr; + leaf2 = blk2->bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1); + xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2); + ASSERT(ichdr2.count == 0); + args = state->args; + + trace_xfs_attr_leaf_rebalance(args); + + /* + * Check ordering of blocks, reverse if it makes things simpler. + * + * NOTE: Given that all (current) callers pass in an empty + * second block, this code should never set "swap". + */ + swap = 0; + if (xfs_attr3_leaf_order(blk1->bp, &ichdr1, blk2->bp, &ichdr2)) { + struct xfs_da_state_blk *tmp_blk; + struct xfs_attr3_icleaf_hdr tmp_ichdr; + + tmp_blk = blk1; + blk1 = blk2; + blk2 = tmp_blk; + + /* struct copies to swap them rather than reconverting */ + tmp_ichdr = ichdr1; + ichdr1 = ichdr2; + ichdr2 = tmp_ichdr; + + leaf1 = blk1->bp->b_addr; + leaf2 = blk2->bp->b_addr; + swap = 1; + } + + /* + * Examine entries until we reduce the absolute difference in + * byte usage between the two blocks to a minimum. Then get + * the direction to copy and the number of elements to move. + * + * "inleaf" is true if the new entry should be inserted into blk1. + * If "swap" is also true, then reverse the sense of "inleaf". + */ + state->inleaf = xfs_attr3_leaf_figure_balance(state, blk1, &ichdr1, + blk2, &ichdr2, + &count, &totallen); + if (swap) + state->inleaf = !state->inleaf; + + /* + * Move any entries required from leaf to leaf: + */ + if (count < ichdr1.count) { + /* + * Figure the total bytes to be added to the destination leaf. + */ + /* number entries being moved */ + count = ichdr1.count - count; + space = ichdr1.usedbytes - totallen; + space += count * sizeof(xfs_attr_leaf_entry_t); + + /* + * leaf2 is the destination, compact it if it looks tight. + */ + max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1); + max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t); + if (space > max) + xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp); + + /* + * Move high entries from leaf1 to low end of leaf2. + */ + xfs_attr3_leaf_moveents(args, leaf1, &ichdr1, + ichdr1.count - count, leaf2, &ichdr2, 0, count); + + } else if (count > ichdr1.count) { + /* + * I assert that since all callers pass in an empty + * second buffer, this code should never execute. + */ + ASSERT(0); + + /* + * Figure the total bytes to be added to the destination leaf. + */ + /* number entries being moved */ + count -= ichdr1.count; + space = totallen - ichdr1.usedbytes; + space += count * sizeof(xfs_attr_leaf_entry_t); + + /* + * leaf1 is the destination, compact it if it looks tight. + */ + max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1); + max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t); + if (space > max) + xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp); + + /* + * Move low entries from leaf2 to high end of leaf1. + */ + xfs_attr3_leaf_moveents(args, leaf2, &ichdr2, 0, leaf1, &ichdr1, + ichdr1.count, count); + } + + xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1); + xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2); + xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1); + xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1); + + /* + * Copy out last hashval in each block for B-tree code. + */ + entries1 = xfs_attr3_leaf_entryp(leaf1); + entries2 = xfs_attr3_leaf_entryp(leaf2); + blk1->hashval = be32_to_cpu(entries1[ichdr1.count - 1].hashval); + blk2->hashval = be32_to_cpu(entries2[ichdr2.count - 1].hashval); + + /* + * Adjust the expected index for insertion. + * NOTE: this code depends on the (current) situation that the + * second block was originally empty. + * + * If the insertion point moved to the 2nd block, we must adjust + * the index. We must also track the entry just following the + * new entry for use in an "atomic rename" operation, that entry + * is always the "old" entry and the "new" entry is what we are + * inserting. The index/blkno fields refer to the "old" entry, + * while the index2/blkno2 fields refer to the "new" entry. + */ + if (blk1->index > ichdr1.count) { + ASSERT(state->inleaf == 0); + blk2->index = blk1->index - ichdr1.count; + args->index = args->index2 = blk2->index; + args->blkno = args->blkno2 = blk2->blkno; + } else if (blk1->index == ichdr1.count) { + if (state->inleaf) { + args->index = blk1->index; + args->blkno = blk1->blkno; + args->index2 = 0; + args->blkno2 = blk2->blkno; + } else { + /* + * On a double leaf split, the original attr location + * is already stored in blkno2/index2, so don't + * overwrite it overwise we corrupt the tree. + */ + blk2->index = blk1->index - ichdr1.count; + args->index = blk2->index; + args->blkno = blk2->blkno; + if (!state->extravalid) { + /* + * set the new attr location to match the old + * one and let the higher level split code + * decide where in the leaf to place it. + */ + args->index2 = blk2->index; + args->blkno2 = blk2->blkno; + } + } + } else { + ASSERT(state->inleaf == 1); + args->index = args->index2 = blk1->index; + args->blkno = args->blkno2 = blk1->blkno; + } +} + +/* + * Examine entries until we reduce the absolute difference in + * byte usage between the two blocks to a minimum. + * GROT: Is this really necessary? With other than a 512 byte blocksize, + * GROT: there will always be enough room in either block for a new entry. + * GROT: Do a double-split for this case? + */ +STATIC int +xfs_attr3_leaf_figure_balance( + struct xfs_da_state *state, + struct xfs_da_state_blk *blk1, + struct xfs_attr3_icleaf_hdr *ichdr1, + struct xfs_da_state_blk *blk2, + struct xfs_attr3_icleaf_hdr *ichdr2, + int *countarg, + int *usedbytesarg) +{ + struct xfs_attr_leafblock *leaf1 = blk1->bp->b_addr; + struct xfs_attr_leafblock *leaf2 = blk2->bp->b_addr; + struct xfs_attr_leaf_entry *entry; + int count; + int max; + int index; + int totallen = 0; + int half; + int lastdelta; + int foundit = 0; + int tmp; + + /* + * Examine entries until we reduce the absolute difference in + * byte usage between the two blocks to a minimum. + */ + max = ichdr1->count + ichdr2->count; + half = (max + 1) * sizeof(*entry); + half += ichdr1->usedbytes + ichdr2->usedbytes + + xfs_attr_leaf_newentsize(state->args, NULL); + half /= 2; + lastdelta = state->args->geo->blksize; + entry = xfs_attr3_leaf_entryp(leaf1); + for (count = index = 0; count < max; entry++, index++, count++) { + +#define XFS_ATTR_ABS(A) (((A) < 0) ? -(A) : (A)) + /* + * The new entry is in the first block, account for it. + */ + if (count == blk1->index) { + tmp = totallen + sizeof(*entry) + + xfs_attr_leaf_newentsize(state->args, NULL); + if (XFS_ATTR_ABS(half - tmp) > lastdelta) + break; + lastdelta = XFS_ATTR_ABS(half - tmp); + totallen = tmp; + foundit = 1; + } + + /* + * Wrap around into the second block if necessary. + */ + if (count == ichdr1->count) { + leaf1 = leaf2; + entry = xfs_attr3_leaf_entryp(leaf1); + index = 0; + } + + /* + * Figure out if next leaf entry would be too much. + */ + tmp = totallen + sizeof(*entry) + xfs_attr_leaf_entsize(leaf1, + index); + if (XFS_ATTR_ABS(half - tmp) > lastdelta) + break; + lastdelta = XFS_ATTR_ABS(half - tmp); + totallen = tmp; +#undef XFS_ATTR_ABS + } + + /* + * Calculate the number of usedbytes that will end up in lower block. + * If new entry not in lower block, fix up the count. + */ + totallen -= count * sizeof(*entry); + if (foundit) { + totallen -= sizeof(*entry) + + xfs_attr_leaf_newentsize(state->args, NULL); + } + + *countarg = count; + *usedbytesarg = totallen; + return foundit; +} + +/*======================================================================== + * Routines used for shrinking the Btree. + *========================================================================*/ + +/* + * Check a leaf block and its neighbors to see if the block should be + * collapsed into one or the other neighbor. Always keep the block + * with the smaller block number. + * If the current block is over 50% full, don't try to join it, return 0. + * If the block is empty, fill in the state structure and return 2. + * If it can be collapsed, fill in the state structure and return 1. + * If nothing can be done, return 0. + * + * GROT: allow for INCOMPLETE entries in calculation. + */ +int +xfs_attr3_leaf_toosmall( + struct xfs_da_state *state, + int *action) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_da_state_blk *blk; + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_buf *bp; + xfs_dablk_t blkno; + int bytes; + int forward; + int error; + int retval; + int i; + + trace_xfs_attr_leaf_toosmall(state->args); + + /* + * Check for the degenerate case of the block being over 50% full. + * If so, it's not worth even looking to see if we might be able + * to coalesce with a sibling. + */ + blk = &state->path.blk[ state->path.active-1 ]; + leaf = blk->bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); + bytes = xfs_attr3_leaf_hdr_size(leaf) + + ichdr.count * sizeof(xfs_attr_leaf_entry_t) + + ichdr.usedbytes; + if (bytes > (state->args->geo->blksize >> 1)) { + *action = 0; /* blk over 50%, don't try to join */ + return 0; + } + + /* + * Check for the degenerate case of the block being empty. + * If the block is empty, we'll simply delete it, no need to + * coalesce it with a sibling block. We choose (arbitrarily) + * to merge with the forward block unless it is NULL. + */ + if (ichdr.count == 0) { + /* + * Make altpath point to the block we want to keep and + * path point to the block we want to drop (this one). + */ + forward = (ichdr.forw != 0); + memcpy(&state->altpath, &state->path, sizeof(state->path)); + error = xfs_da3_path_shift(state, &state->altpath, forward, + 0, &retval); + if (error) + return error; + if (retval) { + *action = 0; + } else { + *action = 2; + } + return 0; + } + + /* + * Examine each sibling block to see if we can coalesce with + * at least 25% free space to spare. We need to figure out + * whether to merge with the forward or the backward block. + * We prefer coalescing with the lower numbered sibling so as + * to shrink an attribute list over time. + */ + /* start with smaller blk num */ + forward = ichdr.forw < ichdr.back; + for (i = 0; i < 2; forward = !forward, i++) { + struct xfs_attr3_icleaf_hdr ichdr2; + if (forward) + blkno = ichdr.forw; + else + blkno = ichdr.back; + if (blkno == 0) + continue; + error = xfs_attr3_leaf_read(state->args->trans, state->args->dp, + blkno, -1, &bp); + if (error) + return error; + + xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr); + + bytes = state->args->geo->blksize - + (state->args->geo->blksize >> 2) - + ichdr.usedbytes - ichdr2.usedbytes - + ((ichdr.count + ichdr2.count) * + sizeof(xfs_attr_leaf_entry_t)) - + xfs_attr3_leaf_hdr_size(leaf); + + xfs_trans_brelse(state->args->trans, bp); + if (bytes >= 0) + break; /* fits with at least 25% to spare */ + } + if (i >= 2) { + *action = 0; + return 0; + } + + /* + * Make altpath point to the block we want to keep (the lower + * numbered block) and path point to the block we want to drop. + */ + memcpy(&state->altpath, &state->path, sizeof(state->path)); + if (blkno < blk->blkno) { + error = xfs_da3_path_shift(state, &state->altpath, forward, + 0, &retval); + } else { + error = xfs_da3_path_shift(state, &state->path, forward, + 0, &retval); + } + if (error) + return error; + if (retval) { + *action = 0; + } else { + *action = 1; + } + return 0; +} + +/* + * Remove a name from the leaf attribute list structure. + * + * Return 1 if leaf is less than 37% full, 0 if >= 37% full. + * If two leaves are 37% full, when combined they will leave 25% free. + */ +int +xfs_attr3_leaf_remove( + struct xfs_buf *bp, + struct xfs_da_args *args) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_attr_leaf_entry *entry; + int before; + int after; + int smallest; + int entsize; + int tablesize; + int tmp; + int i; + + trace_xfs_attr_leaf_remove(args); + + leaf = bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); + + ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8); + ASSERT(args->index >= 0 && args->index < ichdr.count); + ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) + + xfs_attr3_leaf_hdr_size(leaf)); + + entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; + + ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); + ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize); + + /* + * Scan through free region table: + * check for adjacency of free'd entry with an existing one, + * find smallest free region in case we need to replace it, + * adjust any map that borders the entry table, + */ + tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t) + + xfs_attr3_leaf_hdr_size(leaf); + tmp = ichdr.freemap[0].size; + before = after = -1; + smallest = XFS_ATTR_LEAF_MAPSIZE - 1; + entsize = xfs_attr_leaf_entsize(leaf, args->index); + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + ASSERT(ichdr.freemap[i].base < args->geo->blksize); + ASSERT(ichdr.freemap[i].size < args->geo->blksize); + if (ichdr.freemap[i].base == tablesize) { + ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t); + ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t); + } + + if (ichdr.freemap[i].base + ichdr.freemap[i].size == + be16_to_cpu(entry->nameidx)) { + before = i; + } else if (ichdr.freemap[i].base == + (be16_to_cpu(entry->nameidx) + entsize)) { + after = i; + } else if (ichdr.freemap[i].size < tmp) { + tmp = ichdr.freemap[i].size; + smallest = i; + } + } + + /* + * Coalesce adjacent freemap regions, + * or replace the smallest region. + */ + if ((before >= 0) || (after >= 0)) { + if ((before >= 0) && (after >= 0)) { + ichdr.freemap[before].size += entsize; + ichdr.freemap[before].size += ichdr.freemap[after].size; + ichdr.freemap[after].base = 0; + ichdr.freemap[after].size = 0; + } else if (before >= 0) { + ichdr.freemap[before].size += entsize; + } else { + ichdr.freemap[after].base = be16_to_cpu(entry->nameidx); + ichdr.freemap[after].size += entsize; + } + } else { + /* + * Replace smallest region (if it is smaller than free'd entry) + */ + if (ichdr.freemap[smallest].size < entsize) { + ichdr.freemap[smallest].base = be16_to_cpu(entry->nameidx); + ichdr.freemap[smallest].size = entsize; + } + } + + /* + * Did we remove the first entry? + */ + if (be16_to_cpu(entry->nameidx) == ichdr.firstused) + smallest = 1; + else + smallest = 0; + + /* + * Compress the remaining entries and zero out the removed stuff. + */ + memset(xfs_attr3_leaf_name(leaf, args->index), 0, entsize); + ichdr.usedbytes -= entsize; + xfs_trans_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), + entsize)); + + tmp = (ichdr.count - args->index) * sizeof(xfs_attr_leaf_entry_t); + memmove(entry, entry + 1, tmp); + ichdr.count--; + xfs_trans_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(xfs_attr_leaf_entry_t))); + + entry = &xfs_attr3_leaf_entryp(leaf)[ichdr.count]; + memset(entry, 0, sizeof(xfs_attr_leaf_entry_t)); + + /* + * If we removed the first entry, re-find the first used byte + * in the name area. Note that if the entry was the "firstused", + * then we don't have a "hole" in our block resulting from + * removing the name. + */ + if (smallest) { + tmp = args->geo->blksize; + entry = xfs_attr3_leaf_entryp(leaf); + for (i = ichdr.count - 1; i >= 0; entry++, i--) { + ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); + ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize); + + if (be16_to_cpu(entry->nameidx) < tmp) + tmp = be16_to_cpu(entry->nameidx); + } + ichdr.firstused = tmp; + if (!ichdr.firstused) + ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN; + } else { + ichdr.holes = 1; /* mark as needing compaction */ + } + xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); + xfs_trans_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, &leaf->hdr, + xfs_attr3_leaf_hdr_size(leaf))); + + /* + * Check if leaf is less than 50% full, caller may want to + * "join" the leaf with a sibling if so. + */ + tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) + + ichdr.count * sizeof(xfs_attr_leaf_entry_t); + + return tmp < args->geo->magicpct; /* leaf is < 37% full */ +} + +/* + * Move all the attribute list entries from drop_leaf into save_leaf. + */ +void +xfs_attr3_leaf_unbalance( + struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk, + struct xfs_da_state_blk *save_blk) +{ + struct xfs_attr_leafblock *drop_leaf = drop_blk->bp->b_addr; + struct xfs_attr_leafblock *save_leaf = save_blk->bp->b_addr; + struct xfs_attr3_icleaf_hdr drophdr; + struct xfs_attr3_icleaf_hdr savehdr; + struct xfs_attr_leaf_entry *entry; + + trace_xfs_attr_leaf_unbalance(state->args); + + drop_leaf = drop_blk->bp->b_addr; + save_leaf = save_blk->bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf); + xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf); + entry = xfs_attr3_leaf_entryp(drop_leaf); + + /* + * Save last hashval from dying block for later Btree fixup. + */ + drop_blk->hashval = be32_to_cpu(entry[drophdr.count - 1].hashval); + + /* + * Check if we need a temp buffer, or can we do it in place. + * Note that we don't check "leaf" for holes because we will + * always be dropping it, toosmall() decided that for us already. + */ + if (savehdr.holes == 0) { + /* + * dest leaf has no holes, so we add there. May need + * to make some room in the entry array. + */ + if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, + drop_blk->bp, &drophdr)) { + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, + save_leaf, &savehdr, 0, + drophdr.count); + } else { + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, + save_leaf, &savehdr, + savehdr.count, drophdr.count); + } + } else { + /* + * Destination has holes, so we make a temporary copy + * of the leaf and add them both to that. + */ + struct xfs_attr_leafblock *tmp_leaf; + struct xfs_attr3_icleaf_hdr tmphdr; + + tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP); + + /* + * Copy the header into the temp leaf so that all the stuff + * not in the incore header is present and gets copied back in + * once we've moved all the entries. + */ + memcpy(tmp_leaf, save_leaf, xfs_attr3_leaf_hdr_size(save_leaf)); + + memset(&tmphdr, 0, sizeof(tmphdr)); + tmphdr.magic = savehdr.magic; + tmphdr.forw = savehdr.forw; + tmphdr.back = savehdr.back; + tmphdr.firstused = state->args->geo->blksize; + + /* write the header to the temp buffer to initialise it */ + xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr); + + if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, + drop_blk->bp, &drophdr)) { + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, + tmp_leaf, &tmphdr, 0, + drophdr.count); + xfs_attr3_leaf_moveents(state->args, + save_leaf, &savehdr, 0, + tmp_leaf, &tmphdr, tmphdr.count, + savehdr.count); + } else { + xfs_attr3_leaf_moveents(state->args, + save_leaf, &savehdr, 0, + tmp_leaf, &tmphdr, 0, + savehdr.count); + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, + tmp_leaf, &tmphdr, tmphdr.count, + drophdr.count); + } + memcpy(save_leaf, tmp_leaf, state->args->geo->blksize); + savehdr = tmphdr; /* struct copy */ + kmem_free(tmp_leaf); + } + + xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr); + xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, + state->args->geo->blksize - 1); + + /* + * Copy out last hashval in each block for B-tree code. + */ + entry = xfs_attr3_leaf_entryp(save_leaf); + save_blk->hashval = be32_to_cpu(entry[savehdr.count - 1].hashval); +} + +/*======================================================================== + * Routines used for finding things in the Btree. + *========================================================================*/ + +/* + * Look up a name in a leaf attribute list structure. + * This is the internal routine, it uses the caller's buffer. + * + * Note that duplicate keys are allowed, but only check within the + * current leaf node. The Btree code must check in adjacent leaf nodes. + * + * Return in args->index the index into the entry[] array of either + * the found entry, or where the entry should have been (insert before + * that entry). + * + * Don't change the args->value unless we find the attribute. + */ +int +xfs_attr3_leaf_lookup_int( + struct xfs_buf *bp, + struct xfs_da_args *args) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_attr_leaf_entry *entry; + struct xfs_attr_leaf_entry *entries; + struct xfs_attr_leaf_name_local *name_loc; + struct xfs_attr_leaf_name_remote *name_rmt; + xfs_dahash_t hashval; + int probe; + int span; + + trace_xfs_attr_leaf_lookup(args); + + leaf = bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); + entries = xfs_attr3_leaf_entryp(leaf); + ASSERT(ichdr.count < args->geo->blksize / 8); + + /* + * Binary search. (note: small blocks will skip this loop) + */ + hashval = args->hashval; + probe = span = ichdr.count / 2; + for (entry = &entries[probe]; span > 4; entry = &entries[probe]) { + span /= 2; + if (be32_to_cpu(entry->hashval) < hashval) + probe += span; + else if (be32_to_cpu(entry->hashval) > hashval) + probe -= span; + else + break; + } + ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count)); + ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval); + + /* + * Since we may have duplicate hashval's, find the first matching + * hashval in the leaf. + */ + while (probe > 0 && be32_to_cpu(entry->hashval) >= hashval) { + entry--; + probe--; + } + while (probe < ichdr.count && + be32_to_cpu(entry->hashval) < hashval) { + entry++; + probe++; + } + if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) { + args->index = probe; + return ENOATTR; + } + + /* + * Duplicate keys may be present, so search all of them for a match. + */ + for (; probe < ichdr.count && (be32_to_cpu(entry->hashval) == hashval); + entry++, probe++) { +/* + * GROT: Add code to remove incomplete entries. + */ + /* + * If we are looking for INCOMPLETE entries, show only those. + * If we are looking for complete entries, show only those. + */ + if ((args->flags & XFS_ATTR_INCOMPLETE) != + (entry->flags & XFS_ATTR_INCOMPLETE)) { + continue; + } + if (entry->flags & XFS_ATTR_LOCAL) { + name_loc = xfs_attr3_leaf_name_local(leaf, probe); + if (name_loc->namelen != args->namelen) + continue; + if (memcmp(args->name, name_loc->nameval, + args->namelen) != 0) + continue; + if (!xfs_attr_namesp_match(args->flags, entry->flags)) + continue; + args->index = probe; + return EEXIST; + } else { + name_rmt = xfs_attr3_leaf_name_remote(leaf, probe); + if (name_rmt->namelen != args->namelen) + continue; + if (memcmp(args->name, name_rmt->name, + args->namelen) != 0) + continue; + if (!xfs_attr_namesp_match(args->flags, entry->flags)) + continue; + args->index = probe; + args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); + args->rmtblkno = be32_to_cpu(name_rmt->valueblk); + args->rmtblkcnt = xfs_attr3_rmt_blocks( + args->dp->i_mount, + args->rmtvaluelen); + return EEXIST; + } + } + args->index = probe; + return ENOATTR; +} + +/* + * Get the value associated with an attribute name from a leaf attribute + * list structure. + */ +int +xfs_attr3_leaf_getvalue( + struct xfs_buf *bp, + struct xfs_da_args *args) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_attr_leaf_entry *entry; + struct xfs_attr_leaf_name_local *name_loc; + struct xfs_attr_leaf_name_remote *name_rmt; + int valuelen; + + leaf = bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); + ASSERT(ichdr.count < args->geo->blksize / 8); + ASSERT(args->index < ichdr.count); + + entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; + if (entry->flags & XFS_ATTR_LOCAL) { + name_loc = xfs_attr3_leaf_name_local(leaf, args->index); + ASSERT(name_loc->namelen == args->namelen); + ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0); + valuelen = be16_to_cpu(name_loc->valuelen); + if (args->flags & ATTR_KERNOVAL) { + args->valuelen = valuelen; + return 0; + } + if (args->valuelen < valuelen) { + args->valuelen = valuelen; + return ERANGE; + } + args->valuelen = valuelen; + memcpy(args->value, &name_loc->nameval[args->namelen], valuelen); + } else { + name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); + ASSERT(name_rmt->namelen == args->namelen); + ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); + args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); + args->rmtblkno = be32_to_cpu(name_rmt->valueblk); + args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, + args->rmtvaluelen); + if (args->flags & ATTR_KERNOVAL) { + args->valuelen = args->rmtvaluelen; + return 0; + } + if (args->valuelen < args->rmtvaluelen) { + args->valuelen = args->rmtvaluelen; + return ERANGE; + } + args->valuelen = args->rmtvaluelen; + } + return 0; +} + +/*======================================================================== + * Utility routines. + *========================================================================*/ + +/* + * Move the indicated entries from one leaf to another. + * NOTE: this routine modifies both source and destination leaves. + */ +/*ARGSUSED*/ +STATIC void +xfs_attr3_leaf_moveents( + struct xfs_da_args *args, + struct xfs_attr_leafblock *leaf_s, + struct xfs_attr3_icleaf_hdr *ichdr_s, + int start_s, + struct xfs_attr_leafblock *leaf_d, + struct xfs_attr3_icleaf_hdr *ichdr_d, + int start_d, + int count) +{ + struct xfs_attr_leaf_entry *entry_s; + struct xfs_attr_leaf_entry *entry_d; + int desti; + int tmp; + int i; + + /* + * Check for nothing to do. + */ + if (count == 0) + return; + + /* + * Set up environment. + */ + ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC || + ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC); + ASSERT(ichdr_s->magic == ichdr_d->magic); + ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8); + ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s)) + + xfs_attr3_leaf_hdr_size(leaf_s)); + ASSERT(ichdr_d->count < args->geo->blksize / 8); + ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d)) + + xfs_attr3_leaf_hdr_size(leaf_d)); + + ASSERT(start_s < ichdr_s->count); + ASSERT(start_d <= ichdr_d->count); + ASSERT(count <= ichdr_s->count); + + + /* + * Move the entries in the destination leaf up to make a hole? + */ + if (start_d < ichdr_d->count) { + tmp = ichdr_d->count - start_d; + tmp *= sizeof(xfs_attr_leaf_entry_t); + entry_s = &xfs_attr3_leaf_entryp(leaf_d)[start_d]; + entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d + count]; + memmove(entry_d, entry_s, tmp); + } + + /* + * Copy all entry's in the same (sorted) order, + * but allocate attribute info packed and in sequence. + */ + entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; + entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d]; + desti = start_d; + for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) { + ASSERT(be16_to_cpu(entry_s->nameidx) >= ichdr_s->firstused); + tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i); +#ifdef GROT + /* + * Code to drop INCOMPLETE entries. Difficult to use as we + * may also need to change the insertion index. Code turned + * off for 6.2, should be revisited later. + */ + if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */ + memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp); + ichdr_s->usedbytes -= tmp; + ichdr_s->count -= 1; + entry_d--; /* to compensate for ++ in loop hdr */ + desti--; + if ((start_s + i) < offset) + result++; /* insertion index adjustment */ + } else { +#endif /* GROT */ + ichdr_d->firstused -= tmp; + /* both on-disk, don't endian flip twice */ + entry_d->hashval = entry_s->hashval; + entry_d->nameidx = cpu_to_be16(ichdr_d->firstused); + entry_d->flags = entry_s->flags; + ASSERT(be16_to_cpu(entry_d->nameidx) + tmp + <= args->geo->blksize); + memmove(xfs_attr3_leaf_name(leaf_d, desti), + xfs_attr3_leaf_name(leaf_s, start_s + i), tmp); + ASSERT(be16_to_cpu(entry_s->nameidx) + tmp + <= args->geo->blksize); + memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp); + ichdr_s->usedbytes -= tmp; + ichdr_d->usedbytes += tmp; + ichdr_s->count -= 1; + ichdr_d->count += 1; + tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t) + + xfs_attr3_leaf_hdr_size(leaf_d); + ASSERT(ichdr_d->firstused >= tmp); +#ifdef GROT + } +#endif /* GROT */ + } + + /* + * Zero out the entries we just copied. + */ + if (start_s == ichdr_s->count) { + tmp = count * sizeof(xfs_attr_leaf_entry_t); + entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; + ASSERT(((char *)entry_s + tmp) <= + ((char *)leaf_s + args->geo->blksize)); + memset(entry_s, 0, tmp); + } else { + /* + * Move the remaining entries down to fill the hole, + * then zero the entries at the top. + */ + tmp = (ichdr_s->count - count) * sizeof(xfs_attr_leaf_entry_t); + entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s + count]; + entry_d = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; + memmove(entry_d, entry_s, tmp); + + tmp = count * sizeof(xfs_attr_leaf_entry_t); + entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count]; + ASSERT(((char *)entry_s + tmp) <= + ((char *)leaf_s + args->geo->blksize)); + memset(entry_s, 0, tmp); + } + + /* + * Fill in the freemap information + */ + ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d); + ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t); + ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base; + ichdr_d->freemap[1].base = 0; + ichdr_d->freemap[2].base = 0; + ichdr_d->freemap[1].size = 0; + ichdr_d->freemap[2].size = 0; + ichdr_s->holes = 1; /* leaf may not be compact */ +} + +/* + * Pick up the last hashvalue from a leaf block. + */ +xfs_dahash_t +xfs_attr_leaf_lasthash( + struct xfs_buf *bp, + int *count) +{ + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_attr_leaf_entry *entries; + + xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr); + entries = xfs_attr3_leaf_entryp(bp->b_addr); + if (count) + *count = ichdr.count; + if (!ichdr.count) + return 0; + return be32_to_cpu(entries[ichdr.count - 1].hashval); +} + +/* + * Calculate the number of bytes used to store the indicated attribute + * (whether local or remote only calculate bytes in this block). + */ +STATIC int +xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) +{ + struct xfs_attr_leaf_entry *entries; + xfs_attr_leaf_name_local_t *name_loc; + xfs_attr_leaf_name_remote_t *name_rmt; + int size; + + entries = xfs_attr3_leaf_entryp(leaf); + if (entries[index].flags & XFS_ATTR_LOCAL) { + name_loc = xfs_attr3_leaf_name_local(leaf, index); + size = xfs_attr_leaf_entsize_local(name_loc->namelen, + be16_to_cpu(name_loc->valuelen)); + } else { + name_rmt = xfs_attr3_leaf_name_remote(leaf, index); + size = xfs_attr_leaf_entsize_remote(name_rmt->namelen); + } + return size; +} + +/* + * Calculate the number of bytes that would be required to store the new + * attribute (whether local or remote only calculate bytes in this block). + * This routine decides as a side effect whether the attribute will be + * a "local" or a "remote" attribute. + */ +int +xfs_attr_leaf_newentsize( + struct xfs_da_args *args, + int *local) +{ + int size; + + size = xfs_attr_leaf_entsize_local(args->namelen, args->valuelen); + if (size < xfs_attr_leaf_entsize_local_max(args->geo->blksize)) { + if (local) + *local = 1; + return size; + } + if (local) + *local = 0; + return xfs_attr_leaf_entsize_remote(args->namelen); +} + + +/*======================================================================== + * Manage the INCOMPLETE flag in a leaf entry + *========================================================================*/ + +/* + * Clear the INCOMPLETE flag on an entry in a leaf block. + */ +int +xfs_attr3_leaf_clearflag( + struct xfs_da_args *args) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_attr_leaf_entry *entry; + struct xfs_attr_leaf_name_remote *name_rmt; + struct xfs_buf *bp; + int error; +#ifdef DEBUG + struct xfs_attr3_icleaf_hdr ichdr; + xfs_attr_leaf_name_local_t *name_loc; + int namelen; + char *name; +#endif /* DEBUG */ + + trace_xfs_attr_leaf_clearflag(args); + /* + * Set up the operation. + */ + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + if (error) + return error; + + leaf = bp->b_addr; + entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; + ASSERT(entry->flags & XFS_ATTR_INCOMPLETE); + +#ifdef DEBUG + xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); + ASSERT(args->index < ichdr.count); + ASSERT(args->index >= 0); + + if (entry->flags & XFS_ATTR_LOCAL) { + name_loc = xfs_attr3_leaf_name_local(leaf, args->index); + namelen = name_loc->namelen; + name = (char *)name_loc->nameval; + } else { + name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); + namelen = name_rmt->namelen; + name = (char *)name_rmt->name; + } + ASSERT(be32_to_cpu(entry->hashval) == args->hashval); + ASSERT(namelen == args->namelen); + ASSERT(memcmp(name, args->name, namelen) == 0); +#endif /* DEBUG */ + + entry->flags &= ~XFS_ATTR_INCOMPLETE; + xfs_trans_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); + + if (args->rmtblkno) { + ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0); + name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); + name_rmt->valueblk = cpu_to_be32(args->rmtblkno); + name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen); + xfs_trans_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); + } + + /* + * Commit the flag value change and start the next trans in series. + */ + return xfs_trans_roll(&args->trans, args->dp); +} + +/* + * Set the INCOMPLETE flag on an entry in a leaf block. + */ +int +xfs_attr3_leaf_setflag( + struct xfs_da_args *args) +{ + struct xfs_attr_leafblock *leaf; + struct xfs_attr_leaf_entry *entry; + struct xfs_attr_leaf_name_remote *name_rmt; + struct xfs_buf *bp; + int error; +#ifdef DEBUG + struct xfs_attr3_icleaf_hdr ichdr; +#endif + + trace_xfs_attr_leaf_setflag(args); + + /* + * Set up the operation. + */ + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + if (error) + return error; + + leaf = bp->b_addr; +#ifdef DEBUG + xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); + ASSERT(args->index < ichdr.count); + ASSERT(args->index >= 0); +#endif + entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; + + ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0); + entry->flags |= XFS_ATTR_INCOMPLETE; + xfs_trans_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); + if ((entry->flags & XFS_ATTR_LOCAL) == 0) { + name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); + name_rmt->valueblk = 0; + name_rmt->valuelen = 0; + xfs_trans_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); + } + + /* + * Commit the flag value change and start the next trans in series. + */ + return xfs_trans_roll(&args->trans, args->dp); +} + +/* + * In a single transaction, clear the INCOMPLETE flag on the leaf entry + * given by args->blkno/index and set the INCOMPLETE flag on the leaf + * entry given by args->blkno2/index2. + * + * Note that they could be in different blocks, or in the same block. + */ +int +xfs_attr3_leaf_flipflags( + struct xfs_da_args *args) +{ + struct xfs_attr_leafblock *leaf1; + struct xfs_attr_leafblock *leaf2; + struct xfs_attr_leaf_entry *entry1; + struct xfs_attr_leaf_entry *entry2; + struct xfs_attr_leaf_name_remote *name_rmt; + struct xfs_buf *bp1; + struct xfs_buf *bp2; + int error; +#ifdef DEBUG + struct xfs_attr3_icleaf_hdr ichdr1; + struct xfs_attr3_icleaf_hdr ichdr2; + xfs_attr_leaf_name_local_t *name_loc; + int namelen1, namelen2; + char *name1, *name2; +#endif /* DEBUG */ + + trace_xfs_attr_leaf_flipflags(args); + + /* + * Read the block containing the "old" attr + */ + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1); + if (error) + return error; + + /* + * Read the block containing the "new" attr, if it is different + */ + if (args->blkno2 != args->blkno) { + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2, + -1, &bp2); + if (error) + return error; + } else { + bp2 = bp1; + } + + leaf1 = bp1->b_addr; + entry1 = &xfs_attr3_leaf_entryp(leaf1)[args->index]; + + leaf2 = bp2->b_addr; + entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2]; + +#ifdef DEBUG + xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1); + ASSERT(args->index < ichdr1.count); + ASSERT(args->index >= 0); + + xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2); + ASSERT(args->index2 < ichdr2.count); + ASSERT(args->index2 >= 0); + + if (entry1->flags & XFS_ATTR_LOCAL) { + name_loc = xfs_attr3_leaf_name_local(leaf1, args->index); + namelen1 = name_loc->namelen; + name1 = (char *)name_loc->nameval; + } else { + name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index); + namelen1 = name_rmt->namelen; + name1 = (char *)name_rmt->name; + } + if (entry2->flags & XFS_ATTR_LOCAL) { + name_loc = xfs_attr3_leaf_name_local(leaf2, args->index2); + namelen2 = name_loc->namelen; + name2 = (char *)name_loc->nameval; + } else { + name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2); + namelen2 = name_rmt->namelen; + name2 = (char *)name_rmt->name; + } + ASSERT(be32_to_cpu(entry1->hashval) == be32_to_cpu(entry2->hashval)); + ASSERT(namelen1 == namelen2); + ASSERT(memcmp(name1, name2, namelen1) == 0); +#endif /* DEBUG */ + + ASSERT(entry1->flags & XFS_ATTR_INCOMPLETE); + ASSERT((entry2->flags & XFS_ATTR_INCOMPLETE) == 0); + + entry1->flags &= ~XFS_ATTR_INCOMPLETE; + xfs_trans_log_buf(args->trans, bp1, + XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1))); + if (args->rmtblkno) { + ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); + name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index); + name_rmt->valueblk = cpu_to_be32(args->rmtblkno); + name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen); + xfs_trans_log_buf(args->trans, bp1, + XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt))); + } + + entry2->flags |= XFS_ATTR_INCOMPLETE; + xfs_trans_log_buf(args->trans, bp2, + XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2))); + if ((entry2->flags & XFS_ATTR_LOCAL) == 0) { + name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2); + name_rmt->valueblk = 0; + name_rmt->valuelen = 0; + xfs_trans_log_buf(args->trans, bp2, + XFS_DA_LOGRANGE(leaf2, name_rmt, sizeof(*name_rmt))); + } + + /* + * Commit the flag value change and start the next trans in series. + */ + error = xfs_trans_roll(&args->trans, args->dp); + + return error; +} diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c new file mode 100644 index 000000000000..a8bbc562ff35 --- /dev/null +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -0,0 +1,628 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_attr_remote.h" +#include "xfs_trans_space.h" +#include "xfs_trace.h" +#include "xfs_cksum.h" +#include "xfs_buf_item.h" +#include "xfs_error.h" + +#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ + +/* + * Each contiguous block has a header, so it is not just a simple attribute + * length to FSB conversion. + */ +int +xfs_attr3_rmt_blocks( + struct xfs_mount *mp, + int attrlen) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) { + int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize); + return (attrlen + buflen - 1) / buflen; + } + return XFS_B_TO_FSB(mp, attrlen); +} + +/* + * Checking of the remote attribute header is split into two parts. The verifier + * does CRC, location and bounds checking, the unpacking function checks the + * attribute parameters and owner. + */ +static bool +xfs_attr3_rmt_hdr_ok( + void *ptr, + xfs_ino_t ino, + uint32_t offset, + uint32_t size, + xfs_daddr_t bno) +{ + struct xfs_attr3_rmt_hdr *rmt = ptr; + + if (bno != be64_to_cpu(rmt->rm_blkno)) + return false; + if (offset != be32_to_cpu(rmt->rm_offset)) + return false; + if (size != be32_to_cpu(rmt->rm_bytes)) + return false; + if (ino != be64_to_cpu(rmt->rm_owner)) + return false; + + /* ok */ + return true; +} + +static bool +xfs_attr3_rmt_verify( + struct xfs_mount *mp, + void *ptr, + int fsbsize, + xfs_daddr_t bno) +{ + struct xfs_attr3_rmt_hdr *rmt = ptr; + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) + return false; + if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid)) + return false; + if (be64_to_cpu(rmt->rm_blkno) != bno) + return false; + if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) + return false; + if (be32_to_cpu(rmt->rm_offset) + + be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) + return false; + if (rmt->rm_owner == 0) + return false; + + return true; +} + +static void +xfs_attr3_rmt_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + char *ptr; + int len; + xfs_daddr_t bno; + int blksize = mp->m_attr_geo->blksize; + + /* no verification of non-crc buffers */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + ptr = bp->b_addr; + bno = bp->b_bn; + len = BBTOB(bp->b_length); + ASSERT(len >= blksize); + + while (len > 0) { + if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { + xfs_buf_ioerror(bp, EFSBADCRC); + break; + } + if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + break; + } + len -= blksize; + ptr += blksize; + bno += BTOBB(blksize); + } + + if (bp->b_error) + xfs_verifier_error(bp); + else + ASSERT(len == 0); +} + +static void +xfs_attr3_rmt_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + char *ptr; + int len; + xfs_daddr_t bno; + int blksize = mp->m_attr_geo->blksize; + + /* no verification of non-crc buffers */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + ptr = bp->b_addr; + bno = bp->b_bn; + len = BBTOB(bp->b_length); + ASSERT(len >= blksize); + + while (len > 0) { + if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + if (bip) { + struct xfs_attr3_rmt_hdr *rmt; + + rmt = (struct xfs_attr3_rmt_hdr *)ptr; + rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); + } + xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); + + len -= blksize; + ptr += blksize; + bno += BTOBB(blksize); + } + ASSERT(len == 0); +} + +const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { + .verify_read = xfs_attr3_rmt_read_verify, + .verify_write = xfs_attr3_rmt_write_verify, +}; + +STATIC int +xfs_attr3_rmt_hdr_set( + struct xfs_mount *mp, + void *ptr, + xfs_ino_t ino, + uint32_t offset, + uint32_t size, + xfs_daddr_t bno) +{ + struct xfs_attr3_rmt_hdr *rmt = ptr; + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return 0; + + rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC); + rmt->rm_offset = cpu_to_be32(offset); + rmt->rm_bytes = cpu_to_be32(size); + uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid); + rmt->rm_owner = cpu_to_be64(ino); + rmt->rm_blkno = cpu_to_be64(bno); + + return sizeof(struct xfs_attr3_rmt_hdr); +} + +/* + * Helper functions to copy attribute data in and out of the one disk extents + */ +STATIC int +xfs_attr_rmtval_copyout( + struct xfs_mount *mp, + struct xfs_buf *bp, + xfs_ino_t ino, + int *offset, + int *valuelen, + __uint8_t **dst) +{ + char *src = bp->b_addr; + xfs_daddr_t bno = bp->b_bn; + int len = BBTOB(bp->b_length); + int blksize = mp->m_attr_geo->blksize; + + ASSERT(len >= blksize); + + while (len > 0 && *valuelen > 0) { + int hdr_size = 0; + int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); + + byte_cnt = min(*valuelen, byte_cnt); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, + byte_cnt, bno)) { + xfs_alert(mp, +"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", + bno, *offset, byte_cnt, ino); + return EFSCORRUPTED; + } + hdr_size = sizeof(struct xfs_attr3_rmt_hdr); + } + + memcpy(*dst, src + hdr_size, byte_cnt); + + /* roll buffer forwards */ + len -= blksize; + src += blksize; + bno += BTOBB(blksize); + + /* roll attribute data forwards */ + *valuelen -= byte_cnt; + *dst += byte_cnt; + *offset += byte_cnt; + } + return 0; +} + +STATIC void +xfs_attr_rmtval_copyin( + struct xfs_mount *mp, + struct xfs_buf *bp, + xfs_ino_t ino, + int *offset, + int *valuelen, + __uint8_t **src) +{ + char *dst = bp->b_addr; + xfs_daddr_t bno = bp->b_bn; + int len = BBTOB(bp->b_length); + int blksize = mp->m_attr_geo->blksize; + + ASSERT(len >= blksize); + + while (len > 0 && *valuelen > 0) { + int hdr_size; + int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); + + byte_cnt = min(*valuelen, byte_cnt); + hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, + byte_cnt, bno); + + memcpy(dst + hdr_size, *src, byte_cnt); + + /* + * If this is the last block, zero the remainder of it. + * Check that we are actually the last block, too. + */ + if (byte_cnt + hdr_size < blksize) { + ASSERT(*valuelen - byte_cnt == 0); + ASSERT(len == blksize); + memset(dst + hdr_size + byte_cnt, 0, + blksize - hdr_size - byte_cnt); + } + + /* roll buffer forwards */ + len -= blksize; + dst += blksize; + bno += BTOBB(blksize); + + /* roll attribute data forwards */ + *valuelen -= byte_cnt; + *src += byte_cnt; + *offset += byte_cnt; + } +} + +/* + * Read the value associated with an attribute from the out-of-line buffer + * that we stored it in. + */ +int +xfs_attr_rmtval_get( + struct xfs_da_args *args) +{ + struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; + struct xfs_mount *mp = args->dp->i_mount; + struct xfs_buf *bp; + xfs_dablk_t lblkno = args->rmtblkno; + __uint8_t *dst = args->value; + int valuelen; + int nmap; + int error; + int blkcnt = args->rmtblkcnt; + int i; + int offset = 0; + + trace_xfs_attr_rmtval_get(args); + + ASSERT(!(args->flags & ATTR_KERNOVAL)); + ASSERT(args->rmtvaluelen == args->valuelen); + + valuelen = args->rmtvaluelen; + while (valuelen > 0) { + nmap = ATTR_RMTVALUE_MAPSIZE; + error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, + blkcnt, map, &nmap, + XFS_BMAPI_ATTRFORK); + if (error) + return error; + ASSERT(nmap >= 1); + + for (i = 0; (i < nmap) && (valuelen > 0); i++) { + xfs_daddr_t dblkno; + int dblkcnt; + + ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && + (map[i].br_startblock != HOLESTARTBLOCK)); + dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); + dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, + dblkno, dblkcnt, 0, &bp, + &xfs_attr3_rmt_buf_ops); + if (error) + return error; + + error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, + &offset, &valuelen, + &dst); + xfs_buf_relse(bp); + if (error) + return error; + + /* roll attribute extent map forwards */ + lblkno += map[i].br_blockcount; + blkcnt -= map[i].br_blockcount; + } + } + ASSERT(valuelen == 0); + return 0; +} + +/* + * Write the value associated with an attribute into the out-of-line buffer + * that we have defined for it. + */ +int +xfs_attr_rmtval_set( + struct xfs_da_args *args) +{ + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + struct xfs_bmbt_irec map; + xfs_dablk_t lblkno; + xfs_fileoff_t lfileoff = 0; + __uint8_t *src = args->value; + int blkcnt; + int valuelen; + int nmap; + int error; + int offset = 0; + + trace_xfs_attr_rmtval_set(args); + + /* + * Find a "hole" in the attribute address space large enough for + * us to drop the new attribute's value into. Because CRC enable + * attributes have headers, we can't just do a straight byte to FSB + * conversion and have to take the header space into account. + */ + blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); + error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, + XFS_ATTR_FORK); + if (error) + return error; + + args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; + args->rmtblkcnt = blkcnt; + + /* + * Roll through the "value", allocating blocks on disk as required. + */ + while (blkcnt > 0) { + int committed; + + /* + * Allocate a single extent, up to the size of the value. + */ + xfs_bmap_init(args->flist, args->firstblock); + nmap = 1; + error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, + blkcnt, + XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, + args->firstblock, args->total, &map, &nmap, + args->flist); + if (!error) { + error = xfs_bmap_finish(&args->trans, args->flist, + &committed); + } + if (error) { + ASSERT(committed); + args->trans = NULL; + xfs_bmap_cancel(args->flist); + return error; + } + + /* + * bmap_finish() may have committed the last trans and started + * a new one. We need the inode to be in all transactions. + */ + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); + + ASSERT(nmap == 1); + ASSERT((map.br_startblock != DELAYSTARTBLOCK) && + (map.br_startblock != HOLESTARTBLOCK)); + lblkno += map.br_blockcount; + blkcnt -= map.br_blockcount; + + /* + * Start the next trans in the chain. + */ + error = xfs_trans_roll(&args->trans, dp); + if (error) + return error; + } + + /* + * Roll through the "value", copying the attribute value to the + * already-allocated blocks. Blocks are written synchronously + * so that we can know they are all on disk before we turn off + * the INCOMPLETE flag. + */ + lblkno = args->rmtblkno; + blkcnt = args->rmtblkcnt; + valuelen = args->rmtvaluelen; + while (valuelen > 0) { + struct xfs_buf *bp; + xfs_daddr_t dblkno; + int dblkcnt; + + ASSERT(blkcnt > 0); + + xfs_bmap_init(args->flist, args->firstblock); + nmap = 1; + error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, + blkcnt, &map, &nmap, + XFS_BMAPI_ATTRFORK); + if (error) + return error; + ASSERT(nmap == 1); + ASSERT((map.br_startblock != DELAYSTARTBLOCK) && + (map.br_startblock != HOLESTARTBLOCK)); + + dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), + dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); + + bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); + if (!bp) + return ENOMEM; + bp->b_ops = &xfs_attr3_rmt_buf_ops; + + xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, + &valuelen, &src); + + error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ + xfs_buf_relse(bp); + if (error) + return error; + + + /* roll attribute extent map forwards */ + lblkno += map.br_blockcount; + blkcnt -= map.br_blockcount; + } + ASSERT(valuelen == 0); + return 0; +} + +/* + * Remove the value associated with an attribute by deleting the + * out-of-line buffer that it is stored on. + */ +int +xfs_attr_rmtval_remove( + struct xfs_da_args *args) +{ + struct xfs_mount *mp = args->dp->i_mount; + xfs_dablk_t lblkno; + int blkcnt; + int error; + int done; + + trace_xfs_attr_rmtval_remove(args); + + /* + * Roll through the "value", invalidating the attribute value's blocks. + */ + lblkno = args->rmtblkno; + blkcnt = args->rmtblkcnt; + while (blkcnt > 0) { + struct xfs_bmbt_irec map; + struct xfs_buf *bp; + xfs_daddr_t dblkno; + int dblkcnt; + int nmap; + + /* + * Try to remember where we decided to put the value. + */ + nmap = 1; + error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, + blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); + if (error) + return error; + ASSERT(nmap == 1); + ASSERT((map.br_startblock != DELAYSTARTBLOCK) && + (map.br_startblock != HOLESTARTBLOCK)); + + dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), + dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); + + /* + * If the "remote" value is in the cache, remove it. + */ + bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); + if (bp) { + xfs_buf_stale(bp); + xfs_buf_relse(bp); + bp = NULL; + } + + lblkno += map.br_blockcount; + blkcnt -= map.br_blockcount; + } + + /* + * Keep de-allocating extents until the remote-value region is gone. + */ + lblkno = args->rmtblkno; + blkcnt = args->rmtblkcnt; + done = 0; + while (!done) { + int committed; + + xfs_bmap_init(args->flist, args->firstblock); + error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, + XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, + 1, args->firstblock, args->flist, + &done); + if (!error) { + error = xfs_bmap_finish(&args->trans, args->flist, + &committed); + } + if (error) { + ASSERT(committed); + args->trans = NULL; + xfs_bmap_cancel(args->flist); + return error; + } + + /* + * bmap_finish() may have committed the last trans and started + * a new one. We need the inode to be in all transactions. + */ + if (committed) + xfs_trans_ijoin(args->trans, args->dp, 0); + + /* + * Close out trans and start the next one in the chain. + */ + error = xfs_trans_roll(&args->trans, args->dp); + if (error) + return error; + } + return 0; +} diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c new file mode 100644 index 000000000000..b44d63189dab --- /dev/null +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -0,0 +1,5609 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_inum.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_extfree_item.h" +#include "xfs_alloc.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_quota.h" +#include "xfs_trans_space.h" +#include "xfs_buf_item.h" +#include "xfs_trace.h" +#include "xfs_symlink.h" +#include "xfs_attr_leaf.h" +#include "xfs_dinode.h" +#include "xfs_filestream.h" + + +kmem_zone_t *xfs_bmap_free_item_zone; + +/* + * Miscellaneous helper functions + */ + +/* + * Compute and fill in the value of the maximum depth of a bmap btree + * in this filesystem. Done once, during mount. + */ +void +xfs_bmap_compute_maxlevels( + xfs_mount_t *mp, /* file system mount structure */ + int whichfork) /* data or attr fork */ +{ + int level; /* btree level */ + uint maxblocks; /* max blocks at this level */ + uint maxleafents; /* max leaf entries possible */ + int maxrootrecs; /* max records in root block */ + int minleafrecs; /* min records in leaf block */ + int minnoderecs; /* min records in node block */ + int sz; /* root block size */ + + /* + * The maximum number of extents in a file, hence the maximum + * number of leaf entries, is controlled by the type of di_nextents + * (a signed 32-bit number, xfs_extnum_t), or by di_anextents + * (a signed 16-bit number, xfs_aextnum_t). + * + * Note that we can no longer assume that if we are in ATTR1 that + * the fork offset of all the inodes will be + * (xfs_default_attroffset(ip) >> 3) because we could have mounted + * with ATTR2 and then mounted back with ATTR1, keeping the + * di_forkoff's fixed but probably at various positions. Therefore, + * for both ATTR1 and ATTR2 we have to assume the worst case scenario + * of a minimum size available. + */ + if (whichfork == XFS_DATA_FORK) { + maxleafents = MAXEXTNUM; + sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); + } else { + maxleafents = MAXAEXTNUM; + sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); + } + maxrootrecs = xfs_bmdr_maxrecs(sz, 0); + minleafrecs = mp->m_bmap_dmnr[0]; + minnoderecs = mp->m_bmap_dmnr[1]; + maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; + for (level = 1; maxblocks > 1; level++) { + if (maxblocks <= maxrootrecs) + maxblocks = 1; + else + maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; + } + mp->m_bm_maxlevels[whichfork] = level; +} + +STATIC int /* error */ +xfs_bmbt_lookup_eq( + struct xfs_btree_cur *cur, + xfs_fileoff_t off, + xfs_fsblock_t bno, + xfs_filblks_t len, + int *stat) /* success/failure */ +{ + cur->bc_rec.b.br_startoff = off; + cur->bc_rec.b.br_startblock = bno; + cur->bc_rec.b.br_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); +} + +STATIC int /* error */ +xfs_bmbt_lookup_ge( + struct xfs_btree_cur *cur, + xfs_fileoff_t off, + xfs_fsblock_t bno, + xfs_filblks_t len, + int *stat) /* success/failure */ +{ + cur->bc_rec.b.br_startoff = off; + cur->bc_rec.b.br_startblock = bno; + cur->bc_rec.b.br_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); +} + +/* + * Check if the inode needs to be converted to btree format. + */ +static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork) +{ + return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_NEXTENTS(ip, whichfork) > + XFS_IFORK_MAXEXT(ip, whichfork); +} + +/* + * Check if the inode should be converted to extent format. + */ +static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork) +{ + return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && + XFS_IFORK_NEXTENTS(ip, whichfork) <= + XFS_IFORK_MAXEXT(ip, whichfork); +} + +/* + * Update the record referred to by cur to the value given + * by [off, bno, len, state]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +STATIC int +xfs_bmbt_update( + struct xfs_btree_cur *cur, + xfs_fileoff_t off, + xfs_fsblock_t bno, + xfs_filblks_t len, + xfs_exntst_t state) +{ + union xfs_btree_rec rec; + + xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state); + return xfs_btree_update(cur, &rec); +} + +/* + * Compute the worst-case number of indirect blocks that will be used + * for ip's delayed extent of length "len". + */ +STATIC xfs_filblks_t +xfs_bmap_worst_indlen( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_filblks_t len) /* delayed extent length */ +{ + int level; /* btree level number */ + int maxrecs; /* maximum record count at this level */ + xfs_mount_t *mp; /* mount structure */ + xfs_filblks_t rval; /* return value */ + + mp = ip->i_mount; + maxrecs = mp->m_bmap_dmxr[0]; + for (level = 0, rval = 0; + level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); + level++) { + len += maxrecs - 1; + do_div(len, maxrecs); + rval += len; + if (len == 1) + return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - + level - 1; + if (level == 0) + maxrecs = mp->m_bmap_dmxr[1]; + } + return rval; +} + +/* + * Calculate the default attribute fork offset for newly created inodes. + */ +uint +xfs_default_attroffset( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + uint offset; + + if (mp->m_sb.sb_inodesize == 256) { + offset = XFS_LITINO(mp, ip->i_d.di_version) - + XFS_BMDR_SPACE_CALC(MINABTPTRS); + } else { + offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); + } + + ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version)); + return offset; +} + +/* + * Helper routine to reset inode di_forkoff field when switching + * attribute fork from local to extent format - we reset it where + * possible to make space available for inline data fork extents. + */ +STATIC void +xfs_bmap_forkoff_reset( + xfs_inode_t *ip, + int whichfork) +{ + if (whichfork == XFS_ATTR_FORK && + ip->i_d.di_format != XFS_DINODE_FMT_DEV && + ip->i_d.di_format != XFS_DINODE_FMT_UUID && + ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { + uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; + + if (dfl_forkoff > ip->i_d.di_forkoff) + ip->i_d.di_forkoff = dfl_forkoff; + } +} + +/* + * Debug/sanity checking code + */ + +STATIC int +xfs_bmap_sanity_check( + struct xfs_mount *mp, + struct xfs_buf *bp, + int level) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + + if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) && + block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC)) + return 0; + + if (be16_to_cpu(block->bb_level) != level || + be16_to_cpu(block->bb_numrecs) == 0 || + be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) + return 0; + + return 1; +} + +#ifdef DEBUG +STATIC struct xfs_buf * +xfs_bmap_get_bp( + struct xfs_btree_cur *cur, + xfs_fsblock_t bno) +{ + struct xfs_log_item_desc *lidp; + int i; + + if (!cur) + return NULL; + + for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) { + if (!cur->bc_bufs[i]) + break; + if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno) + return cur->bc_bufs[i]; + } + + /* Chase down all the log items to see if the bp is there */ + list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) { + struct xfs_buf_log_item *bip; + bip = (struct xfs_buf_log_item *)lidp->lid_item; + if (bip->bli_item.li_type == XFS_LI_BUF && + XFS_BUF_ADDR(bip->bli_buf) == bno) + return bip->bli_buf; + } + + return NULL; +} + +STATIC void +xfs_check_block( + struct xfs_btree_block *block, + xfs_mount_t *mp, + int root, + short sz) +{ + int i, j, dmxr; + __be64 *pp, *thispa; /* pointer to block address */ + xfs_bmbt_key_t *prevp, *keyp; + + ASSERT(be16_to_cpu(block->bb_level) > 0); + + prevp = NULL; + for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { + dmxr = mp->m_bmap_dmxr[0]; + keyp = XFS_BMBT_KEY_ADDR(mp, block, i); + + if (prevp) { + ASSERT(be64_to_cpu(prevp->br_startoff) < + be64_to_cpu(keyp->br_startoff)); + } + prevp = keyp; + + /* + * Compare the block numbers to see if there are dups. + */ + if (root) + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); + else + pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); + + for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { + if (root) + thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); + else + thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); + if (*thispa == *pp) { + xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld", + __func__, j, i, + (unsigned long long)be64_to_cpu(*thispa)); + panic("%s: ptrs are equal in node\n", + __func__); + } + } + } +} + +/* + * Check that the extents for the inode ip are in the right order in all + * btree leaves. + */ + +STATIC void +xfs_bmap_check_leaf_extents( + xfs_btree_cur_t *cur, /* btree cursor or null */ + xfs_inode_t *ip, /* incore inode pointer */ + int whichfork) /* data or attr fork */ +{ + struct xfs_btree_block *block; /* current btree block */ + xfs_fsblock_t bno; /* block # of "block" */ + xfs_buf_t *bp; /* buffer for "block" */ + int error; /* error return value */ + xfs_extnum_t i=0, j; /* index into the extents list */ + xfs_ifork_t *ifp; /* fork structure */ + int level; /* btree level, for checking */ + xfs_mount_t *mp; /* file system mount structure */ + __be64 *pp; /* pointer to block address */ + xfs_bmbt_rec_t *ep; /* pointer to current extent */ + xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */ + xfs_bmbt_rec_t *nextp; /* pointer to next extent */ + int bp_release = 0; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) { + return; + } + + bno = NULLFSBLOCK; + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + block = ifp->if_broot; + /* + * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. + */ + level = be16_to_cpu(block->bb_level); + ASSERT(level > 0); + xfs_check_block(block, mp, 1, ifp->if_broot_bytes); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); + bno = be64_to_cpu(*pp); + + ASSERT(bno != NULLDFSBNO); + ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); + + /* + * Go down the tree until leaf level is reached, following the first + * pointer (leftmost) at each level. + */ + while (level-- > 0) { + /* See if buf is in cur first */ + bp_release = 0; + bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); + if (!bp) { + bp_release = 1; + error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, + XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) + goto error_norelse; + } + block = XFS_BUF_TO_BLOCK(bp); + XFS_WANT_CORRUPTED_GOTO( + xfs_bmap_sanity_check(mp, bp, level), + error0); + if (level == 0) + break; + + /* + * Check this block for basic sanity (increasing keys and + * no duplicate blocks). + */ + + xfs_check_block(block, mp, 0, 0); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); + bno = be64_to_cpu(*pp); + XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); + if (bp_release) { + bp_release = 0; + xfs_trans_brelse(NULL, bp); + } + } + + /* + * Here with bp and block set to the leftmost leaf node in the tree. + */ + i = 0; + + /* + * Loop over all leaf nodes checking that all extents are in the right order. + */ + for (;;) { + xfs_fsblock_t nextbno; + xfs_extnum_t num_recs; + + + num_recs = xfs_btree_get_numrecs(block); + + /* + * Read-ahead the next leaf block, if any. + */ + + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); + + /* + * Check all the extents to make sure they are OK. + * If we had a previous block, the last entry should + * conform with the first entry in this one. + */ + + ep = XFS_BMBT_REC_ADDR(mp, block, 1); + if (i) { + ASSERT(xfs_bmbt_disk_get_startoff(&last) + + xfs_bmbt_disk_get_blockcount(&last) <= + xfs_bmbt_disk_get_startoff(ep)); + } + for (j = 1; j < num_recs; j++) { + nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); + ASSERT(xfs_bmbt_disk_get_startoff(ep) + + xfs_bmbt_disk_get_blockcount(ep) <= + xfs_bmbt_disk_get_startoff(nextp)); + ep = nextp; + } + + last = *ep; + i += num_recs; + if (bp_release) { + bp_release = 0; + xfs_trans_brelse(NULL, bp); + } + bno = nextbno; + /* + * If we've reached the end, stop. + */ + if (bno == NULLFSBLOCK) + break; + + bp_release = 0; + bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); + if (!bp) { + bp_release = 1; + error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, + XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) + goto error_norelse; + } + block = XFS_BUF_TO_BLOCK(bp); + } + if (bp_release) { + bp_release = 0; + xfs_trans_brelse(NULL, bp); + } + return; + +error0: + xfs_warn(mp, "%s: at error0", __func__); + if (bp_release) + xfs_trans_brelse(NULL, bp); +error_norelse: + xfs_warn(mp, "%s: BAD after btree leaves for %d extents", + __func__, i); + panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); + return; +} + +/* + * Add bmap trace insert entries for all the contents of the extent records. + */ +void +xfs_bmap_trace_exlist( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_extnum_t cnt, /* count of entries in the list */ + int whichfork, /* data or attr fork */ + unsigned long caller_ip) +{ + xfs_extnum_t idx; /* extent record index */ + xfs_ifork_t *ifp; /* inode fork pointer */ + int state = 0; + + if (whichfork == XFS_ATTR_FORK) + state |= BMAP_ATTRFORK; + + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); + for (idx = 0; idx < cnt; idx++) + trace_xfs_extlist(ip, idx, whichfork, caller_ip); +} + +/* + * Validate that the bmbt_irecs being returned from bmapi are valid + * given the caller's original parameters. Specifically check the + * ranges of the returned irecs to ensure that they only extend beyond + * the given parameters if the XFS_BMAPI_ENTIRE flag was set. + */ +STATIC void +xfs_bmap_validate_ret( + xfs_fileoff_t bno, + xfs_filblks_t len, + int flags, + xfs_bmbt_irec_t *mval, + int nmap, + int ret_nmap) +{ + int i; /* index to map values */ + + ASSERT(ret_nmap <= nmap); + + for (i = 0; i < ret_nmap; i++) { + ASSERT(mval[i].br_blockcount > 0); + if (!(flags & XFS_BMAPI_ENTIRE)) { + ASSERT(mval[i].br_startoff >= bno); + ASSERT(mval[i].br_blockcount <= len); + ASSERT(mval[i].br_startoff + mval[i].br_blockcount <= + bno + len); + } else { + ASSERT(mval[i].br_startoff < bno + len); + ASSERT(mval[i].br_startoff + mval[i].br_blockcount > + bno); + } + ASSERT(i == 0 || + mval[i - 1].br_startoff + mval[i - 1].br_blockcount == + mval[i].br_startoff); + ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK && + mval[i].br_startblock != HOLESTARTBLOCK); + ASSERT(mval[i].br_state == XFS_EXT_NORM || + mval[i].br_state == XFS_EXT_UNWRITTEN); + } +} + +#else +#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0) +#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) +#endif /* DEBUG */ + +/* + * bmap free list manipulation functions + */ + +/* + * Add the extent to the list of extents to be free at transaction end. + * The list is maintained sorted (by block number). + */ +void +xfs_bmap_add_free( + xfs_fsblock_t bno, /* fs block number of extent */ + xfs_filblks_t len, /* length of extent */ + xfs_bmap_free_t *flist, /* list of extents */ + xfs_mount_t *mp) /* mount point structure */ +{ + xfs_bmap_free_item_t *cur; /* current (next) element */ + xfs_bmap_free_item_t *new; /* new element */ + xfs_bmap_free_item_t *prev; /* previous element */ +#ifdef DEBUG + xfs_agnumber_t agno; + xfs_agblock_t agbno; + + ASSERT(bno != NULLFSBLOCK); + ASSERT(len > 0); + ASSERT(len <= MAXEXTLEN); + ASSERT(!isnullstartblock(bno)); + agno = XFS_FSB_TO_AGNO(mp, bno); + agbno = XFS_FSB_TO_AGBNO(mp, bno); + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(agbno < mp->m_sb.sb_agblocks); + ASSERT(len < mp->m_sb.sb_agblocks); + ASSERT(agbno + len <= mp->m_sb.sb_agblocks); +#endif + ASSERT(xfs_bmap_free_item_zone != NULL); + new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); + new->xbfi_startblock = bno; + new->xbfi_blockcount = (xfs_extlen_t)len; + for (prev = NULL, cur = flist->xbf_first; + cur != NULL; + prev = cur, cur = cur->xbfi_next) { + if (cur->xbfi_startblock >= bno) + break; + } + if (prev) + prev->xbfi_next = new; + else + flist->xbf_first = new; + new->xbfi_next = cur; + flist->xbf_count++; +} + +/* + * Remove the entry "free" from the free item list. Prev points to the + * previous entry, unless "free" is the head of the list. + */ +void +xfs_bmap_del_free( + xfs_bmap_free_t *flist, /* free item list header */ + xfs_bmap_free_item_t *prev, /* previous item on list, if any */ + xfs_bmap_free_item_t *free) /* list item to be freed */ +{ + if (prev) + prev->xbfi_next = free->xbfi_next; + else + flist->xbf_first = free->xbfi_next; + flist->xbf_count--; + kmem_zone_free(xfs_bmap_free_item_zone, free); +} + +/* + * Free up any items left in the list. + */ +void +xfs_bmap_cancel( + xfs_bmap_free_t *flist) /* list of bmap_free_items */ +{ + xfs_bmap_free_item_t *free; /* free list item */ + xfs_bmap_free_item_t *next; + + if (flist->xbf_count == 0) + return; + ASSERT(flist->xbf_first != NULL); + for (free = flist->xbf_first; free; free = next) { + next = free->xbfi_next; + xfs_bmap_del_free(flist, NULL, free); + } + ASSERT(flist->xbf_count == 0); +} + +/* + * Inode fork format manipulation functions + */ + +/* + * Transform a btree format file with only one leaf node, where the + * extents list will fit in the inode, into an extents format file. + * Since the file extents are already in-core, all we have to do is + * give up the space for the btree root and pitch the leaf block. + */ +STATIC int /* error */ +xfs_bmap_btree_to_extents( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_btree_cur_t *cur, /* btree cursor */ + int *logflagsp, /* inode logging flags */ + int whichfork) /* data or attr fork */ +{ + /* REFERENCED */ + struct xfs_btree_block *cblock;/* child btree block */ + xfs_fsblock_t cbno; /* child block number */ + xfs_buf_t *cbp; /* child block's buffer */ + int error; /* error return value */ + xfs_ifork_t *ifp; /* inode fork data */ + xfs_mount_t *mp; /* mount point structure */ + __be64 *pp; /* ptr to block address */ + struct xfs_btree_block *rblock;/* root btree block */ + + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); + rblock = ifp->if_broot; + ASSERT(be16_to_cpu(rblock->bb_level) == 1); + ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); + ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); + cbno = be64_to_cpu(*pp); + *logflagsp = 0; +#ifdef DEBUG + if ((error = xfs_btree_check_lptr(cur, cbno, 1))) + return error; +#endif + error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) + return error; + cblock = XFS_BUF_TO_BLOCK(cbp); + if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) + return error; + xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); + ip->i_d.di_nblocks--; + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_binval(tp, cbp); + if (cur->bc_bufs[0] == cbp) + cur->bc_bufs[0] = NULL; + xfs_iroot_realloc(ip, -1, whichfork); + ASSERT(ifp->if_broot == NULL); + ASSERT((ifp->if_flags & XFS_IFBROOT) == 0); + XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); + return 0; +} + +/* + * Convert an extents-format file into a btree-format file. + * The new file will have a root block (in the inode) and a single child block. + */ +STATIC int /* error */ +xfs_bmap_extents_to_btree( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fsblock_t *firstblock, /* first-block-allocated */ + xfs_bmap_free_t *flist, /* blocks freed in xaction */ + xfs_btree_cur_t **curp, /* cursor returned to caller */ + int wasdel, /* converting a delayed alloc */ + int *logflagsp, /* inode logging flags */ + int whichfork) /* data or attr fork */ +{ + struct xfs_btree_block *ablock; /* allocated (child) bt block */ + xfs_buf_t *abp; /* buffer for ablock */ + xfs_alloc_arg_t args; /* allocation arguments */ + xfs_bmbt_rec_t *arp; /* child record pointer */ + struct xfs_btree_block *block; /* btree root block */ + xfs_btree_cur_t *cur; /* bmap btree cursor */ + xfs_bmbt_rec_host_t *ep; /* extent record pointer */ + int error; /* error return value */ + xfs_extnum_t i, cnt; /* extent record index */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_bmbt_key_t *kp; /* root block key pointer */ + xfs_mount_t *mp; /* mount structure */ + xfs_extnum_t nextents; /* number of file extents */ + xfs_bmbt_ptr_t *pp; /* root block address pointer */ + + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); + + /* + * Make space in the inode incore. + */ + xfs_iroot_realloc(ip, 1, whichfork); + ifp->if_flags |= XFS_IFBROOT; + + /* + * Fill in the root. + */ + block = ifp->if_broot; + if (xfs_sb_version_hascrc(&mp->m_sb)) + xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, + XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino, + XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); + else + xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, + XFS_BMAP_MAGIC, 1, 1, ip->i_ino, + XFS_BTREE_LONG_PTRS); + + /* + * Need a cursor. Can't allocate until bb_level is filled in. + */ + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); + cur->bc_private.b.firstblock = *firstblock; + cur->bc_private.b.flist = flist; + cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; + /* + * Convert to a btree with two levels, one record in root. + */ + XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); + memset(&args, 0, sizeof(args)); + args.tp = tp; + args.mp = mp; + args.firstblock = *firstblock; + if (*firstblock == NULLFSBLOCK) { + args.type = XFS_ALLOCTYPE_START_BNO; + args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); + } else if (flist->xbf_low) { + args.type = XFS_ALLOCTYPE_START_BNO; + args.fsbno = *firstblock; + } else { + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.fsbno = *firstblock; + } + args.minlen = args.maxlen = args.prod = 1; + args.wasdel = wasdel; + *logflagsp = 0; + if ((error = xfs_alloc_vextent(&args))) { + xfs_iroot_realloc(ip, -1, whichfork); + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; + } + /* + * Allocation can't fail, the space was reserved. + */ + ASSERT(args.fsbno != NULLFSBLOCK); + ASSERT(*firstblock == NULLFSBLOCK || + args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || + (flist->xbf_low && + args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); + *firstblock = cur->bc_private.b.firstblock = args.fsbno; + cur->bc_private.b.allocated++; + ip->i_d.di_nblocks++; + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); + abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); + /* + * Fill in the child block. + */ + abp->b_ops = &xfs_bmbt_buf_ops; + ablock = XFS_BUF_TO_BLOCK(abp); + if (xfs_sb_version_hascrc(&mp->m_sb)) + xfs_btree_init_block_int(mp, ablock, abp->b_bn, + XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, + XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); + else + xfs_btree_init_block_int(mp, ablock, abp->b_bn, + XFS_BMAP_MAGIC, 0, 0, ip->i_ino, + XFS_BTREE_LONG_PTRS); + + arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + for (cnt = i = 0; i < nextents; i++) { + ep = xfs_iext_get_ext(ifp, i); + if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { + arp->l0 = cpu_to_be64(ep->l0); + arp->l1 = cpu_to_be64(ep->l1); + arp++; cnt++; + } + } + ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); + xfs_btree_set_numrecs(ablock, cnt); + + /* + * Fill in the root key and pointer. + */ + kp = XFS_BMBT_KEY_ADDR(mp, block, 1); + arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); + kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur, + be16_to_cpu(block->bb_level))); + *pp = cpu_to_be64(args.fsbno); + + /* + * Do all this logging at the end so that + * the root is at the right level. + */ + xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS); + xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); + ASSERT(*curp == NULL); + *curp = cur; + *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork); + return 0; +} + +/* + * Convert a local file to an extents file. + * This code is out of bounds for data forks of regular files, + * since the file data needs to get logged so things will stay consistent. + * (The bmap-level manipulations are ok, though). + */ +void +xfs_bmap_local_to_extents_empty( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); + ASSERT(ifp->if_bytes == 0); + ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); + + xfs_bmap_forkoff_reset(ip, whichfork); + ifp->if_flags &= ~XFS_IFINLINE; + ifp->if_flags |= XFS_IFEXTENTS; + XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); +} + + +STATIC int /* error */ +xfs_bmap_local_to_extents( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fsblock_t *firstblock, /* first block allocated in xaction */ + xfs_extlen_t total, /* total blocks needed by transaction */ + int *logflagsp, /* inode logging flags */ + int whichfork, + void (*init_fn)(struct xfs_trans *tp, + struct xfs_buf *bp, + struct xfs_inode *ip, + struct xfs_ifork *ifp)) +{ + int error = 0; + int flags; /* logging flags returned */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_alloc_arg_t args; /* allocation arguments */ + xfs_buf_t *bp; /* buffer for extent block */ + xfs_bmbt_rec_host_t *ep; /* extent record pointer */ + + /* + * We don't want to deal with the case of keeping inode data inline yet. + * So sending the data fork of a regular inode is invalid. + */ + ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); + + if (!ifp->if_bytes) { + xfs_bmap_local_to_extents_empty(ip, whichfork); + flags = XFS_ILOG_CORE; + goto done; + } + + flags = 0; + error = 0; + ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == + XFS_IFINLINE); + memset(&args, 0, sizeof(args)); + args.tp = tp; + args.mp = ip->i_mount; + args.firstblock = *firstblock; + /* + * Allocate a block. We know we need only one, since the + * file currently fits in an inode. + */ + if (*firstblock == NULLFSBLOCK) { + args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); + args.type = XFS_ALLOCTYPE_START_BNO; + } else { + args.fsbno = *firstblock; + args.type = XFS_ALLOCTYPE_NEAR_BNO; + } + args.total = total; + args.minlen = args.maxlen = args.prod = 1; + error = xfs_alloc_vextent(&args); + if (error) + goto done; + + /* Can't fail, the space was reserved. */ + ASSERT(args.fsbno != NULLFSBLOCK); + ASSERT(args.len == 1); + *firstblock = args.fsbno; + bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); + + /* initialise the block and copy the data */ + init_fn(tp, bp, ip, ifp); + + /* account for the change in fork size and log everything */ + xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); + xfs_bmap_local_to_extents_empty(ip, whichfork); + flags |= XFS_ILOG_CORE; + + xfs_iext_add(ifp, 0, 1); + ep = xfs_iext_get_ext(ifp, 0); + xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); + trace_xfs_bmap_post_update(ip, 0, + whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0, + _THIS_IP_); + XFS_IFORK_NEXT_SET(ip, whichfork, 1); + ip->i_d.di_nblocks = 1; + xfs_trans_mod_dquot_byino(tp, ip, + XFS_TRANS_DQ_BCOUNT, 1L); + flags |= xfs_ilog_fext(whichfork); + +done: + *logflagsp = flags; + return error; +} + +/* + * Called from xfs_bmap_add_attrfork to handle btree format files. + */ +STATIC int /* error */ +xfs_bmap_add_attrfork_btree( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fsblock_t *firstblock, /* first block allocated */ + xfs_bmap_free_t *flist, /* blocks to free at commit */ + int *flags) /* inode logging flags */ +{ + xfs_btree_cur_t *cur; /* btree cursor */ + int error; /* error return value */ + xfs_mount_t *mp; /* file system mount struct */ + int stat; /* newroot status */ + + mp = ip->i_mount; + if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) + *flags |= XFS_ILOG_DBROOT; + else { + cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); + cur->bc_private.b.flist = flist; + cur->bc_private.b.firstblock = *firstblock; + if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) + goto error0; + /* must be at least one entry */ + XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); + if ((error = xfs_btree_new_iroot(cur, flags, &stat))) + goto error0; + if (stat == 0) { + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + return ENOSPC; + } + *firstblock = cur->bc_private.b.firstblock; + cur->bc_private.b.allocated = 0; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + } + return 0; +error0: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Called from xfs_bmap_add_attrfork to handle extents format files. + */ +STATIC int /* error */ +xfs_bmap_add_attrfork_extents( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fsblock_t *firstblock, /* first block allocated */ + xfs_bmap_free_t *flist, /* blocks to free at commit */ + int *flags) /* inode logging flags */ +{ + xfs_btree_cur_t *cur; /* bmap btree cursor */ + int error; /* error return value */ + + if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip)) + return 0; + cur = NULL; + error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0, + flags, XFS_DATA_FORK); + if (cur) { + cur->bc_private.b.allocated = 0; + xfs_btree_del_cursor(cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + } + return error; +} + +/* + * Called from xfs_bmap_add_attrfork to handle local format files. Each + * different data fork content type needs a different callout to do the + * conversion. Some are basic and only require special block initialisation + * callouts for the data formating, others (directories) are so specialised they + * handle everything themselves. + * + * XXX (dgc): investigate whether directory conversion can use the generic + * formatting callout. It should be possible - it's just a very complex + * formatter. + */ +STATIC int /* error */ +xfs_bmap_add_attrfork_local( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fsblock_t *firstblock, /* first block allocated */ + xfs_bmap_free_t *flist, /* blocks to free at commit */ + int *flags) /* inode logging flags */ +{ + xfs_da_args_t dargs; /* args for dir/attr code */ + + if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) + return 0; + + if (S_ISDIR(ip->i_d.di_mode)) { + memset(&dargs, 0, sizeof(dargs)); + dargs.geo = ip->i_mount->m_dir_geo; + dargs.dp = ip; + dargs.firstblock = firstblock; + dargs.flist = flist; + dargs.total = dargs.geo->fsbcount; + dargs.whichfork = XFS_DATA_FORK; + dargs.trans = tp; + return xfs_dir2_sf_to_block(&dargs); + } + + if (S_ISLNK(ip->i_d.di_mode)) + return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, + flags, XFS_DATA_FORK, + xfs_symlink_local_to_remote); + + /* should only be called for types that support local format data */ + ASSERT(0); + return EFSCORRUPTED; +} + +/* + * Convert inode from non-attributed to attributed. + * Must not be in a transaction, ip must not be locked. + */ +int /* error code */ +xfs_bmap_add_attrfork( + xfs_inode_t *ip, /* incore inode pointer */ + int size, /* space new attribute needs */ + int rsvd) /* xact may use reserved blks */ +{ + xfs_fsblock_t firstblock; /* 1st block/ag allocated */ + xfs_bmap_free_t flist; /* freed extent records */ + xfs_mount_t *mp; /* mount structure */ + xfs_trans_t *tp; /* transaction pointer */ + int blks; /* space reservation */ + int version = 1; /* superblock attr version */ + int committed; /* xaction was committed */ + int logflags; /* logging flags */ + int error; /* error return value */ + int cancel_flags = 0; + + ASSERT(XFS_IFORK_Q(ip) == 0); + + mp = ip->i_mount; + ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK); + blks = XFS_ADDAFORK_SPACE_RES(mp); + if (rsvd) + tp->t_flags |= XFS_TRANS_RESERVE; + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + cancel_flags = XFS_TRANS_RELEASE_LOG_RES; + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? + XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : + XFS_QMOPT_RES_REGBLKS); + if (error) + goto trans_cancel; + cancel_flags |= XFS_TRANS_ABORT; + if (XFS_IFORK_Q(ip)) + goto trans_cancel; + if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { + /* + * For inodes coming from pre-6.2 filesystems. + */ + ASSERT(ip->i_d.di_aformat == 0); + ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; + } + ASSERT(ip->i_d.di_anextents == 0); + + xfs_trans_ijoin(tp, ip, 0); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + switch (ip->i_d.di_format) { + case XFS_DINODE_FMT_DEV: + ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; + break; + case XFS_DINODE_FMT_UUID: + ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3; + break; + case XFS_DINODE_FMT_LOCAL: + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); + if (!ip->i_d.di_forkoff) + ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3; + else if (mp->m_flags & XFS_MOUNT_ATTR2) + version = 2; + break; + default: + ASSERT(0); + error = EINVAL; + goto trans_cancel; + } + + ASSERT(ip->i_afp == NULL); + ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); + ip->i_afp->if_flags = XFS_IFEXTENTS; + logflags = 0; + xfs_bmap_init(&flist, &firstblock); + switch (ip->i_d.di_format) { + case XFS_DINODE_FMT_LOCAL: + error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist, + &logflags); + break; + case XFS_DINODE_FMT_EXTENTS: + error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock, + &flist, &logflags); + break; + case XFS_DINODE_FMT_BTREE: + error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist, + &logflags); + break; + default: + error = 0; + break; + } + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); + if (error) + goto bmap_cancel; + if (!xfs_sb_version_hasattr(&mp->m_sb) || + (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { + __int64_t sbfields = 0; + + spin_lock(&mp->m_sb_lock); + if (!xfs_sb_version_hasattr(&mp->m_sb)) { + xfs_sb_version_addattr(&mp->m_sb); + sbfields |= XFS_SB_VERSIONNUM; + } + if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) { + xfs_sb_version_addattr2(&mp->m_sb); + sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); + } + if (sbfields) { + spin_unlock(&mp->m_sb_lock); + xfs_mod_sb(tp, sbfields); + } else + spin_unlock(&mp->m_sb_lock); + } + + error = xfs_bmap_finish(&tp, &flist, &committed); + if (error) + goto bmap_cancel; + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + +bmap_cancel: + xfs_bmap_cancel(&flist); +trans_cancel: + xfs_trans_cancel(tp, cancel_flags); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +} + +/* + * Internal and external extent tree search functions. + */ + +/* + * Read in the extents to if_extents. + * All inode fields are set up by caller, we just traverse the btree + * and copy the records in. If the file system cannot contain unwritten + * extents, the records are checked for no "state" flags. + */ +int /* error */ +xfs_bmap_read_extents( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode */ + int whichfork) /* data or attr fork */ +{ + struct xfs_btree_block *block; /* current btree block */ + xfs_fsblock_t bno; /* block # of "block" */ + xfs_buf_t *bp; /* buffer for "block" */ + int error; /* error return value */ + xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */ + xfs_extnum_t i, j; /* index into the extents list */ + xfs_ifork_t *ifp; /* fork structure */ + int level; /* btree level, for checking */ + xfs_mount_t *mp; /* file system mount structure */ + __be64 *pp; /* pointer to block address */ + /* REFERENCED */ + xfs_extnum_t room; /* number of entries there's room for */ + + bno = NULLFSBLOCK; + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : + XFS_EXTFMT_INODE(ip); + block = ifp->if_broot; + /* + * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. + */ + level = be16_to_cpu(block->bb_level); + ASSERT(level > 0); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); + bno = be64_to_cpu(*pp); + ASSERT(bno != NULLDFSBNO); + ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); + /* + * Go down the tree until leaf level is reached, following the first + * pointer (leftmost) at each level. + */ + while (level-- > 0) { + error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); + if (error) + return error; + block = XFS_BUF_TO_BLOCK(bp); + XFS_WANT_CORRUPTED_GOTO( + xfs_bmap_sanity_check(mp, bp, level), + error0); + if (level == 0) + break; + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); + bno = be64_to_cpu(*pp); + XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); + xfs_trans_brelse(tp, bp); + } + /* + * Here with bp and block set to the leftmost leaf node in the tree. + */ + room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + i = 0; + /* + * Loop over all leaf nodes. Copy information to the extent records. + */ + for (;;) { + xfs_bmbt_rec_t *frp; + xfs_fsblock_t nextbno; + xfs_extnum_t num_recs; + xfs_extnum_t start; + + num_recs = xfs_btree_get_numrecs(block); + if (unlikely(i + num_recs > room)) { + ASSERT(i + num_recs <= room); + xfs_warn(ip->i_mount, + "corrupt dinode %Lu, (btree extents).", + (unsigned long long) ip->i_ino); + XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)", + XFS_ERRLEVEL_LOW, ip->i_mount, block); + goto error0; + } + XFS_WANT_CORRUPTED_GOTO( + xfs_bmap_sanity_check(mp, bp, 0), + error0); + /* + * Read-ahead the next leaf block, if any. + */ + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); + if (nextbno != NULLFSBLOCK) + xfs_btree_reada_bufl(mp, nextbno, 1, + &xfs_bmbt_buf_ops); + /* + * Copy records into the extent records. + */ + frp = XFS_BMBT_REC_ADDR(mp, block, 1); + start = i; + for (j = 0; j < num_recs; j++, i++, frp++) { + xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); + trp->l0 = be64_to_cpu(frp->l0); + trp->l1 = be64_to_cpu(frp->l1); + } + if (exntf == XFS_EXTFMT_NOSTATE) { + /* + * Check all attribute bmap btree records and + * any "older" data bmap btree records for a + * set bit in the "extent flag" position. + */ + if (unlikely(xfs_check_nostate_extents(ifp, + start, num_recs))) { + XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", + XFS_ERRLEVEL_LOW, + ip->i_mount); + goto error0; + } + } + xfs_trans_brelse(tp, bp); + bno = nextbno; + /* + * If we've reached the end, stop. + */ + if (bno == NULLFSBLOCK) + break; + error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); + if (error) + return error; + block = XFS_BUF_TO_BLOCK(bp); + } + ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); + XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); + return 0; +error0: + xfs_trans_brelse(tp, bp); + return EFSCORRUPTED; +} + + +/* + * Search the extent records for the entry containing block bno. + * If bno lies in a hole, point to the next entry. If bno lies + * past eof, *eofp will be set, and *prevp will contain the last + * entry (null if none). Else, *lastxp will be set to the index + * of the found entry; *gotp will contain the entry. + */ +STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ +xfs_bmap_search_multi_extents( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fileoff_t bno, /* block number searched for */ + int *eofp, /* out: end of file found */ + xfs_extnum_t *lastxp, /* out: last extent index */ + xfs_bmbt_irec_t *gotp, /* out: extent entry found */ + xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ +{ + xfs_bmbt_rec_host_t *ep; /* extent record pointer */ + xfs_extnum_t lastx; /* last extent index */ + + /* + * Initialize the extent entry structure to catch access to + * uninitialized br_startblock field. + */ + gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; + gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; + gotp->br_state = XFS_EXT_INVALID; +#if XFS_BIG_BLKNOS + gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; +#else + gotp->br_startblock = 0xffffa5a5; +#endif + prevp->br_startoff = NULLFILEOFF; + + ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); + if (lastx > 0) { + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp); + } + if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { + xfs_bmbt_get_all(ep, gotp); + *eofp = 0; + } else { + if (lastx > 0) { + *gotp = *prevp; + } + *eofp = 1; + ep = NULL; + } + *lastxp = lastx; + return ep; +} + +/* + * Search the extents list for the inode, for the extent containing bno. + * If bno lies in a hole, point to the next entry. If bno lies past eof, + * *eofp will be set, and *prevp will contain the last entry (null if none). + * Else, *lastxp will be set to the index of the found + * entry; *gotp will contain the entry. + */ +STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ +xfs_bmap_search_extents( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fileoff_t bno, /* block number searched for */ + int fork, /* data or attr fork */ + int *eofp, /* out: end of file found */ + xfs_extnum_t *lastxp, /* out: last extent index */ + xfs_bmbt_irec_t *gotp, /* out: extent entry found */ + xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ +{ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_bmbt_rec_host_t *ep; /* extent record pointer */ + + XFS_STATS_INC(xs_look_exlist); + ifp = XFS_IFORK_PTR(ip, fork); + + ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); + + if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && + !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { + xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, + "Access to block zero in inode %llu " + "start_block: %llx start_off: %llx " + "blkcnt: %llx extent-state: %x lastx: %x", + (unsigned long long)ip->i_ino, + (unsigned long long)gotp->br_startblock, + (unsigned long long)gotp->br_startoff, + (unsigned long long)gotp->br_blockcount, + gotp->br_state, *lastxp); + *lastxp = NULLEXTNUM; + *eofp = 1; + return NULL; + } + return ep; +} + +/* + * Returns the file-relative block number of the first unused block(s) + * in the file with at least "len" logically contiguous blocks free. + * This is the lowest-address hole if the file has holes, else the first block + * past the end of file. + * Return 0 if the file is currently local (in-inode). + */ +int /* error */ +xfs_bmap_first_unused( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode */ + xfs_extlen_t len, /* size of hole to find */ + xfs_fileoff_t *first_unused, /* unused block */ + int whichfork) /* data or attr fork */ +{ + int error; /* error return value */ + int idx; /* extent record index */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_fileoff_t lastaddr; /* last block number seen */ + xfs_fileoff_t lowest; /* lowest useful block */ + xfs_fileoff_t max; /* starting useful block */ + xfs_fileoff_t off; /* offset for this block */ + xfs_extnum_t nextents; /* number of extent entries */ + + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE || + XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS || + XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + *first_unused = 0; + return 0; + } + ifp = XFS_IFORK_PTR(ip, whichfork); + if (!(ifp->if_flags & XFS_IFEXTENTS) && + (error = xfs_iread_extents(tp, ip, whichfork))) + return error; + lowest = *first_unused; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { + xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); + off = xfs_bmbt_get_startoff(ep); + /* + * See if the hole before this extent will work. + */ + if (off >= lowest + len && off - max >= len) { + *first_unused = max; + return 0; + } + lastaddr = off + xfs_bmbt_get_blockcount(ep); + max = XFS_FILEOFF_MAX(lastaddr, lowest); + } + *first_unused = max; + return 0; +} + +/* + * Returns the file-relative block number of the last block - 1 before + * last_block (input value) in the file. + * This is not based on i_size, it is based on the extent records. + * Returns 0 for local files, as they do not have extent records. + */ +int /* error */ +xfs_bmap_last_before( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode */ + xfs_fileoff_t *last_block, /* last block */ + int whichfork) /* data or attr fork */ +{ + xfs_fileoff_t bno; /* input file offset */ + int eof; /* hit end of file */ + xfs_bmbt_rec_host_t *ep; /* pointer to last extent */ + int error; /* error return value */ + xfs_bmbt_irec_t got; /* current extent value */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_extnum_t lastx; /* last extent used */ + xfs_bmbt_irec_t prev; /* previous extent value */ + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return EIO; + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + *last_block = 0; + return 0; + } + ifp = XFS_IFORK_PTR(ip, whichfork); + if (!(ifp->if_flags & XFS_IFEXTENTS) && + (error = xfs_iread_extents(tp, ip, whichfork))) + return error; + bno = *last_block - 1; + ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, + &prev); + if (eof || xfs_bmbt_get_startoff(ep) > bno) { + if (prev.br_startoff == NULLFILEOFF) + *last_block = 0; + else + *last_block = prev.br_startoff + prev.br_blockcount; + } + /* + * Otherwise *last_block is already the right answer. + */ + return 0; +} + +int +xfs_bmap_last_extent( + struct xfs_trans *tp, + struct xfs_inode *ip, + int whichfork, + struct xfs_bmbt_irec *rec, + int *is_empty) +{ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + int error; + int nextents; + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; + } + + nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + if (nextents == 0) { + *is_empty = 1; + return 0; + } + + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec); + *is_empty = 0; + return 0; +} + +/* + * Check the last inode extent to determine whether this allocation will result + * in blocks being allocated at the end of the file. When we allocate new data + * blocks at the end of the file which do not start at the previous data block, + * we will try to align the new blocks at stripe unit boundaries. + * + * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be + * at, or past the EOF. + */ +STATIC int +xfs_bmap_isaeof( + struct xfs_bmalloca *bma, + int whichfork) +{ + struct xfs_bmbt_irec rec; + int is_empty; + int error; + + bma->aeof = 0; + error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, + &is_empty); + if (error) + return error; + + if (is_empty) { + bma->aeof = 1; + return 0; + } + + /* + * Check if we are allocation or past the last extent, or at least into + * the last delayed allocated extent. + */ + bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount || + (bma->offset >= rec.br_startoff && + isnullstartblock(rec.br_startblock)); + return 0; +} + +/* + * Returns the file-relative block number of the first block past eof in + * the file. This is not based on i_size, it is based on the extent records. + * Returns 0 for local files, as they do not have extent records. + */ +int +xfs_bmap_last_offset( + struct xfs_inode *ip, + xfs_fileoff_t *last_block, + int whichfork) +{ + struct xfs_bmbt_irec rec; + int is_empty; + int error; + + *last_block = 0; + + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) + return 0; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + return EIO; + + error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); + if (error || is_empty) + return error; + + *last_block = rec.br_startoff + rec.br_blockcount; + return 0; +} + +/* + * Returns whether the selected fork of the inode has exactly one + * block or not. For the data fork we check this matches di_size, + * implying the file's range is 0..bsize-1. + */ +int /* 1=>1 block, 0=>otherwise */ +xfs_bmap_one_block( + xfs_inode_t *ip, /* incore inode */ + int whichfork) /* data or attr fork */ +{ + xfs_bmbt_rec_host_t *ep; /* ptr to fork's extent */ + xfs_ifork_t *ifp; /* inode fork pointer */ + int rval; /* return value */ + xfs_bmbt_irec_t s; /* internal version of extent */ + +#ifndef DEBUG + if (whichfork == XFS_DATA_FORK) + return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize; +#endif /* !DEBUG */ + if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) + return 0; + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + return 0; + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + ep = xfs_iext_get_ext(ifp, 0); + xfs_bmbt_get_all(ep, &s); + rval = s.br_startoff == 0 && s.br_blockcount == 1; + if (rval && whichfork == XFS_DATA_FORK) + ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize); + return rval; +} + +/* + * Extent tree manipulation functions used during allocation. + */ + +/* + * Convert a delayed allocation to a real allocation. + */ +STATIC int /* error */ +xfs_bmap_add_extent_delay_real( + struct xfs_bmalloca *bma) +{ + struct xfs_bmbt_irec *new = &bma->got; + int diff; /* temp value */ + xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ + int error; /* error return value */ + int i; /* temp state */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_fileoff_t new_endoff; /* end offset of new entry */ + xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ + /* left is 0, right is 1, prev is 2 */ + int rval=0; /* return value (logging flags) */ + int state = 0;/* state bits, accessed thru macros */ + xfs_filblks_t da_new; /* new count del alloc blocks used */ + xfs_filblks_t da_old; /* old count del alloc blocks used */ + xfs_filblks_t temp=0; /* value for da_new calculations */ + xfs_filblks_t temp2=0;/* value for da_new calculations */ + int tmp_rval; /* partial logging flags */ + + ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK); + + ASSERT(bma->idx >= 0); + ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(!isnullstartblock(new->br_startblock)); + ASSERT(!bma->cur || + (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); + + XFS_STATS_INC(xs_add_exlist); + +#define LEFT r[0] +#define RIGHT r[1] +#define PREV r[2] + + /* + * Set up a bunch of variables to make the tests simpler. + */ + ep = xfs_iext_get_ext(ifp, bma->idx); + xfs_bmbt_get_all(ep, &PREV); + new_endoff = new->br_startoff + new->br_blockcount; + ASSERT(PREV.br_startoff <= new->br_startoff); + ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); + + da_old = startblockval(PREV.br_startblock); + da_new = 0; + + /* + * Set flags determining what part of the previous delayed allocation + * extent is being replaced by a real allocation. + */ + if (PREV.br_startoff == new->br_startoff) + state |= BMAP_LEFT_FILLING; + if (PREV.br_startoff + PREV.br_blockcount == new_endoff) + state |= BMAP_RIGHT_FILLING; + + /* + * Check and set flags if this segment has a left neighbor. + * Don't set contiguous if the combined extent would be too large. + */ + if (bma->idx > 0) { + state |= BMAP_LEFT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT); + + if (isnullstartblock(LEFT.br_startblock)) + state |= BMAP_LEFT_DELAY; + } + + if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && + LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && + LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && + LEFT.br_state == new->br_state && + LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; + + /* + * Check and set flags if this segment has a right neighbor. + * Don't set contiguous if the combined extent would be too large. + * Also check for all-three-contiguous being too large. + */ + if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + state |= BMAP_RIGHT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); + + if (isnullstartblock(RIGHT.br_startblock)) + state |= BMAP_RIGHT_DELAY; + } + + if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && + new_endoff == RIGHT.br_startoff && + new->br_startblock + new->br_blockcount == RIGHT.br_startblock && + new->br_state == RIGHT.br_state && + new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && + ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING)) != + (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING) || + LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount + <= MAXEXTLEN)) + state |= BMAP_RIGHT_CONTIG; + + error = 0; + /* + * Switch out based on the FILLING and CONTIG state bits. + */ + switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: + /* + * Filling in all of a previously delayed allocation extent. + * The left and right neighbors are both contiguous with new. + */ + bma->idx--; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + LEFT.br_blockcount + PREV.br_blockcount + + RIGHT.br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + xfs_iext_remove(bma->ip, bma->idx + 1, 2, state); + bma->ip->i_d.di_nextents--; + if (bma->cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_btree_delete(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_btree_decrement(bma->cur, 0, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + + PREV.br_blockcount + + RIGHT.br_blockcount, LEFT.br_state); + if (error) + goto done; + } + break; + + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: + /* + * Filling in all of a previously delayed allocation extent. + * The left neighbor is contiguous, the right is not. + */ + bma->idx--; + + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + LEFT.br_blockcount + PREV.br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); + if (bma->cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, + LEFT.br_startblock, LEFT.br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + + PREV.br_blockcount, LEFT.br_state); + if (error) + goto done; + } + break; + + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: + /* + * Filling in all of a previously delayed allocation extent. + * The right neighbor is contiguous, the left is not. + */ + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_startblock(ep, new->br_startblock); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount + RIGHT.br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); + if (bma->cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, PREV.br_startoff, + new->br_startblock, + PREV.br_blockcount + + RIGHT.br_blockcount, PREV.br_state); + if (error) + goto done; + } + break; + + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: + /* + * Filling in all of a previously delayed allocation extent. + * Neither the left nor right neighbors are contiguous with + * the new one. + */ + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_startblock(ep, new->br_startblock); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + break; + + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: + /* + * Filling in the first part of a previous delayed allocation. + * The left neighbor is contiguous. + */ + trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1), + LEFT.br_blockcount + new->br_blockcount); + xfs_bmbt_set_startoff(ep, + PREV.br_startoff + new->br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_); + + temp = PREV.br_blockcount - new->br_blockcount; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + if (bma->cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, + LEFT.br_startblock, LEFT.br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + + new->br_blockcount, + LEFT.br_state); + if (error) + goto done; + } + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), + startblockval(PREV.br_startblock)); + xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + bma->idx--; + break; + + case BMAP_LEFT_FILLING: + /* + * Filling in the first part of a previous delayed allocation. + * The left neighbor is not contiguous. + */ + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_startoff(ep, new_endoff); + temp = PREV.br_blockcount - new->br_blockcount; + xfs_bmbt_set_blockcount(ep, temp); + xfs_iext_insert(bma->ip, bma->idx, 1, new, state); + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, + &bma->cur, 1, &tmp_rval, XFS_DATA_FORK); + rval |= tmp_rval; + if (error) + goto done; + } + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), + startblockval(PREV.br_startblock) - + (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + ep = xfs_iext_get_ext(ifp, bma->idx + 1); + xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); + break; + + case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: + /* + * Filling in the last part of a previous delayed allocation. + * The right neighbor is contiguous with the new allocation. + */ + temp = PREV.br_blockcount - new->br_blockcount; + trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1), + new->br_startoff, new->br_startblock, + new->br_blockcount + RIGHT.br_blockcount, + RIGHT.br_state); + trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); + if (bma->cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, new->br_startoff, + new->br_startblock, + new->br_blockcount + + RIGHT.br_blockcount, + RIGHT.br_state); + if (error) + goto done; + } + + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), + startblockval(PREV.br_startblock)); + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + bma->idx++; + break; + + case BMAP_RIGHT_FILLING: + /* + * Filling in the last part of a previous delayed allocation. + * The right neighbor is not contiguous. + */ + temp = PREV.br_blockcount - new->br_blockcount; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state); + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, 1, + &tmp_rval, XFS_DATA_FORK); + rval |= tmp_rval; + if (error) + goto done; + } + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), + startblockval(PREV.br_startblock) - + (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + ep = xfs_iext_get_ext(ifp, bma->idx); + xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + bma->idx++; + break; + + case 0: + /* + * Filling in the middle part of a previous delayed allocation. + * Contiguity is impossible here. + * This case is avoided almost all the time. + * + * We start with a delayed allocation: + * + * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+ + * PREV @ idx + * + * and we are allocating: + * +rrrrrrrrrrrrrrrrr+ + * new + * + * and we set it up for insertion as: + * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+ + * new + * PREV @ idx LEFT RIGHT + * inserted at idx + 1 + */ + temp = new->br_startoff - PREV.br_startoff; + temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ + LEFT = *new; + RIGHT.br_state = PREV.br_state; + RIGHT.br_startblock = nullstartblock( + (int)xfs_bmap_worst_indlen(bma->ip, temp2)); + RIGHT.br_startoff = new_endoff; + RIGHT.br_blockcount = temp2; + /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ + xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state); + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, + 1, &tmp_rval, XFS_DATA_FORK); + rval |= tmp_rval; + if (error) + goto done; + } + temp = xfs_bmap_worst_indlen(bma->ip, temp); + temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); + diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - + (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + if (diff > 0) { + error = xfs_icsb_modify_counters(bma->ip->i_mount, + XFS_SBS_FDBLOCKS, + -((int64_t)diff), 0); + ASSERT(!error); + if (error) + goto done; + } + + ep = xfs_iext_get_ext(ifp, bma->idx); + xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2), + nullstartblock((int)temp2)); + trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_); + + bma->idx++; + da_new = temp + temp2; + break; + + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_LEFT_CONTIG: + case BMAP_RIGHT_CONTIG: + /* + * These cases are all impossible. + */ + ASSERT(0); + } + + /* convert to a btree if necessary */ + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + int tmp_logflags; /* partial log flag return val */ + + ASSERT(bma->cur == NULL); + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, + da_old > 0, &tmp_logflags, XFS_DATA_FORK); + bma->logflags |= tmp_logflags; + if (error) + goto done; + } + + /* adjust for changes in reserved delayed indirect blocks */ + if (da_old || da_new) { + temp = da_new; + if (bma->cur) + temp += bma->cur->bc_private.b.allocated; + ASSERT(temp <= da_old); + if (temp < da_old) + xfs_icsb_modify_counters(bma->ip->i_mount, + XFS_SBS_FDBLOCKS, + (int64_t)(da_old - temp), 0); + } + + /* clear out the allocated field, done with it now in any case. */ + if (bma->cur) + bma->cur->bc_private.b.allocated = 0; + + xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK); +done: + bma->logflags |= rval; + return error; +#undef LEFT +#undef RIGHT +#undef PREV +} + +/* + * Convert an unwritten allocation to a real allocation or vice versa. + */ +STATIC int /* error */ +xfs_bmap_add_extent_unwritten_real( + struct xfs_trans *tp, + xfs_inode_t *ip, /* incore inode pointer */ + xfs_extnum_t *idx, /* extent number to update/insert */ + xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ + xfs_fsblock_t *first, /* pointer to firstblock variable */ + xfs_bmap_free_t *flist, /* list of extents to be freed */ + int *logflagsp) /* inode logging flags */ +{ + xfs_btree_cur_t *cur; /* btree cursor */ + xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ + int error; /* error return value */ + int i; /* temp state */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_fileoff_t new_endoff; /* end offset of new entry */ + xfs_exntst_t newext; /* new extent state */ + xfs_exntst_t oldext; /* old extent state */ + xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ + /* left is 0, right is 1, prev is 2 */ + int rval=0; /* return value (logging flags) */ + int state = 0;/* state bits, accessed thru macros */ + + *logflagsp = 0; + + cur = *curp; + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + + ASSERT(*idx >= 0); + ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(!isnullstartblock(new->br_startblock)); + + XFS_STATS_INC(xs_add_exlist); + +#define LEFT r[0] +#define RIGHT r[1] +#define PREV r[2] + + /* + * Set up a bunch of variables to make the tests simpler. + */ + error = 0; + ep = xfs_iext_get_ext(ifp, *idx); + xfs_bmbt_get_all(ep, &PREV); + newext = new->br_state; + oldext = (newext == XFS_EXT_UNWRITTEN) ? + XFS_EXT_NORM : XFS_EXT_UNWRITTEN; + ASSERT(PREV.br_state == oldext); + new_endoff = new->br_startoff + new->br_blockcount; + ASSERT(PREV.br_startoff <= new->br_startoff); + ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); + + /* + * Set flags determining what part of the previous oldext allocation + * extent is being replaced by a newext allocation. + */ + if (PREV.br_startoff == new->br_startoff) + state |= BMAP_LEFT_FILLING; + if (PREV.br_startoff + PREV.br_blockcount == new_endoff) + state |= BMAP_RIGHT_FILLING; + + /* + * Check and set flags if this segment has a left neighbor. + * Don't set contiguous if the combined extent would be too large. + */ + if (*idx > 0) { + state |= BMAP_LEFT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); + + if (isnullstartblock(LEFT.br_startblock)) + state |= BMAP_LEFT_DELAY; + } + + if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && + LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && + LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && + LEFT.br_state == newext && + LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; + + /* + * Check and set flags if this segment has a right neighbor. + * Don't set contiguous if the combined extent would be too large. + * Also check for all-three-contiguous being too large. + */ + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + state |= BMAP_RIGHT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); + if (isnullstartblock(RIGHT.br_startblock)) + state |= BMAP_RIGHT_DELAY; + } + + if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && + new_endoff == RIGHT.br_startoff && + new->br_startblock + new->br_blockcount == RIGHT.br_startblock && + newext == RIGHT.br_state && + new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && + ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING)) != + (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING) || + LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount + <= MAXEXTLEN)) + state |= BMAP_RIGHT_CONTIG; + + /* + * Switch out based on the FILLING and CONTIG state bits. + */ + switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: + /* + * Setting all of a previous oldext extent to newext. + * The left and right neighbors are both contiguous with new. + */ + --*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), + LEFT.br_blockcount + PREV.br_blockcount + + RIGHT.br_blockcount); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + xfs_iext_remove(ip, *idx + 1, 2, state); + ip->i_d.di_nextents -= 2; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_delete(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_decrement(cur, 0, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_delete(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_decrement(cur, 0, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + PREV.br_blockcount + + RIGHT.br_blockcount, LEFT.br_state))) + goto done; + } + break; + + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: + /* + * Setting all of a previous oldext extent to newext. + * The left neighbor is contiguous, the right is not. + */ + --*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), + LEFT.br_blockcount + PREV.br_blockcount); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + xfs_iext_remove(ip, *idx + 1, 1, state); + ip->i_d.di_nextents--; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, PREV.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_delete(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_decrement(cur, 0, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + PREV.br_blockcount, + LEFT.br_state))) + goto done; + } + break; + + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: + /* + * Setting all of a previous oldext extent to newext. + * The right neighbor is contiguous, the left is not. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount + RIGHT.br_blockcount); + xfs_bmbt_set_state(ep, newext); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_remove(ip, *idx + 1, 1, state); + ip->i_d.di_nextents--; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_delete(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_decrement(cur, 0, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, new->br_startoff, + new->br_startblock, + new->br_blockcount + RIGHT.br_blockcount, + newext))) + goto done; + } + break; + + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: + /* + * Setting all of a previous oldext extent to newext. + * Neither the left nor right neighbors are contiguous with + * the new one. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_state(ep, newext); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + if (cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + newext))) + goto done; + } + break; + + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: + /* + * Setting the first part of a previous oldext extent to newext. + * The left neighbor is contiguous. + */ + trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), + LEFT.br_blockcount + new->br_blockcount); + xfs_bmbt_set_startoff(ep, + PREV.br_startoff + new->br_blockcount); + trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_startblock(ep, + new->br_startblock + new->br_blockcount); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount - new->br_blockcount); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + --*idx; + + if (cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, PREV.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, + PREV.br_startoff + new->br_blockcount, + PREV.br_startblock + new->br_blockcount, + PREV.br_blockcount - new->br_blockcount, + oldext))) + goto done; + if ((error = xfs_btree_decrement(cur, 0, &i))) + goto done; + error = xfs_bmbt_update(cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + new->br_blockcount, + LEFT.br_state); + if (error) + goto done; + } + break; + + case BMAP_LEFT_FILLING: + /* + * Setting the first part of a previous oldext extent to newext. + * The left neighbor is not contiguous. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); + xfs_bmbt_set_startoff(ep, new_endoff); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount - new->br_blockcount); + xfs_bmbt_set_startblock(ep, + new->br_startblock + new->br_blockcount); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + xfs_iext_insert(ip, *idx, 1, new, state); + ip->i_d.di_nextents++; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, PREV.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, + PREV.br_startoff + new->br_blockcount, + PREV.br_startblock + new->br_blockcount, + PREV.br_blockcount - new->br_blockcount, + oldext))) + goto done; + cur->bc_rec.b = *new; + if ((error = xfs_btree_insert(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + break; + + case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: + /* + * Setting the last part of a previous oldext extent to newext. + * The right neighbor is contiguous with the new allocation. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount - new->br_blockcount); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), + new->br_startoff, new->br_startblock, + new->br_blockcount + RIGHT.br_blockcount, newext); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + if (cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, + PREV.br_blockcount, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, PREV.br_startoff, + PREV.br_startblock, + PREV.br_blockcount - new->br_blockcount, + oldext))) + goto done; + if ((error = xfs_btree_increment(cur, 0, &i))) + goto done; + if ((error = xfs_bmbt_update(cur, new->br_startoff, + new->br_startblock, + new->br_blockcount + RIGHT.br_blockcount, + newext))) + goto done; + } + break; + + case BMAP_RIGHT_FILLING: + /* + * Setting the last part of a previous oldext extent to newext. + * The right neighbor is not contiguous. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount - new->br_blockcount); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; + xfs_iext_insert(ip, *idx, 1, new, state); + + ip->i_d.di_nextents++; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, PREV.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, PREV.br_startoff, + PREV.br_startblock, + PREV.br_blockcount - new->br_blockcount, + oldext))) + goto done; + if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + cur->bc_rec.b.br_state = XFS_EXT_NORM; + if ((error = xfs_btree_insert(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + break; + + case 0: + /* + * Setting the middle part of a previous oldext extent to + * newext. Contiguity is impossible here. + * One extent becomes three extents. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, + new->br_startoff - PREV.br_startoff); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + r[0] = *new; + r[1].br_startoff = new_endoff; + r[1].br_blockcount = + PREV.br_startoff + PREV.br_blockcount - new_endoff; + r[1].br_startblock = new->br_startblock + new->br_blockcount; + r[1].br_state = oldext; + + ++*idx; + xfs_iext_insert(ip, *idx, 2, &r[0], state); + + ip->i_d.di_nextents += 2; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, PREV.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + /* new right extent - oldext */ + if ((error = xfs_bmbt_update(cur, r[1].br_startoff, + r[1].br_startblock, r[1].br_blockcount, + r[1].br_state))) + goto done; + /* new left extent - oldext */ + cur->bc_rec.b = PREV; + cur->bc_rec.b.br_blockcount = + new->br_startoff - PREV.br_startoff; + if ((error = xfs_btree_insert(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + /* + * Reset the cursor to the position of the new extent + * we are about to insert as we can't trust it after + * the previous insert. + */ + if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + /* new middle extent - newext */ + cur->bc_rec.b.br_state = new->br_state; + if ((error = xfs_btree_insert(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + break; + + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_LEFT_CONTIG: + case BMAP_RIGHT_CONTIG: + /* + * These cases are all impossible. + */ + ASSERT(0); + } + + /* convert to a btree if necessary */ + if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { + int tmp_logflags; /* partial log flag return val */ + + ASSERT(cur == NULL); + error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, + 0, &tmp_logflags, XFS_DATA_FORK); + *logflagsp |= tmp_logflags; + if (error) + goto done; + } + + /* clear out the allocated field, done with it now in any case. */ + if (cur) { + cur->bc_private.b.allocated = 0; + *curp = cur; + } + + xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); +done: + *logflagsp |= rval; + return error; +#undef LEFT +#undef RIGHT +#undef PREV +} + +/* + * Convert a hole to a delayed allocation. + */ +STATIC void +xfs_bmap_add_extent_hole_delay( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_extnum_t *idx, /* extent number to update/insert */ + xfs_bmbt_irec_t *new) /* new data to add to file extents */ +{ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_bmbt_irec_t left; /* left neighbor extent entry */ + xfs_filblks_t newlen=0; /* new indirect size */ + xfs_filblks_t oldlen=0; /* old indirect size */ + xfs_bmbt_irec_t right; /* right neighbor extent entry */ + int state; /* state bits, accessed thru macros */ + xfs_filblks_t temp=0; /* temp for indirect calculations */ + + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + state = 0; + ASSERT(isnullstartblock(new->br_startblock)); + + /* + * Check and set flags if this segment has a left neighbor + */ + if (*idx > 0) { + state |= BMAP_LEFT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); + + if (isnullstartblock(left.br_startblock)) + state |= BMAP_LEFT_DELAY; + } + + /* + * Check and set flags if the current (right) segment exists. + * If it doesn't exist, we're converting the hole at end-of-file. + */ + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + state |= BMAP_RIGHT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); + + if (isnullstartblock(right.br_startblock)) + state |= BMAP_RIGHT_DELAY; + } + + /* + * Set contiguity flags on the left and right neighbors. + * Don't let extents get too large, even if the pieces are contiguous. + */ + if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && + left.br_startoff + left.br_blockcount == new->br_startoff && + left.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; + + if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && + new->br_startoff + new->br_blockcount == right.br_startoff && + new->br_blockcount + right.br_blockcount <= MAXEXTLEN && + (!(state & BMAP_LEFT_CONTIG) || + (left.br_blockcount + new->br_blockcount + + right.br_blockcount <= MAXEXTLEN))) + state |= BMAP_RIGHT_CONTIG; + + /* + * Switch out based on the contiguity flags. + */ + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + /* + * New allocation is contiguous with delayed allocations + * on the left and on the right. + * Merge all three into a single extent record. + */ + --*idx; + temp = left.br_blockcount + new->br_blockcount + + right.br_blockcount; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); + oldlen = startblockval(left.br_startblock) + + startblockval(new->br_startblock) + + startblockval(right.br_startblock); + newlen = xfs_bmap_worst_indlen(ip, temp); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), + nullstartblock((int)newlen)); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + xfs_iext_remove(ip, *idx + 1, 1, state); + break; + + case BMAP_LEFT_CONTIG: + /* + * New allocation is contiguous with a delayed allocation + * on the left. + * Merge the new allocation with the left neighbor. + */ + --*idx; + temp = left.br_blockcount + new->br_blockcount; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); + oldlen = startblockval(left.br_startblock) + + startblockval(new->br_startblock); + newlen = xfs_bmap_worst_indlen(ip, temp); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), + nullstartblock((int)newlen)); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + + case BMAP_RIGHT_CONTIG: + /* + * New allocation is contiguous with a delayed allocation + * on the right. + * Merge the new allocation with the right neighbor. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + temp = new->br_blockcount + right.br_blockcount; + oldlen = startblockval(new->br_startblock) + + startblockval(right.br_startblock); + newlen = xfs_bmap_worst_indlen(ip, temp); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), + new->br_startoff, + nullstartblock((int)newlen), temp, right.br_state); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + + case 0: + /* + * New allocation is not contiguous with another + * delayed allocation. + * Insert a new entry. + */ + oldlen = newlen = 0; + xfs_iext_insert(ip, *idx, 1, new, state); + break; + } + if (oldlen != newlen) { + ASSERT(oldlen > newlen); + xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, + (int64_t)(oldlen - newlen), 0); + /* + * Nothing to do for disk quota accounting here. + */ + } +} + +/* + * Convert a hole to a real allocation. + */ +STATIC int /* error */ +xfs_bmap_add_extent_hole_real( + struct xfs_bmalloca *bma, + int whichfork) +{ + struct xfs_bmbt_irec *new = &bma->got; + int error; /* error return value */ + int i; /* temp state */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_bmbt_irec_t left; /* left neighbor extent entry */ + xfs_bmbt_irec_t right; /* right neighbor extent entry */ + int rval=0; /* return value (logging flags) */ + int state; /* state bits, accessed thru macros */ + + ifp = XFS_IFORK_PTR(bma->ip, whichfork); + + ASSERT(bma->idx >= 0); + ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(!isnullstartblock(new->br_startblock)); + ASSERT(!bma->cur || + !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); + + XFS_STATS_INC(xs_add_exlist); + + state = 0; + if (whichfork == XFS_ATTR_FORK) + state |= BMAP_ATTRFORK; + + /* + * Check and set flags if this segment has a left neighbor. + */ + if (bma->idx > 0) { + state |= BMAP_LEFT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left); + if (isnullstartblock(left.br_startblock)) + state |= BMAP_LEFT_DELAY; + } + + /* + * Check and set flags if this segment has a current value. + * Not true if we're inserting into the "hole" at eof. + */ + if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + state |= BMAP_RIGHT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); + if (isnullstartblock(right.br_startblock)) + state |= BMAP_RIGHT_DELAY; + } + + /* + * We're inserting a real allocation between "left" and "right". + * Set the contiguity flags. Don't let extents get too large. + */ + if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && + left.br_startoff + left.br_blockcount == new->br_startoff && + left.br_startblock + left.br_blockcount == new->br_startblock && + left.br_state == new->br_state && + left.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; + + if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && + new->br_startoff + new->br_blockcount == right.br_startoff && + new->br_startblock + new->br_blockcount == right.br_startblock && + new->br_state == right.br_state && + new->br_blockcount + right.br_blockcount <= MAXEXTLEN && + (!(state & BMAP_LEFT_CONTIG) || + left.br_blockcount + new->br_blockcount + + right.br_blockcount <= MAXEXTLEN)) + state |= BMAP_RIGHT_CONTIG; + + error = 0; + /* + * Select which case we're in here, and implement it. + */ + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + /* + * New allocation is contiguous with real allocations on the + * left and on the right. + * Merge all three into a single extent record. + */ + --bma->idx; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + left.br_blockcount + new->br_blockcount + + right.br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); + + XFS_IFORK_NEXT_SET(bma->ip, whichfork, + XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1); + if (bma->cur == NULL) { + rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); + } else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff, + right.br_startblock, right.br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_btree_delete(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_btree_decrement(bma->cur, 0, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, left.br_startoff, + left.br_startblock, + left.br_blockcount + + new->br_blockcount + + right.br_blockcount, + left.br_state); + if (error) + goto done; + } + break; + + case BMAP_LEFT_CONTIG: + /* + * New allocation is contiguous with a real allocation + * on the left. + * Merge the new allocation with the left neighbor. + */ + --bma->idx; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + left.br_blockcount + new->br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + if (bma->cur == NULL) { + rval = xfs_ilog_fext(whichfork); + } else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff, + left.br_startblock, left.br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, left.br_startoff, + left.br_startblock, + left.br_blockcount + + new->br_blockcount, + left.br_state); + if (error) + goto done; + } + break; + + case BMAP_RIGHT_CONTIG: + /* + * New allocation is contiguous with a real allocation + * on the right. + * Merge the new allocation with the right neighbor. + */ + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx), + new->br_startoff, new->br_startblock, + new->br_blockcount + right.br_blockcount, + right.br_state); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + if (bma->cur == NULL) { + rval = xfs_ilog_fext(whichfork); + } else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, + right.br_startoff, + right.br_startblock, + right.br_blockcount, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, new->br_startoff, + new->br_startblock, + new->br_blockcount + + right.br_blockcount, + right.br_state); + if (error) + goto done; + } + break; + + case 0: + /* + * New allocation is not contiguous with another + * real allocation. + * Insert a new entry. + */ + xfs_iext_insert(bma->ip, bma->idx, 1, new, state); + XFS_IFORK_NEXT_SET(bma->ip, whichfork, + XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1); + if (bma->cur == NULL) { + rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); + } else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, + new->br_startoff, + new->br_startblock, + new->br_blockcount, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + bma->cur->bc_rec.b.br_state = new->br_state; + error = xfs_btree_insert(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + break; + } + + /* convert to a btree if necessary */ + if (xfs_bmap_needs_btree(bma->ip, whichfork)) { + int tmp_logflags; /* partial log flag return val */ + + ASSERT(bma->cur == NULL); + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, + 0, &tmp_logflags, whichfork); + bma->logflags |= tmp_logflags; + if (error) + goto done; + } + + /* clear out the allocated field, done with it now in any case. */ + if (bma->cur) + bma->cur->bc_private.b.allocated = 0; + + xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); +done: + bma->logflags |= rval; + return error; +} + +/* + * Functions used in the extent read, allocate and remove paths + */ + +/* + * Adjust the size of the new extent based on di_extsize and rt extsize. + */ +int +xfs_bmap_extsize_align( + xfs_mount_t *mp, + xfs_bmbt_irec_t *gotp, /* next extent pointer */ + xfs_bmbt_irec_t *prevp, /* previous extent pointer */ + xfs_extlen_t extsz, /* align to this extent size */ + int rt, /* is this a realtime inode? */ + int eof, /* is extent at end-of-file? */ + int delay, /* creating delalloc extent? */ + int convert, /* overwriting unwritten extent? */ + xfs_fileoff_t *offp, /* in/out: aligned offset */ + xfs_extlen_t *lenp) /* in/out: aligned length */ +{ + xfs_fileoff_t orig_off; /* original offset */ + xfs_extlen_t orig_alen; /* original length */ + xfs_fileoff_t orig_end; /* original off+len */ + xfs_fileoff_t nexto; /* next file offset */ + xfs_fileoff_t prevo; /* previous file offset */ + xfs_fileoff_t align_off; /* temp for offset */ + xfs_extlen_t align_alen; /* temp for length */ + xfs_extlen_t temp; /* temp for calculations */ + + if (convert) + return 0; + + orig_off = align_off = *offp; + orig_alen = align_alen = *lenp; + orig_end = orig_off + orig_alen; + + /* + * If this request overlaps an existing extent, then don't + * attempt to perform any additional alignment. + */ + if (!delay && !eof && + (orig_off >= gotp->br_startoff) && + (orig_end <= gotp->br_startoff + gotp->br_blockcount)) { + return 0; + } + + /* + * If the file offset is unaligned vs. the extent size + * we need to align it. This will be possible unless + * the file was previously written with a kernel that didn't + * perform this alignment, or if a truncate shot us in the + * foot. + */ + temp = do_mod(orig_off, extsz); + if (temp) { + align_alen += temp; + align_off -= temp; + } + /* + * Same adjustment for the end of the requested area. + */ + if ((temp = (align_alen % extsz))) { + align_alen += extsz - temp; + } + /* + * If the previous block overlaps with this proposed allocation + * then move the start forward without adjusting the length. + */ + if (prevp->br_startoff != NULLFILEOFF) { + if (prevp->br_startblock == HOLESTARTBLOCK) + prevo = prevp->br_startoff; + else + prevo = prevp->br_startoff + prevp->br_blockcount; + } else + prevo = 0; + if (align_off != orig_off && align_off < prevo) + align_off = prevo; + /* + * If the next block overlaps with this proposed allocation + * then move the start back without adjusting the length, + * but not before offset 0. + * This may of course make the start overlap previous block, + * and if we hit the offset 0 limit then the next block + * can still overlap too. + */ + if (!eof && gotp->br_startoff != NULLFILEOFF) { + if ((delay && gotp->br_startblock == HOLESTARTBLOCK) || + (!delay && gotp->br_startblock == DELAYSTARTBLOCK)) + nexto = gotp->br_startoff + gotp->br_blockcount; + else + nexto = gotp->br_startoff; + } else + nexto = NULLFILEOFF; + if (!eof && + align_off + align_alen != orig_end && + align_off + align_alen > nexto) + align_off = nexto > align_alen ? nexto - align_alen : 0; + /* + * If we're now overlapping the next or previous extent that + * means we can't fit an extsz piece in this hole. Just move + * the start forward to the first valid spot and set + * the length so we hit the end. + */ + if (align_off != orig_off && align_off < prevo) + align_off = prevo; + if (align_off + align_alen != orig_end && + align_off + align_alen > nexto && + nexto != NULLFILEOFF) { + ASSERT(nexto > prevo); + align_alen = nexto - align_off; + } + + /* + * If realtime, and the result isn't a multiple of the realtime + * extent size we need to remove blocks until it is. + */ + if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) { + /* + * We're not covering the original request, or + * we won't be able to once we fix the length. + */ + if (orig_off < align_off || + orig_end > align_off + align_alen || + align_alen - temp < orig_alen) + return EINVAL; + /* + * Try to fix it by moving the start up. + */ + if (align_off + temp <= orig_off) { + align_alen -= temp; + align_off += temp; + } + /* + * Try to fix it by moving the end in. + */ + else if (align_off + align_alen - temp >= orig_end) + align_alen -= temp; + /* + * Set the start to the minimum then trim the length. + */ + else { + align_alen -= orig_off - align_off; + align_off = orig_off; + align_alen -= align_alen % mp->m_sb.sb_rextsize; + } + /* + * Result doesn't cover the request, fail it. + */ + if (orig_off < align_off || orig_end > align_off + align_alen) + return EINVAL; + } else { + ASSERT(orig_off >= align_off); + ASSERT(orig_end <= align_off + align_alen); + } + +#ifdef DEBUG + if (!eof && gotp->br_startoff != NULLFILEOFF) + ASSERT(align_off + align_alen <= gotp->br_startoff); + if (prevp->br_startoff != NULLFILEOFF) + ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount); +#endif + + *lenp = align_alen; + *offp = align_off; + return 0; +} + +#define XFS_ALLOC_GAP_UNITS 4 + +void +xfs_bmap_adjacent( + struct xfs_bmalloca *ap) /* bmap alloc argument struct */ +{ + xfs_fsblock_t adjust; /* adjustment to block numbers */ + xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ + xfs_mount_t *mp; /* mount point structure */ + int nullfb; /* true if ap->firstblock isn't set */ + int rt; /* true if inode is realtime */ + +#define ISVALID(x,y) \ + (rt ? \ + (x) < mp->m_sb.sb_rblocks : \ + XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \ + XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \ + XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) + + mp = ap->ip->i_mount; + nullfb = *ap->firstblock == NULLFSBLOCK; + rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; + fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); + /* + * If allocating at eof, and there's a previous real block, + * try to use its last block as our starting point. + */ + if (ap->eof && ap->prev.br_startoff != NULLFILEOFF && + !isnullstartblock(ap->prev.br_startblock) && + ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount, + ap->prev.br_startblock)) { + ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount; + /* + * Adjust for the gap between prevp and us. + */ + adjust = ap->offset - + (ap->prev.br_startoff + ap->prev.br_blockcount); + if (adjust && + ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) + ap->blkno += adjust; + } + /* + * If not at eof, then compare the two neighbor blocks. + * Figure out whether either one gives us a good starting point, + * and pick the better one. + */ + else if (!ap->eof) { + xfs_fsblock_t gotbno; /* right side block number */ + xfs_fsblock_t gotdiff=0; /* right side difference */ + xfs_fsblock_t prevbno; /* left side block number */ + xfs_fsblock_t prevdiff=0; /* left side difference */ + + /* + * If there's a previous (left) block, select a requested + * start block based on it. + */ + if (ap->prev.br_startoff != NULLFILEOFF && + !isnullstartblock(ap->prev.br_startblock) && + (prevbno = ap->prev.br_startblock + + ap->prev.br_blockcount) && + ISVALID(prevbno, ap->prev.br_startblock)) { + /* + * Calculate gap to end of previous block. + */ + adjust = prevdiff = ap->offset - + (ap->prev.br_startoff + + ap->prev.br_blockcount); + /* + * Figure the startblock based on the previous block's + * end and the gap size. + * Heuristic! + * If the gap is large relative to the piece we're + * allocating, or using it gives us an invalid block + * number, then just use the end of the previous block. + */ + if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && + ISVALID(prevbno + prevdiff, + ap->prev.br_startblock)) + prevbno += adjust; + else + prevdiff += adjust; + /* + * If the firstblock forbids it, can't use it, + * must use default. + */ + if (!rt && !nullfb && + XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno) + prevbno = NULLFSBLOCK; + } + /* + * No previous block or can't follow it, just default. + */ + else + prevbno = NULLFSBLOCK; + /* + * If there's a following (right) block, select a requested + * start block based on it. + */ + if (!isnullstartblock(ap->got.br_startblock)) { + /* + * Calculate gap to start of next block. + */ + adjust = gotdiff = ap->got.br_startoff - ap->offset; + /* + * Figure the startblock based on the next block's + * start and the gap size. + */ + gotbno = ap->got.br_startblock; + /* + * Heuristic! + * If the gap is large relative to the piece we're + * allocating, or using it gives us an invalid block + * number, then just use the start of the next block + * offset by our length. + */ + if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && + ISVALID(gotbno - gotdiff, gotbno)) + gotbno -= adjust; + else if (ISVALID(gotbno - ap->length, gotbno)) { + gotbno -= ap->length; + gotdiff += adjust - ap->length; + } else + gotdiff += adjust; + /* + * If the firstblock forbids it, can't use it, + * must use default. + */ + if (!rt && !nullfb && + XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno) + gotbno = NULLFSBLOCK; + } + /* + * No next block, just default. + */ + else + gotbno = NULLFSBLOCK; + /* + * If both valid, pick the better one, else the only good + * one, else ap->blkno is already set (to 0 or the inode block). + */ + if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) + ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno; + else if (prevbno != NULLFSBLOCK) + ap->blkno = prevbno; + else if (gotbno != NULLFSBLOCK) + ap->blkno = gotbno; + } +#undef ISVALID +} + +static int +xfs_bmap_longest_free_extent( + struct xfs_trans *tp, + xfs_agnumber_t ag, + xfs_extlen_t *blen, + int *notinit) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_perag *pag; + xfs_extlen_t longest; + int error = 0; + + pag = xfs_perag_get(mp, ag); + if (!pag->pagf_init) { + error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK); + if (error) + goto out; + + if (!pag->pagf_init) { + *notinit = 1; + goto out; + } + } + + longest = xfs_alloc_longest_free_extent(mp, pag); + if (*blen < longest) + *blen = longest; + +out: + xfs_perag_put(pag); + return error; +} + +static void +xfs_bmap_select_minlen( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + xfs_extlen_t *blen, + int notinit) +{ + if (notinit || *blen < ap->minlen) { + /* + * Since we did a BUF_TRYLOCK above, it is possible that + * there is space for this request. + */ + args->minlen = ap->minlen; + } else if (*blen < args->maxlen) { + /* + * If the best seen length is less than the request length, + * use the best as the minimum. + */ + args->minlen = *blen; + } else { + /* + * Otherwise we've seen an extent as big as maxlen, use that + * as the minimum. + */ + args->minlen = args->maxlen; + } +} + +STATIC int +xfs_bmap_btalloc_nullfb( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + xfs_extlen_t *blen) +{ + struct xfs_mount *mp = ap->ip->i_mount; + xfs_agnumber_t ag, startag; + int notinit = 0; + int error; + + args->type = XFS_ALLOCTYPE_START_BNO; + args->total = ap->total; + + startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); + if (startag == NULLAGNUMBER) + startag = ag = 0; + + while (*blen < args->maxlen) { + error = xfs_bmap_longest_free_extent(args->tp, ag, blen, + ¬init); + if (error) + return error; + + if (++ag == mp->m_sb.sb_agcount) + ag = 0; + if (ag == startag) + break; + } + + xfs_bmap_select_minlen(ap, args, blen, notinit); + return 0; +} + +STATIC int +xfs_bmap_btalloc_filestreams( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + xfs_extlen_t *blen) +{ + struct xfs_mount *mp = ap->ip->i_mount; + xfs_agnumber_t ag; + int notinit = 0; + int error; + + args->type = XFS_ALLOCTYPE_NEAR_BNO; + args->total = ap->total; + + ag = XFS_FSB_TO_AGNO(mp, args->fsbno); + if (ag == NULLAGNUMBER) + ag = 0; + + error = xfs_bmap_longest_free_extent(args->tp, ag, blen, ¬init); + if (error) + return error; + + if (*blen < args->maxlen) { + error = xfs_filestream_new_ag(ap, &ag); + if (error) + return error; + + error = xfs_bmap_longest_free_extent(args->tp, ag, blen, + ¬init); + if (error) + return error; + + } + + xfs_bmap_select_minlen(ap, args, blen, notinit); + + /* + * Set the failure fallback case to look in the selected AG as stream + * may have moved. + */ + ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); + return 0; +} + +STATIC int +xfs_bmap_btalloc( + struct xfs_bmalloca *ap) /* bmap alloc argument struct */ +{ + xfs_mount_t *mp; /* mount point structure */ + xfs_alloctype_t atype = 0; /* type for allocation routines */ + xfs_extlen_t align; /* minimum allocation alignment */ + xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ + xfs_agnumber_t ag; + xfs_alloc_arg_t args; + xfs_extlen_t blen; + xfs_extlen_t nextminlen = 0; + int nullfb; /* true if ap->firstblock isn't set */ + int isaligned; + int tryagain; + int error; + int stripe_align; + + ASSERT(ap->length); + + mp = ap->ip->i_mount; + + /* stripe alignment for allocation is determined by mount parameters */ + stripe_align = 0; + if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) + stripe_align = mp->m_swidth; + else if (mp->m_dalign) + stripe_align = mp->m_dalign; + + align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; + if (unlikely(align)) { + error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, + align, 0, ap->eof, 0, ap->conv, + &ap->offset, &ap->length); + ASSERT(!error); + ASSERT(ap->length); + } + + + nullfb = *ap->firstblock == NULLFSBLOCK; + fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); + if (nullfb) { + if (ap->userdata && xfs_inode_is_filestream(ap->ip)) { + ag = xfs_filestream_lookup_ag(ap->ip); + ag = (ag != NULLAGNUMBER) ? ag : 0; + ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0); + } else { + ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino); + } + } else + ap->blkno = *ap->firstblock; + + xfs_bmap_adjacent(ap); + + /* + * If allowed, use ap->blkno; otherwise must use firstblock since + * it's in the right allocation group. + */ + if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno) + ; + else + ap->blkno = *ap->firstblock; + /* + * Normal allocation, done through xfs_alloc_vextent. + */ + tryagain = isaligned = 0; + memset(&args, 0, sizeof(args)); + args.tp = ap->tp; + args.mp = mp; + args.fsbno = ap->blkno; + + /* Trim the allocation back to the maximum an AG can fit. */ + args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); + args.firstblock = *ap->firstblock; + blen = 0; + if (nullfb) { + /* + * Search for an allocation group with a single extent large + * enough for the request. If one isn't found, then adjust + * the minimum allocation size to the largest space found. + */ + if (ap->userdata && xfs_inode_is_filestream(ap->ip)) + error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); + else + error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); + if (error) + return error; + } else if (ap->flist->xbf_low) { + if (xfs_inode_is_filestream(ap->ip)) + args.type = XFS_ALLOCTYPE_FIRST_AG; + else + args.type = XFS_ALLOCTYPE_START_BNO; + args.total = args.minlen = ap->minlen; + } else { + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.total = ap->total; + args.minlen = ap->minlen; + } + /* apply extent size hints if obtained earlier */ + if (unlikely(align)) { + args.prod = align; + if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) + args.mod = (xfs_extlen_t)(args.prod - args.mod); + } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { + args.prod = 1; + args.mod = 0; + } else { + args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; + if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod)))) + args.mod = (xfs_extlen_t)(args.prod - args.mod); + } + /* + * If we are not low on available data blocks, and the + * underlying logical volume manager is a stripe, and + * the file offset is zero then try to allocate data + * blocks on stripe unit boundary. + * NOTE: ap->aeof is only set if the allocation length + * is >= the stripe unit and the allocation offset is + * at the end of file. + */ + if (!ap->flist->xbf_low && ap->aeof) { + if (!ap->offset) { + args.alignment = stripe_align; + atype = args.type; + isaligned = 1; + /* + * Adjust for alignment + */ + if (blen > args.alignment && blen <= args.maxlen) + args.minlen = blen - args.alignment; + args.minalignslop = 0; + } else { + /* + * First try an exact bno allocation. + * If it fails then do a near or start bno + * allocation with alignment turned on. + */ + atype = args.type; + tryagain = 1; + args.type = XFS_ALLOCTYPE_THIS_BNO; + args.alignment = 1; + /* + * Compute the minlen+alignment for the + * next case. Set slop so that the value + * of minlen+alignment+slop doesn't go up + * between the calls. + */ + if (blen > stripe_align && blen <= args.maxlen) + nextminlen = blen - stripe_align; + else + nextminlen = args.minlen; + if (nextminlen + stripe_align > args.minlen + 1) + args.minalignslop = + nextminlen + stripe_align - + args.minlen - 1; + else + args.minalignslop = 0; + } + } else { + args.alignment = 1; + args.minalignslop = 0; + } + args.minleft = ap->minleft; + args.wasdel = ap->wasdel; + args.isfl = 0; + args.userdata = ap->userdata; + if ((error = xfs_alloc_vextent(&args))) + return error; + if (tryagain && args.fsbno == NULLFSBLOCK) { + /* + * Exact allocation failed. Now try with alignment + * turned on. + */ + args.type = atype; + args.fsbno = ap->blkno; + args.alignment = stripe_align; + args.minlen = nextminlen; + args.minalignslop = 0; + isaligned = 1; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + if (isaligned && args.fsbno == NULLFSBLOCK) { + /* + * allocation failed, so turn off alignment and + * try again. + */ + args.type = atype; + args.fsbno = ap->blkno; + args.alignment = 0; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + if (args.fsbno == NULLFSBLOCK && nullfb && + args.minlen > ap->minlen) { + args.minlen = ap->minlen; + args.type = XFS_ALLOCTYPE_START_BNO; + args.fsbno = ap->blkno; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + if (args.fsbno == NULLFSBLOCK && nullfb) { + args.fsbno = 0; + args.type = XFS_ALLOCTYPE_FIRST_AG; + args.total = ap->minlen; + args.minleft = 0; + if ((error = xfs_alloc_vextent(&args))) + return error; + ap->flist->xbf_low = 1; + } + if (args.fsbno != NULLFSBLOCK) { + /* + * check the allocation happened at the same or higher AG than + * the first block that was allocated. + */ + ASSERT(*ap->firstblock == NULLFSBLOCK || + XFS_FSB_TO_AGNO(mp, *ap->firstblock) == + XFS_FSB_TO_AGNO(mp, args.fsbno) || + (ap->flist->xbf_low && + XFS_FSB_TO_AGNO(mp, *ap->firstblock) < + XFS_FSB_TO_AGNO(mp, args.fsbno))); + + ap->blkno = args.fsbno; + if (*ap->firstblock == NULLFSBLOCK) + *ap->firstblock = args.fsbno; + ASSERT(nullfb || fb_agno == args.agno || + (ap->flist->xbf_low && fb_agno < args.agno)); + ap->length = args.len; + ap->ip->i_d.di_nblocks += args.len; + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); + if (ap->wasdel) + ap->ip->i_delayed_blks -= args.len; + /* + * Adjust the disk quota also. This was reserved + * earlier. + */ + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : + XFS_TRANS_DQ_BCOUNT, + (long) args.len); + } else { + ap->blkno = NULLFSBLOCK; + ap->length = 0; + } + return 0; +} + +/* + * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. + * It figures out where to ask the underlying allocator to put the new extent. + */ +STATIC int +xfs_bmap_alloc( + struct xfs_bmalloca *ap) /* bmap alloc argument struct */ +{ + if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata) + return xfs_bmap_rtalloc(ap); + return xfs_bmap_btalloc(ap); +} + +/* + * Trim the returned map to the required bounds + */ +STATIC void +xfs_bmapi_trim_map( + struct xfs_bmbt_irec *mval, + struct xfs_bmbt_irec *got, + xfs_fileoff_t *bno, + xfs_filblks_t len, + xfs_fileoff_t obno, + xfs_fileoff_t end, + int n, + int flags) +{ + if ((flags & XFS_BMAPI_ENTIRE) || + got->br_startoff + got->br_blockcount <= obno) { + *mval = *got; + if (isnullstartblock(got->br_startblock)) + mval->br_startblock = DELAYSTARTBLOCK; + return; + } + + if (obno > *bno) + *bno = obno; + ASSERT((*bno >= obno) || (n == 0)); + ASSERT(*bno < end); + mval->br_startoff = *bno; + if (isnullstartblock(got->br_startblock)) + mval->br_startblock = DELAYSTARTBLOCK; + else + mval->br_startblock = got->br_startblock + + (*bno - got->br_startoff); + /* + * Return the minimum of what we got and what we asked for for + * the length. We can use the len variable here because it is + * modified below and we could have been there before coming + * here if the first part of the allocation didn't overlap what + * was asked for. + */ + mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno, + got->br_blockcount - (*bno - got->br_startoff)); + mval->br_state = got->br_state; + ASSERT(mval->br_blockcount <= len); + return; +} + +/* + * Update and validate the extent map to return + */ +STATIC void +xfs_bmapi_update_map( + struct xfs_bmbt_irec **map, + xfs_fileoff_t *bno, + xfs_filblks_t *len, + xfs_fileoff_t obno, + xfs_fileoff_t end, + int *n, + int flags) +{ + xfs_bmbt_irec_t *mval = *map; + + ASSERT((flags & XFS_BMAPI_ENTIRE) || + ((mval->br_startoff + mval->br_blockcount) <= end)); + ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) || + (mval->br_startoff < obno)); + + *bno = mval->br_startoff + mval->br_blockcount; + *len = end - *bno; + if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) { + /* update previous map with new information */ + ASSERT(mval->br_startblock == mval[-1].br_startblock); + ASSERT(mval->br_blockcount > mval[-1].br_blockcount); + ASSERT(mval->br_state == mval[-1].br_state); + mval[-1].br_blockcount = mval->br_blockcount; + mval[-1].br_state = mval->br_state; + } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK && + mval[-1].br_startblock != DELAYSTARTBLOCK && + mval[-1].br_startblock != HOLESTARTBLOCK && + mval->br_startblock == mval[-1].br_startblock + + mval[-1].br_blockcount && + ((flags & XFS_BMAPI_IGSTATE) || + mval[-1].br_state == mval->br_state)) { + ASSERT(mval->br_startoff == + mval[-1].br_startoff + mval[-1].br_blockcount); + mval[-1].br_blockcount += mval->br_blockcount; + } else if (*n > 0 && + mval->br_startblock == DELAYSTARTBLOCK && + mval[-1].br_startblock == DELAYSTARTBLOCK && + mval->br_startoff == + mval[-1].br_startoff + mval[-1].br_blockcount) { + mval[-1].br_blockcount += mval->br_blockcount; + mval[-1].br_state = mval->br_state; + } else if (!((*n == 0) && + ((mval->br_startoff + mval->br_blockcount) <= + obno))) { + mval++; + (*n)++; + } + *map = mval; +} + +/* + * Map file blocks to filesystem blocks without allocation. + */ +int +xfs_bmapi_read( + struct xfs_inode *ip, + xfs_fileoff_t bno, + xfs_filblks_t len, + struct xfs_bmbt_irec *mval, + int *nmap, + int flags) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec prev; + xfs_fileoff_t obno; + xfs_fileoff_t end; + xfs_extnum_t lastx; + int error; + int eof; + int n = 0; + int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; + + ASSERT(*nmap >= 1); + ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| + XFS_BMAPI_IGSTATE))); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); + + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); + return EFSCORRUPTED; + } + + if (XFS_FORCED_SHUTDOWN(mp)) + return EIO; + + XFS_STATS_INC(xs_blk_mapr); + + ifp = XFS_IFORK_PTR(ip, whichfork); + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, whichfork); + if (error) + return error; + } + + xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev); + end = bno + len; + obno = bno; + + while (bno < end && n < *nmap) { + /* Reading past eof, act as though there's a hole up to end. */ + if (eof) + got.br_startoff = end; + if (got.br_startoff > bno) { + /* Reading in a hole. */ + mval->br_startoff = bno; + mval->br_startblock = HOLESTARTBLOCK; + mval->br_blockcount = + XFS_FILBLKS_MIN(len, got.br_startoff - bno); + mval->br_state = XFS_EXT_NORM; + bno += mval->br_blockcount; + len -= mval->br_blockcount; + mval++; + n++; + continue; + } + + /* set up the extent map to return. */ + xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); + xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); + + /* If we're done, stop now. */ + if (bno >= end || n >= *nmap) + break; + + /* Else go on to the next record. */ + if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); + else + eof = 1; + } + *nmap = n; + return 0; +} + +STATIC int +xfs_bmapi_reserve_delalloc( + struct xfs_inode *ip, + xfs_fileoff_t aoff, + xfs_filblks_t len, + struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *prev, + xfs_extnum_t *lastx, + int eof) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + xfs_extlen_t alen; + xfs_extlen_t indlen; + char rt = XFS_IS_REALTIME_INODE(ip); + xfs_extlen_t extsz; + int error; + + alen = XFS_FILBLKS_MIN(len, MAXEXTLEN); + if (!eof) + alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); + + /* Figure out the extent size, adjust alen */ + extsz = xfs_get_extsz_hint(ip); + if (extsz) { + /* + * Make sure we don't exceed a single extent length when we + * align the extent by reducing length we are going to + * allocate by the maximum amount extent size aligment may + * require. + */ + alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1)); + error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, + 1, 0, &aoff, &alen); + ASSERT(!error); + } + + if (rt) + extsz = alen / mp->m_sb.sb_rextsize; + + /* + * Make a transaction-less quota reservation for delayed allocation + * blocks. This number gets adjusted later. We return if we haven't + * allocated blocks already inside this loop. + */ + error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, + rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + if (error) + return error; + + /* + * Split changing sb for alen and indlen since they could be coming + * from different places. + */ + indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); + ASSERT(indlen > 0); + + if (rt) { + error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, + -((int64_t)extsz), 0); + } else { + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + -((int64_t)alen), 0); + } + + if (error) + goto out_unreserve_quota; + + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + -((int64_t)indlen), 0); + if (error) + goto out_unreserve_blocks; + + + ip->i_delayed_blks += alen; + + got->br_startoff = aoff; + got->br_startblock = nullstartblock(indlen); + got->br_blockcount = alen; + got->br_state = XFS_EXT_NORM; + xfs_bmap_add_extent_hole_delay(ip, lastx, got); + + /* + * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay + * might have merged it into one of the neighbouring ones. + */ + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + + ASSERT(got->br_startoff <= aoff); + ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); + ASSERT(isnullstartblock(got->br_startblock)); + ASSERT(got->br_state == XFS_EXT_NORM); + return 0; + +out_unreserve_blocks: + if (rt) + xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0); + else + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0); +out_unreserve_quota: + if (XFS_IS_QUOTA_ON(mp)) + xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? + XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + return error; +} + +/* + * Map file blocks to filesystem blocks, adding delayed allocations as needed. + */ +int +xfs_bmapi_delay( + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t bno, /* starting file offs. mapped */ + xfs_filblks_t len, /* length to map in file */ + struct xfs_bmbt_irec *mval, /* output: map values */ + int *nmap, /* i/o: mval size/count */ + int flags) /* XFS_BMAPI_... */ +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + struct xfs_bmbt_irec got; /* current file extent record */ + struct xfs_bmbt_irec prev; /* previous file extent record */ + xfs_fileoff_t obno; /* old block number (offset) */ + xfs_fileoff_t end; /* end of mapped file region */ + xfs_extnum_t lastx; /* last useful extent number */ + int eof; /* we've hit the end of extents */ + int n = 0; /* current extent index */ + int error = 0; + + ASSERT(*nmap >= 1); + ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); + ASSERT(!(flags & ~XFS_BMAPI_ENTIRE)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp); + return EFSCORRUPTED; + } + + if (XFS_FORCED_SHUTDOWN(mp)) + return EIO; + + XFS_STATS_INC(xs_blk_mapw); + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); + if (error) + return error; + } + + xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev); + end = bno + len; + obno = bno; + + while (bno < end && n < *nmap) { + if (eof || got.br_startoff > bno) { + error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got, + &prev, &lastx, eof); + if (error) { + if (n == 0) { + *nmap = 0; + return error; + } + break; + } + } + + /* set up the extent map to return. */ + xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); + xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); + + /* If we're done, stop now. */ + if (bno >= end || n >= *nmap) + break; + + /* Else go on to the next record. */ + prev = got; + if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); + else + eof = 1; + } + + *nmap = n; + return 0; +} + + +int +__xfs_bmapi_allocate( + struct xfs_bmalloca *bma) +{ + struct xfs_mount *mp = bma->ip->i_mount; + int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; + struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); + int tmp_logflags = 0; + int error; + + ASSERT(bma->length > 0); + + /* + * For the wasdelay case, we could also just allocate the stuff asked + * for in this bmap call but that wouldn't be as good. + */ + if (bma->wasdel) { + bma->length = (xfs_extlen_t)bma->got.br_blockcount; + bma->offset = bma->got.br_startoff; + if (bma->idx != NULLEXTNUM && bma->idx) { + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), + &bma->prev); + } + } else { + bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN); + if (!bma->eof) + bma->length = XFS_FILBLKS_MIN(bma->length, + bma->got.br_startoff - bma->offset); + } + + /* + * Indicate if this is the first user data in the file, or just any + * user data. + */ + if (!(bma->flags & XFS_BMAPI_METADATA)) { + bma->userdata = (bma->offset == 0) ? + XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; + } + + bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; + + /* + * Only want to do the alignment at the eof if it is userdata and + * allocation length is larger than a stripe unit. + */ + if (mp->m_dalign && bma->length >= mp->m_dalign && + !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { + error = xfs_bmap_isaeof(bma, whichfork); + if (error) + return error; + } + + error = xfs_bmap_alloc(bma); + if (error) + return error; + + if (bma->flist->xbf_low) + bma->minleft = 0; + if (bma->cur) + bma->cur->bc_private.b.firstblock = *bma->firstblock; + if (bma->blkno == NULLFSBLOCK) + return 0; + if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { + bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); + bma->cur->bc_private.b.firstblock = *bma->firstblock; + bma->cur->bc_private.b.flist = bma->flist; + } + /* + * Bump the number of extents we've allocated + * in this call. + */ + bma->nallocs++; + + if (bma->cur) + bma->cur->bc_private.b.flags = + bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; + + bma->got.br_startoff = bma->offset; + bma->got.br_startblock = bma->blkno; + bma->got.br_blockcount = bma->length; + bma->got.br_state = XFS_EXT_NORM; + + /* + * A wasdelay extent has been initialized, so shouldn't be flagged + * as unwritten. + */ + if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && + xfs_sb_version_hasextflgbit(&mp->m_sb)) + bma->got.br_state = XFS_EXT_UNWRITTEN; + + if (bma->wasdel) + error = xfs_bmap_add_extent_delay_real(bma); + else + error = xfs_bmap_add_extent_hole_real(bma, whichfork); + + bma->logflags |= tmp_logflags; + if (error) + return error; + + /* + * Update our extent pointer, given that xfs_bmap_add_extent_delay_real + * or xfs_bmap_add_extent_hole_real might have merged it into one of + * the neighbouring ones. + */ + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); + + ASSERT(bma->got.br_startoff <= bma->offset); + ASSERT(bma->got.br_startoff + bma->got.br_blockcount >= + bma->offset + bma->length); + ASSERT(bma->got.br_state == XFS_EXT_NORM || + bma->got.br_state == XFS_EXT_UNWRITTEN); + return 0; +} + +STATIC int +xfs_bmapi_convert_unwritten( + struct xfs_bmalloca *bma, + struct xfs_bmbt_irec *mval, + xfs_filblks_t len, + int flags) +{ + int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; + struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); + int tmp_logflags = 0; + int error; + + /* check if we need to do unwritten->real conversion */ + if (mval->br_state == XFS_EXT_UNWRITTEN && + (flags & XFS_BMAPI_PREALLOC)) + return 0; + + /* check if we need to do real->unwritten conversion */ + if (mval->br_state == XFS_EXT_NORM && + (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) != + (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) + return 0; + + /* + * Modify (by adding) the state flag, if writing. + */ + ASSERT(mval->br_blockcount <= len); + if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { + bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, + bma->ip, whichfork); + bma->cur->bc_private.b.firstblock = *bma->firstblock; + bma->cur->bc_private.b.flist = bma->flist; + } + mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) + ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; + + error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, + &bma->cur, mval, bma->firstblock, bma->flist, + &tmp_logflags); + bma->logflags |= tmp_logflags; + if (error) + return error; + + /* + * Update our extent pointer, given that + * xfs_bmap_add_extent_unwritten_real might have merged it into one + * of the neighbouring ones. + */ + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); + + /* + * We may have combined previously unwritten space with written space, + * so generate another request. + */ + if (mval->br_blockcount < len) + return EAGAIN; + return 0; +} + +/* + * Map file blocks to filesystem blocks, and allocate blocks or convert the + * extent state if necessary. Details behaviour is controlled by the flags + * parameter. Only allocates blocks from a single allocation group, to avoid + * locking problems. + * + * The returned value in "firstblock" from the first call in a transaction + * must be remembered and presented to subsequent calls in "firstblock". + * An upper bound for the number of blocks to be allocated is supplied to + * the first call in "total"; if no allocation group has that many free + * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). + */ +int +xfs_bmapi_write( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t bno, /* starting file offs. mapped */ + xfs_filblks_t len, /* length to map in file */ + int flags, /* XFS_BMAPI_... */ + xfs_fsblock_t *firstblock, /* first allocated block + controls a.g. for allocs */ + xfs_extlen_t total, /* total blocks needed */ + struct xfs_bmbt_irec *mval, /* output: map values */ + int *nmap, /* i/o: mval size/count */ + struct xfs_bmap_free *flist) /* i/o: list extents to free */ +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp; + struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */ + xfs_fileoff_t end; /* end of mapped file region */ + int eof; /* after the end of extents */ + int error; /* error return */ + int n; /* current extent index */ + xfs_fileoff_t obno; /* old block number (offset) */ + int whichfork; /* data or attr fork */ + char inhole; /* current location is hole in file */ + char wasdelay; /* old extent was delayed */ + +#ifdef DEBUG + xfs_fileoff_t orig_bno; /* original block number value */ + int orig_flags; /* original flags arg value */ + xfs_filblks_t orig_len; /* original value of len arg */ + struct xfs_bmbt_irec *orig_mval; /* original value of mval */ + int orig_nmap; /* original value of *nmap */ + + orig_bno = bno; + orig_len = len; + orig_flags = flags; + orig_mval = mval; + orig_nmap = *nmap; +#endif + whichfork = (flags & XFS_BMAPI_ATTRFORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; + + ASSERT(*nmap >= 1); + ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); + ASSERT(!(flags & XFS_BMAPI_IGSTATE)); + ASSERT(tp != NULL); + ASSERT(len > 0); + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); + return EFSCORRUPTED; + } + + if (XFS_FORCED_SHUTDOWN(mp)) + return EIO; + + ifp = XFS_IFORK_PTR(ip, whichfork); + + XFS_STATS_INC(xs_blk_mapw); + + if (*firstblock == NULLFSBLOCK) { + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) + bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; + else + bma.minleft = 1; + } else { + bma.minleft = 0; + } + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + goto error0; + } + + xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got, + &bma.prev); + n = 0; + end = bno + len; + obno = bno; + + bma.tp = tp; + bma.ip = ip; + bma.total = total; + bma.userdata = 0; + bma.flist = flist; + bma.firstblock = firstblock; + + if (flags & XFS_BMAPI_STACK_SWITCH) + bma.stack_switch = 1; + + while (bno < end && n < *nmap) { + inhole = eof || bma.got.br_startoff > bno; + wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); + + /* + * First, deal with the hole before the allocated space + * that we found, if any. + */ + if (inhole || wasdelay) { + bma.eof = eof; + bma.conv = !!(flags & XFS_BMAPI_CONVERT); + bma.wasdel = wasdelay; + bma.offset = bno; + bma.flags = flags; + + /* + * There's a 32/64 bit type mismatch between the + * allocation length request (which can be 64 bits in + * length) and the bma length request, which is + * xfs_extlen_t and therefore 32 bits. Hence we have to + * check for 32-bit overflows and handle them here. + */ + if (len > (xfs_filblks_t)MAXEXTLEN) + bma.length = MAXEXTLEN; + else + bma.length = len; + + ASSERT(len > 0); + ASSERT(bma.length > 0); + error = xfs_bmapi_allocate(&bma); + if (error) + goto error0; + if (bma.blkno == NULLFSBLOCK) + break; + } + + /* Deal with the allocated space we found. */ + xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno, + end, n, flags); + + /* Execute unwritten extent conversion if necessary */ + error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags); + if (error == EAGAIN) + continue; + if (error) + goto error0; + + /* update the extent map to return */ + xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); + + /* + * If we're done, stop now. Stop when we've allocated + * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise + * the transaction may get too big. + */ + if (bno >= end || n >= *nmap || bma.nallocs >= *nmap) + break; + + /* Else go on to the next record. */ + bma.prev = bma.got; + if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) { + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx), + &bma.got); + } else + eof = 1; + } + *nmap = n; + + /* + * Transform from btree to extents, give it cur. + */ + if (xfs_bmap_wants_extents(ip, whichfork)) { + int tmp_logflags = 0; + + ASSERT(bma.cur); + error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, + &tmp_logflags, whichfork); + bma.logflags |= tmp_logflags; + if (error) + goto error0; + } + + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || + XFS_IFORK_NEXTENTS(ip, whichfork) > + XFS_IFORK_MAXEXT(ip, whichfork)); + error = 0; +error0: + /* + * Log everything. Do this after conversion, there's no point in + * logging the extent records if we've converted to btree format. + */ + if ((bma.logflags & xfs_ilog_fext(whichfork)) && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + bma.logflags &= ~xfs_ilog_fext(whichfork); + else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) + bma.logflags &= ~xfs_ilog_fbroot(whichfork); + /* + * Log whatever the flags say, even if error. Otherwise we might miss + * detecting a case where the data is changed, there's an error, + * and it's not logged so we don't shutdown when we should. + */ + if (bma.logflags) + xfs_trans_log_inode(tp, ip, bma.logflags); + + if (bma.cur) { + if (!error) { + ASSERT(*firstblock == NULLFSBLOCK || + XFS_FSB_TO_AGNO(mp, *firstblock) == + XFS_FSB_TO_AGNO(mp, + bma.cur->bc_private.b.firstblock) || + (flist->xbf_low && + XFS_FSB_TO_AGNO(mp, *firstblock) < + XFS_FSB_TO_AGNO(mp, + bma.cur->bc_private.b.firstblock))); + *firstblock = bma.cur->bc_private.b.firstblock; + } + xfs_btree_del_cursor(bma.cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + } + if (!error) + xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, + orig_nmap, *nmap); + return error; +} + +/* + * Called by xfs_bmapi to update file extent records and the btree + * after removing space (or undoing a delayed allocation). + */ +STATIC int /* error */ +xfs_bmap_del_extent( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_trans_t *tp, /* current transaction pointer */ + xfs_extnum_t *idx, /* extent number to update/delete */ + xfs_bmap_free_t *flist, /* list of extents to be freed */ + xfs_btree_cur_t *cur, /* if null, not a btree */ + xfs_bmbt_irec_t *del, /* data to remove from extents */ + int *logflagsp, /* inode logging flags */ + int whichfork) /* data or attr fork */ +{ + xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ + xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ + xfs_fsblock_t del_endblock=0; /* first block past del */ + xfs_fileoff_t del_endoff; /* first offset past del */ + int delay; /* current block is delayed allocated */ + int do_fx; /* free extent at end of routine */ + xfs_bmbt_rec_host_t *ep; /* current extent entry pointer */ + int error; /* error return value */ + int flags; /* inode logging flags */ + xfs_bmbt_irec_t got; /* current extent entry */ + xfs_fileoff_t got_endoff; /* first offset past got */ + int i; /* temp state */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_mount_t *mp; /* mount structure */ + xfs_filblks_t nblks; /* quota/sb block count */ + xfs_bmbt_irec_t new; /* new record to be inserted */ + /* REFERENCED */ + uint qfield; /* quota field to update */ + xfs_filblks_t temp; /* for indirect length calculations */ + xfs_filblks_t temp2; /* for indirect length calculations */ + int state = 0; + + XFS_STATS_INC(xs_del_exlist); + + if (whichfork == XFS_ATTR_FORK) + state |= BMAP_ATTRFORK; + + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / + (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT(del->br_blockcount > 0); + ep = xfs_iext_get_ext(ifp, *idx); + xfs_bmbt_get_all(ep, &got); + ASSERT(got.br_startoff <= del->br_startoff); + del_endoff = del->br_startoff + del->br_blockcount; + got_endoff = got.br_startoff + got.br_blockcount; + ASSERT(got_endoff >= del_endoff); + delay = isnullstartblock(got.br_startblock); + ASSERT(isnullstartblock(del->br_startblock) == delay); + flags = 0; + qfield = 0; + error = 0; + /* + * If deleting a real allocation, must free up the disk space. + */ + if (!delay) { + flags = XFS_ILOG_CORE; + /* + * Realtime allocation. Free it and record di_nblocks update. + */ + if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { + xfs_fsblock_t bno; + xfs_filblks_t len; + + ASSERT(do_mod(del->br_blockcount, + mp->m_sb.sb_rextsize) == 0); + ASSERT(do_mod(del->br_startblock, + mp->m_sb.sb_rextsize) == 0); + bno = del->br_startblock; + len = del->br_blockcount; + do_div(bno, mp->m_sb.sb_rextsize); + do_div(len, mp->m_sb.sb_rextsize); + error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + if (error) + goto done; + do_fx = 0; + nblks = len * mp->m_sb.sb_rextsize; + qfield = XFS_TRANS_DQ_RTBCOUNT; + } + /* + * Ordinary allocation. + */ + else { + do_fx = 1; + nblks = del->br_blockcount; + qfield = XFS_TRANS_DQ_BCOUNT; + } + /* + * Set up del_endblock and cur for later. + */ + del_endblock = del->br_startblock + del->br_blockcount; + if (cur) { + if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff, + got.br_startblock, got.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + da_old = da_new = 0; + } else { + da_old = startblockval(got.br_startblock); + da_new = 0; + nblks = 0; + do_fx = 0; + } + /* + * Set flag value to use in switch statement. + * Left-contig is 2, right-contig is 1. + */ + switch (((got.br_startoff == del->br_startoff) << 1) | + (got_endoff == del_endoff)) { + case 3: + /* + * Matches the whole extent. Delete the entry. + */ + xfs_iext_remove(ip, *idx, 1, + whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); + --*idx; + if (delay) + break; + + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + flags |= XFS_ILOG_CORE; + if (!cur) { + flags |= xfs_ilog_fext(whichfork); + break; + } + if ((error = xfs_btree_delete(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + break; + + case 2: + /* + * Deleting the first part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_startoff(ep, del_endoff); + temp = got.br_blockcount - del->br_blockcount; + xfs_bmbt_set_blockcount(ep, temp); + if (delay) { + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + da_old); + xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + da_new = temp; + break; + } + xfs_bmbt_set_startblock(ep, del_endblock); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + if (!cur) { + flags |= xfs_ilog_fext(whichfork); + break; + } + if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock, + got.br_blockcount - del->br_blockcount, + got.br_state))) + goto done; + break; + + case 1: + /* + * Deleting the last part of the extent. + */ + temp = got.br_blockcount - del->br_blockcount; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + if (delay) { + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + da_old); + xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + da_new = temp; + break; + } + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + if (!cur) { + flags |= xfs_ilog_fext(whichfork); + break; + } + if ((error = xfs_bmbt_update(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount - del->br_blockcount, + got.br_state))) + goto done; + break; + + case 0: + /* + * Deleting the middle of the extent. + */ + temp = del->br_startoff - got.br_startoff; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + new.br_startoff = del_endoff; + temp2 = got_endoff - del_endoff; + new.br_blockcount = temp2; + new.br_state = got.br_state; + if (!delay) { + new.br_startblock = del_endblock; + flags |= XFS_ILOG_CORE; + if (cur) { + if ((error = xfs_bmbt_update(cur, + got.br_startoff, + got.br_startblock, temp, + got.br_state))) + goto done; + if ((error = xfs_btree_increment(cur, 0, &i))) + goto done; + cur->bc_rec.b = new; + error = xfs_btree_insert(cur, &i); + if (error && error != ENOSPC) + goto done; + /* + * If get no-space back from btree insert, + * it tried a split, and we have a zero + * block reservation. + * Fix up our state and return the error. + */ + if (error == ENOSPC) { + /* + * Reset the cursor, don't trust + * it after any insert operation. + */ + if ((error = xfs_bmbt_lookup_eq(cur, + got.br_startoff, + got.br_startblock, + temp, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + /* + * Update the btree record back + * to the original value. + */ + if ((error = xfs_bmbt_update(cur, + got.br_startoff, + got.br_startblock, + got.br_blockcount, + got.br_state))) + goto done; + /* + * Reset the extent record back + * to the original value. + */ + xfs_bmbt_set_blockcount(ep, + got.br_blockcount); + flags = 0; + error = ENOSPC; + goto done; + } + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } else + flags |= xfs_ilog_fext(whichfork); + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + } else { + ASSERT(whichfork == XFS_DATA_FORK); + temp = xfs_bmap_worst_indlen(ip, temp); + xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); + temp2 = xfs_bmap_worst_indlen(ip, temp2); + new.br_startblock = nullstartblock((int)temp2); + da_new = temp + temp2; + while (da_new > da_old) { + if (temp) { + temp--; + da_new--; + xfs_bmbt_set_startblock(ep, + nullstartblock((int)temp)); + } + if (da_new == da_old) + break; + if (temp2) { + temp2--; + da_new--; + new.br_startblock = + nullstartblock((int)temp2); + } + } + } + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_insert(ip, *idx + 1, 1, &new, state); + ++*idx; + break; + } + /* + * If we need to, add to list of extents to delete. + */ + if (do_fx) + xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist, + mp); + /* + * Adjust inode # blocks in the file. + */ + if (nblks) + ip->i_d.di_nblocks -= nblks; + /* + * Adjust quota data. + */ + if (qfield) + xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); + + /* + * Account for change in delayed indirect blocks. + * Nothing to do for disk quota accounting here. + */ + ASSERT(da_old >= da_new); + if (da_old > da_new) { + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + (int64_t)(da_old - da_new), 0); + } +done: + *logflagsp = flags; + return error; +} + +/* + * Unmap (remove) blocks from a file. + * If nexts is nonzero then the number of extents to remove is limited to + * that value. If not all extents in the block range can be removed then + * *done is set. + */ +int /* error */ +xfs_bunmapi( + xfs_trans_t *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t bno, /* starting offset to unmap */ + xfs_filblks_t len, /* length to unmap in file */ + int flags, /* misc flags */ + xfs_extnum_t nexts, /* number of extents max */ + xfs_fsblock_t *firstblock, /* first allocated block + controls a.g. for allocs */ + xfs_bmap_free_t *flist, /* i/o: list extents to free */ + int *done) /* set if not done yet */ +{ + xfs_btree_cur_t *cur; /* bmap btree cursor */ + xfs_bmbt_irec_t del; /* extent being deleted */ + int eof; /* is deleting at eof */ + xfs_bmbt_rec_host_t *ep; /* extent record pointer */ + int error; /* error return value */ + xfs_extnum_t extno; /* extent number in list */ + xfs_bmbt_irec_t got; /* current extent record */ + xfs_ifork_t *ifp; /* inode fork pointer */ + int isrt; /* freeing in rt area */ + xfs_extnum_t lastx; /* last extent index used */ + int logflags; /* transaction logging flags */ + xfs_extlen_t mod; /* rt extent offset */ + xfs_mount_t *mp; /* mount structure */ + xfs_extnum_t nextents; /* number of file extents */ + xfs_bmbt_irec_t prev; /* previous extent record */ + xfs_fileoff_t start; /* first file offset deleted */ + int tmp_logflags; /* partial logging flags */ + int wasdel; /* was a delayed alloc extent */ + int whichfork; /* data or attribute fork */ + xfs_fsblock_t sum; + + trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); + + whichfork = (flags & XFS_BMAPI_ATTRFORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; + ifp = XFS_IFORK_PTR(ip, whichfork); + if (unlikely( + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { + XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW, + ip->i_mount); + return EFSCORRUPTED; + } + mp = ip->i_mount; + if (XFS_FORCED_SHUTDOWN(mp)) + return EIO; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(len > 0); + ASSERT(nexts >= 0); + + if (!(ifp->if_flags & XFS_IFEXTENTS) && + (error = xfs_iread_extents(tp, ip, whichfork))) + return error; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents == 0) { + *done = 1; + return 0; + } + XFS_STATS_INC(xs_blk_unmap); + isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); + start = bno; + bno = start + len - 1; + ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, + &prev); + + /* + * Check to see if the given block number is past the end of the + * file, back up to the last block if so... + */ + if (eof) { + ep = xfs_iext_get_ext(ifp, --lastx); + xfs_bmbt_get_all(ep, &got); + bno = got.br_startoff + got.br_blockcount - 1; + } + logflags = 0; + if (ifp->if_flags & XFS_IFBROOT) { + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); + cur->bc_private.b.firstblock = *firstblock; + cur->bc_private.b.flist = flist; + cur->bc_private.b.flags = 0; + } else + cur = NULL; + + if (isrt) { + /* + * Synchronize by locking the bitmap inode. + */ + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); + } + + extno = 0; + while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && + (nexts == 0 || extno < nexts)) { + /* + * Is the found extent after a hole in which bno lives? + * Just back up to the previous extent, if so. + */ + if (got.br_startoff > bno) { + if (--lastx < 0) + break; + ep = xfs_iext_get_ext(ifp, lastx); + xfs_bmbt_get_all(ep, &got); + } + /* + * Is the last block of this extent before the range + * we're supposed to delete? If so, we're done. + */ + bno = XFS_FILEOFF_MIN(bno, + got.br_startoff + got.br_blockcount - 1); + if (bno < start) + break; + /* + * Then deal with the (possibly delayed) allocated space + * we found. + */ + ASSERT(ep != NULL); + del = got; + wasdel = isnullstartblock(del.br_startblock); + if (got.br_startoff < start) { + del.br_startoff = start; + del.br_blockcount -= start - got.br_startoff; + if (!wasdel) + del.br_startblock += start - got.br_startoff; + } + if (del.br_startoff + del.br_blockcount > bno + 1) + del.br_blockcount = bno + 1 - del.br_startoff; + sum = del.br_startblock + del.br_blockcount; + if (isrt && + (mod = do_mod(sum, mp->m_sb.sb_rextsize))) { + /* + * Realtime extent not lined up at the end. + * The extent could have been split into written + * and unwritten pieces, or we could just be + * unmapping part of it. But we can't really + * get rid of part of a realtime extent. + */ + if (del.br_state == XFS_EXT_UNWRITTEN || + !xfs_sb_version_hasextflgbit(&mp->m_sb)) { + /* + * This piece is unwritten, or we're not + * using unwritten extents. Skip over it. + */ + ASSERT(bno >= mod); + bno -= mod > del.br_blockcount ? + del.br_blockcount : mod; + if (bno < got.br_startoff) { + if (--lastx >= 0) + xfs_bmbt_get_all(xfs_iext_get_ext( + ifp, lastx), &got); + } + continue; + } + /* + * It's written, turn it unwritten. + * This is better than zeroing it. + */ + ASSERT(del.br_state == XFS_EXT_NORM); + ASSERT(xfs_trans_get_block_res(tp) > 0); + /* + * If this spans a realtime extent boundary, + * chop it back to the start of the one we end at. + */ + if (del.br_blockcount > mod) { + del.br_startoff += del.br_blockcount - mod; + del.br_startblock += del.br_blockcount - mod; + del.br_blockcount = mod; + } + del.br_state = XFS_EXT_UNWRITTEN; + error = xfs_bmap_add_extent_unwritten_real(tp, ip, + &lastx, &cur, &del, firstblock, flist, + &logflags); + if (error) + goto error0; + goto nodelete; + } + if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) { + /* + * Realtime extent is lined up at the end but not + * at the front. We'll get rid of full extents if + * we can. + */ + mod = mp->m_sb.sb_rextsize - mod; + if (del.br_blockcount > mod) { + del.br_blockcount -= mod; + del.br_startoff += mod; + del.br_startblock += mod; + } else if ((del.br_startoff == start && + (del.br_state == XFS_EXT_UNWRITTEN || + xfs_trans_get_block_res(tp) == 0)) || + !xfs_sb_version_hasextflgbit(&mp->m_sb)) { + /* + * Can't make it unwritten. There isn't + * a full extent here so just skip it. + */ + ASSERT(bno >= del.br_blockcount); + bno -= del.br_blockcount; + if (got.br_startoff > bno) { + if (--lastx >= 0) { + ep = xfs_iext_get_ext(ifp, + lastx); + xfs_bmbt_get_all(ep, &got); + } + } + continue; + } else if (del.br_state == XFS_EXT_UNWRITTEN) { + /* + * This one is already unwritten. + * It must have a written left neighbor. + * Unwrite the killed part of that one and + * try again. + */ + ASSERT(lastx > 0); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, + lastx - 1), &prev); + ASSERT(prev.br_state == XFS_EXT_NORM); + ASSERT(!isnullstartblock(prev.br_startblock)); + ASSERT(del.br_startblock == + prev.br_startblock + prev.br_blockcount); + if (prev.br_startoff < start) { + mod = start - prev.br_startoff; + prev.br_blockcount -= mod; + prev.br_startblock += mod; + prev.br_startoff = start; + } + prev.br_state = XFS_EXT_UNWRITTEN; + lastx--; + error = xfs_bmap_add_extent_unwritten_real(tp, + ip, &lastx, &cur, &prev, + firstblock, flist, &logflags); + if (error) + goto error0; + goto nodelete; + } else { + ASSERT(del.br_state == XFS_EXT_NORM); + del.br_state = XFS_EXT_UNWRITTEN; + error = xfs_bmap_add_extent_unwritten_real(tp, + ip, &lastx, &cur, &del, + firstblock, flist, &logflags); + if (error) + goto error0; + goto nodelete; + } + } + if (wasdel) { + ASSERT(startblockval(del.br_startblock) > 0); + /* Update realtime/data freespace, unreserve quota */ + if (isrt) { + xfs_filblks_t rtexts; + + rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); + do_div(rtexts, mp->m_sb.sb_rextsize); + xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, + (int64_t)rtexts, 0); + (void)xfs_trans_reserve_quota_nblks(NULL, + ip, -((long)del.br_blockcount), 0, + XFS_QMOPT_RES_RTBLKS); + } else { + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + (int64_t)del.br_blockcount, 0); + (void)xfs_trans_reserve_quota_nblks(NULL, + ip, -((long)del.br_blockcount), 0, + XFS_QMOPT_RES_REGBLKS); + } + ip->i_delayed_blks -= del.br_blockcount; + if (cur) + cur->bc_private.b.flags |= + XFS_BTCUR_BPRV_WASDEL; + } else if (cur) + cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; + /* + * If it's the case where the directory code is running + * with no block reservation, and the deleted block is in + * the middle of its extent, and the resulting insert + * of an extent would cause transformation to btree format, + * then reject it. The calling code will then swap + * blocks around instead. + * We have to do this now, rather than waiting for the + * conversion to btree format, since the transaction + * will be dirty. + */ + if (!wasdel && xfs_trans_get_block_res(tp) == 0 && + XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ + XFS_IFORK_MAXEXT(ip, whichfork) && + del.br_startoff > got.br_startoff && + del.br_startoff + del.br_blockcount < + got.br_startoff + got.br_blockcount) { + error = ENOSPC; + goto error0; + } + error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, + &tmp_logflags, whichfork); + logflags |= tmp_logflags; + if (error) + goto error0; + bno = del.br_startoff - 1; +nodelete: + /* + * If not done go on to the next (previous) record. + */ + if (bno != (xfs_fileoff_t)-1 && bno >= start) { + if (lastx >= 0) { + ep = xfs_iext_get_ext(ifp, lastx); + if (xfs_bmbt_get_startoff(ep) > bno) { + if (--lastx >= 0) + ep = xfs_iext_get_ext(ifp, + lastx); + } + xfs_bmbt_get_all(ep, &got); + } + extno++; + } + } + *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; + + /* + * Convert to a btree if necessary. + */ + if (xfs_bmap_needs_btree(ip, whichfork)) { + ASSERT(cur == NULL); + error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, + &cur, 0, &tmp_logflags, whichfork); + logflags |= tmp_logflags; + if (error) + goto error0; + } + /* + * transform from btree to extents, give it cur + */ + else if (xfs_bmap_wants_extents(ip, whichfork)) { + ASSERT(cur != NULL); + error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags, + whichfork); + logflags |= tmp_logflags; + if (error) + goto error0; + } + /* + * transform from extents to local? + */ + error = 0; +error0: + /* + * Log everything. Do this after conversion, there's no point in + * logging the extent records if we've converted to btree format. + */ + if ((logflags & xfs_ilog_fext(whichfork)) && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + logflags &= ~xfs_ilog_fext(whichfork); + else if ((logflags & xfs_ilog_fbroot(whichfork)) && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) + logflags &= ~xfs_ilog_fbroot(whichfork); + /* + * Log inode even in the error case, if the transaction + * is dirty we'll need to shut down the filesystem. + */ + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); + if (cur) { + if (!error) { + *firstblock = cur->bc_private.b.firstblock; + cur->bc_private.b.allocated = 0; + } + xfs_btree_del_cursor(cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + } + return error; +} + +/* + * Shift extent records to the left to cover a hole. + * + * The maximum number of extents to be shifted in a single operation + * is @num_exts, and @current_ext keeps track of the current extent + * index we have shifted. @offset_shift_fsb is the length by which each + * extent is shifted. If there is no hole to shift the extents + * into, this will be considered invalid operation and we abort immediately. + */ +int +xfs_bmap_shift_extents( + struct xfs_trans *tp, + struct xfs_inode *ip, + int *done, + xfs_fileoff_t start_fsb, + xfs_fileoff_t offset_shift_fsb, + xfs_extnum_t *current_ext, + xfs_fsblock_t *firstblock, + struct xfs_bmap_free *flist, + int num_exts) +{ + struct xfs_btree_cur *cur; + struct xfs_bmbt_rec_host *gotp; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec left; + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp; + xfs_extnum_t nexts = 0; + xfs_fileoff_t startoff; + int error = 0; + int i; + int whichfork = XFS_DATA_FORK; + int logflags; + xfs_filblks_t blockcount = 0; + int total_extents; + + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + XFS_ERROR_REPORT("xfs_bmap_shift_extents", + XFS_ERRLEVEL_LOW, mp); + return EFSCORRUPTED; + } + + if (XFS_FORCED_SHUTDOWN(mp)) + return EIO; + + ASSERT(current_ext != NULL); + + ifp = XFS_IFORK_PTR(ip, whichfork); + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + /* Read in all the extents */ + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; + } + + /* + * If *current_ext is 0, we would need to lookup the extent + * from where we would start shifting and store it in gotp. + */ + if (!*current_ext) { + gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); + /* + * gotp can be null in 2 cases: 1) if there are no extents + * or 2) start_fsb lies in a hole beyond which there are + * no extents. Either way, we are done. + */ + if (!gotp) { + *done = 1; + return 0; + } + } + + /* We are going to change core inode */ + logflags = XFS_ILOG_CORE; + if (ifp->if_flags & XFS_IFBROOT) { + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); + cur->bc_private.b.firstblock = *firstblock; + cur->bc_private.b.flist = flist; + cur->bc_private.b.flags = 0; + } else { + cur = NULL; + logflags |= XFS_ILOG_DEXT; + } + + /* + * There may be delalloc extents in the data fork before the range we + * are collapsing out, so we cannot + * use the count of real extents here. Instead we have to calculate it + * from the incore fork. + */ + total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + while (nexts++ < num_exts && *current_ext < total_extents) { + + gotp = xfs_iext_get_ext(ifp, *current_ext); + xfs_bmbt_get_all(gotp, &got); + startoff = got.br_startoff - offset_shift_fsb; + + /* + * Before shifting extent into hole, make sure that the hole + * is large enough to accomodate the shift. + */ + if (*current_ext) { + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, + *current_ext - 1), &left); + + if (startoff < left.br_startoff + left.br_blockcount) + error = EINVAL; + } else if (offset_shift_fsb > got.br_startoff) { + /* + * When first extent is shifted, offset_shift_fsb + * should be less than the stating offset of + * the first extent. + */ + error = EINVAL; + } + + if (error) + goto del_cursor; + + if (cur) { + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount, + &i); + if (error) + goto del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); + } + + /* Check if we can merge 2 adjacent extents */ + if (*current_ext && + left.br_startoff + left.br_blockcount == startoff && + left.br_startblock + left.br_blockcount == + got.br_startblock && + left.br_state == got.br_state && + left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { + blockcount = left.br_blockcount + + got.br_blockcount; + xfs_iext_remove(ip, *current_ext, 1, 0); + if (cur) { + error = xfs_btree_delete(cur, &i); + if (error) + goto del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); + } + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + gotp = xfs_iext_get_ext(ifp, --*current_ext); + xfs_bmbt_get_all(gotp, &got); + + /* Make cursor point to the extent we will update */ + if (cur) { + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount, + &i); + if (error) + goto del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); + } + + xfs_bmbt_set_blockcount(gotp, blockcount); + got.br_blockcount = blockcount; + } else { + /* We have to update the startoff */ + xfs_bmbt_set_startoff(gotp, startoff); + got.br_startoff = startoff; + } + + if (cur) { + error = xfs_bmbt_update(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount, + got.br_state); + if (error) + goto del_cursor; + } + + (*current_ext)++; + total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + } + + /* Check if we are done */ + if (*current_ext == total_extents) + *done = 1; + +del_cursor: + if (cur) + xfs_btree_del_cursor(cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + + xfs_trans_log_inode(tp, ip, logflags); + return error; +} diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c new file mode 100644 index 000000000000..de65bb8bab04 --- /dev/null +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -0,0 +1,967 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_alloc.h" +#include "xfs_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_bmap.h" +#include "xfs_error.h" +#include "xfs_quota.h" +#include "xfs_trace.h" +#include "xfs_cksum.h" +#include "xfs_dinode.h" + +/* + * Determine the extent state. + */ +/* ARGSUSED */ +STATIC xfs_exntst_t +xfs_extent_state( + xfs_filblks_t blks, + int extent_flag) +{ + if (extent_flag) { + ASSERT(blks != 0); /* saved for DMIG */ + return XFS_EXT_UNWRITTEN; + } + return XFS_EXT_NORM; +} + +/* + * Convert on-disk form of btree root to in-memory form. + */ +void +xfs_bmdr_to_bmbt( + struct xfs_inode *ip, + xfs_bmdr_block_t *dblock, + int dblocklen, + struct xfs_btree_block *rblock, + int rblocklen) +{ + struct xfs_mount *mp = ip->i_mount; + int dmxr; + xfs_bmbt_key_t *fkp; + __be64 *fpp; + xfs_bmbt_key_t *tkp; + __be64 *tpp; + + if (xfs_sb_version_hascrc(&mp->m_sb)) + xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, + XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, + XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); + else + xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, + XFS_BMAP_MAGIC, 0, 0, ip->i_ino, + XFS_BTREE_LONG_PTRS); + + rblock->bb_level = dblock->bb_level; + ASSERT(be16_to_cpu(rblock->bb_level) > 0); + rblock->bb_numrecs = dblock->bb_numrecs; + dmxr = xfs_bmdr_maxrecs(dblocklen, 0); + fkp = XFS_BMDR_KEY_ADDR(dblock, 1); + tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); + fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); + tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); + dmxr = be16_to_cpu(dblock->bb_numrecs); + memcpy(tkp, fkp, sizeof(*fkp) * dmxr); + memcpy(tpp, fpp, sizeof(*fpp) * dmxr); +} + +/* + * Convert a compressed bmap extent record to an uncompressed form. + * This code must be in sync with the routines xfs_bmbt_get_startoff, + * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. + */ +STATIC void +__xfs_bmbt_get_all( + __uint64_t l0, + __uint64_t l1, + xfs_bmbt_irec_t *s) +{ + int ext_flag; + xfs_exntst_t st; + + ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN)); + s->br_startoff = ((xfs_fileoff_t)l0 & + xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; +#if XFS_BIG_BLKNOS + s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) | + (((xfs_fsblock_t)l1) >> 21); +#else +#ifdef DEBUG + { + xfs_dfsbno_t b; + + b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) | + (((xfs_dfsbno_t)l1) >> 21); + ASSERT((b >> 32) == 0 || isnulldstartblock(b)); + s->br_startblock = (xfs_fsblock_t)b; + } +#else /* !DEBUG */ + s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21); +#endif /* DEBUG */ +#endif /* XFS_BIG_BLKNOS */ + s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21)); + /* This is xfs_extent_state() in-line */ + if (ext_flag) { + ASSERT(s->br_blockcount != 0); /* saved for DMIG */ + st = XFS_EXT_UNWRITTEN; + } else + st = XFS_EXT_NORM; + s->br_state = st; +} + +void +xfs_bmbt_get_all( + xfs_bmbt_rec_host_t *r, + xfs_bmbt_irec_t *s) +{ + __xfs_bmbt_get_all(r->l0, r->l1, s); +} + +/* + * Extract the blockcount field from an in memory bmap extent record. + */ +xfs_filblks_t +xfs_bmbt_get_blockcount( + xfs_bmbt_rec_host_t *r) +{ + return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21)); +} + +/* + * Extract the startblock field from an in memory bmap extent record. + */ +xfs_fsblock_t +xfs_bmbt_get_startblock( + xfs_bmbt_rec_host_t *r) +{ +#if XFS_BIG_BLKNOS + return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) | + (((xfs_fsblock_t)r->l1) >> 21); +#else +#ifdef DEBUG + xfs_dfsbno_t b; + + b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) | + (((xfs_dfsbno_t)r->l1) >> 21); + ASSERT((b >> 32) == 0 || isnulldstartblock(b)); + return (xfs_fsblock_t)b; +#else /* !DEBUG */ + return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21); +#endif /* DEBUG */ +#endif /* XFS_BIG_BLKNOS */ +} + +/* + * Extract the startoff field from an in memory bmap extent record. + */ +xfs_fileoff_t +xfs_bmbt_get_startoff( + xfs_bmbt_rec_host_t *r) +{ + return ((xfs_fileoff_t)r->l0 & + xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; +} + +xfs_exntst_t +xfs_bmbt_get_state( + xfs_bmbt_rec_host_t *r) +{ + int ext_flag; + + ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN)); + return xfs_extent_state(xfs_bmbt_get_blockcount(r), + ext_flag); +} + +/* + * Extract the blockcount field from an on disk bmap extent record. + */ +xfs_filblks_t +xfs_bmbt_disk_get_blockcount( + xfs_bmbt_rec_t *r) +{ + return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21)); +} + +/* + * Extract the startoff field from a disk format bmap extent record. + */ +xfs_fileoff_t +xfs_bmbt_disk_get_startoff( + xfs_bmbt_rec_t *r) +{ + return ((xfs_fileoff_t)be64_to_cpu(r->l0) & + xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; +} + + +/* + * Set all the fields in a bmap extent record from the arguments. + */ +void +xfs_bmbt_set_allf( + xfs_bmbt_rec_host_t *r, + xfs_fileoff_t startoff, + xfs_fsblock_t startblock, + xfs_filblks_t blockcount, + xfs_exntst_t state) +{ + int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1; + + ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); + ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); + ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); + +#if XFS_BIG_BLKNOS + ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); + + r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | + ((xfs_bmbt_rec_base_t)startoff << 9) | + ((xfs_bmbt_rec_base_t)startblock >> 43); + r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | + ((xfs_bmbt_rec_base_t)blockcount & + (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); +#else /* !XFS_BIG_BLKNOS */ + if (isnullstartblock(startblock)) { + r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | + ((xfs_bmbt_rec_base_t)startoff << 9) | + (xfs_bmbt_rec_base_t)xfs_mask64lo(9); + r->l1 = xfs_mask64hi(11) | + ((xfs_bmbt_rec_base_t)startblock << 21) | + ((xfs_bmbt_rec_base_t)blockcount & + (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); + } else { + r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | + ((xfs_bmbt_rec_base_t)startoff << 9); + r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | + ((xfs_bmbt_rec_base_t)blockcount & + (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); + } +#endif /* XFS_BIG_BLKNOS */ +} + +/* + * Set all the fields in a bmap extent record from the uncompressed form. + */ +void +xfs_bmbt_set_all( + xfs_bmbt_rec_host_t *r, + xfs_bmbt_irec_t *s) +{ + xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock, + s->br_blockcount, s->br_state); +} + + +/* + * Set all the fields in a disk format bmap extent record from the arguments. + */ +void +xfs_bmbt_disk_set_allf( + xfs_bmbt_rec_t *r, + xfs_fileoff_t startoff, + xfs_fsblock_t startblock, + xfs_filblks_t blockcount, + xfs_exntst_t state) +{ + int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1; + + ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); + ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); + ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); + +#if XFS_BIG_BLKNOS + ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); + + r->l0 = cpu_to_be64( + ((xfs_bmbt_rec_base_t)extent_flag << 63) | + ((xfs_bmbt_rec_base_t)startoff << 9) | + ((xfs_bmbt_rec_base_t)startblock >> 43)); + r->l1 = cpu_to_be64( + ((xfs_bmbt_rec_base_t)startblock << 21) | + ((xfs_bmbt_rec_base_t)blockcount & + (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); +#else /* !XFS_BIG_BLKNOS */ + if (isnullstartblock(startblock)) { + r->l0 = cpu_to_be64( + ((xfs_bmbt_rec_base_t)extent_flag << 63) | + ((xfs_bmbt_rec_base_t)startoff << 9) | + (xfs_bmbt_rec_base_t)xfs_mask64lo(9)); + r->l1 = cpu_to_be64(xfs_mask64hi(11) | + ((xfs_bmbt_rec_base_t)startblock << 21) | + ((xfs_bmbt_rec_base_t)blockcount & + (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); + } else { + r->l0 = cpu_to_be64( + ((xfs_bmbt_rec_base_t)extent_flag << 63) | + ((xfs_bmbt_rec_base_t)startoff << 9)); + r->l1 = cpu_to_be64( + ((xfs_bmbt_rec_base_t)startblock << 21) | + ((xfs_bmbt_rec_base_t)blockcount & + (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); + } +#endif /* XFS_BIG_BLKNOS */ +} + +/* + * Set all the fields in a bmap extent record from the uncompressed form. + */ +STATIC void +xfs_bmbt_disk_set_all( + xfs_bmbt_rec_t *r, + xfs_bmbt_irec_t *s) +{ + xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock, + s->br_blockcount, s->br_state); +} + +/* + * Set the blockcount field in a bmap extent record. + */ +void +xfs_bmbt_set_blockcount( + xfs_bmbt_rec_host_t *r, + xfs_filblks_t v) +{ + ASSERT((v & xfs_mask64hi(43)) == 0); + r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) | + (xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21)); +} + +/* + * Set the startblock field in a bmap extent record. + */ +void +xfs_bmbt_set_startblock( + xfs_bmbt_rec_host_t *r, + xfs_fsblock_t v) +{ +#if XFS_BIG_BLKNOS + ASSERT((v & xfs_mask64hi(12)) == 0); + r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) | + (xfs_bmbt_rec_base_t)(v >> 43); + r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) | + (xfs_bmbt_rec_base_t)(v << 21); +#else /* !XFS_BIG_BLKNOS */ + if (isnullstartblock(v)) { + r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9); + r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) | + ((xfs_bmbt_rec_base_t)v << 21) | + (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); + } else { + r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9); + r->l1 = ((xfs_bmbt_rec_base_t)v << 21) | + (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); + } +#endif /* XFS_BIG_BLKNOS */ +} + +/* + * Set the startoff field in a bmap extent record. + */ +void +xfs_bmbt_set_startoff( + xfs_bmbt_rec_host_t *r, + xfs_fileoff_t v) +{ + ASSERT((v & xfs_mask64hi(9)) == 0); + r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) | + ((xfs_bmbt_rec_base_t)v << 9) | + (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9)); +} + +/* + * Set the extent state field in a bmap extent record. + */ +void +xfs_bmbt_set_state( + xfs_bmbt_rec_host_t *r, + xfs_exntst_t v) +{ + ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN); + if (v == XFS_EXT_NORM) + r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN); + else + r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN); +} + +/* + * Convert in-memory form of btree root to on-disk form. + */ +void +xfs_bmbt_to_bmdr( + struct xfs_mount *mp, + struct xfs_btree_block *rblock, + int rblocklen, + xfs_bmdr_block_t *dblock, + int dblocklen) +{ + int dmxr; + xfs_bmbt_key_t *fkp; + __be64 *fpp; + xfs_bmbt_key_t *tkp; + __be64 *tpp; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC)); + ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid)); + ASSERT(rblock->bb_u.l.bb_blkno == + cpu_to_be64(XFS_BUF_DADDR_NULL)); + } else + ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); + ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); + ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); + ASSERT(rblock->bb_level != 0); + dblock->bb_level = rblock->bb_level; + dblock->bb_numrecs = rblock->bb_numrecs; + dmxr = xfs_bmdr_maxrecs(dblocklen, 0); + fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); + tkp = XFS_BMDR_KEY_ADDR(dblock, 1); + fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); + tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); + dmxr = be16_to_cpu(dblock->bb_numrecs); + memcpy(tkp, fkp, sizeof(*fkp) * dmxr); + memcpy(tpp, fpp, sizeof(*fpp) * dmxr); +} + +/* + * Check extent records, which have just been read, for + * any bit in the extent flag field. ASSERT on debug + * kernels, as this condition should not occur. + * Return an error condition (1) if any flags found, + * otherwise return 0. + */ + +int +xfs_check_nostate_extents( + xfs_ifork_t *ifp, + xfs_extnum_t idx, + xfs_extnum_t num) +{ + for (; num > 0; num--, idx++) { + xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); + if ((ep->l0 >> + (64 - BMBT_EXNTFLAG_BITLEN)) != 0) { + ASSERT(0); + return 1; + } + } + return 0; +} + + +STATIC struct xfs_btree_cur * +xfs_bmbt_dup_cursor( + struct xfs_btree_cur *cur) +{ + struct xfs_btree_cur *new; + + new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.b.ip, cur->bc_private.b.whichfork); + + /* + * Copy the firstblock, flist, and flags values, + * since init cursor doesn't get them. + */ + new->bc_private.b.firstblock = cur->bc_private.b.firstblock; + new->bc_private.b.flist = cur->bc_private.b.flist; + new->bc_private.b.flags = cur->bc_private.b.flags; + + return new; +} + +STATIC void +xfs_bmbt_update_cursor( + struct xfs_btree_cur *src, + struct xfs_btree_cur *dst) +{ + ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) || + (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); + ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist); + + dst->bc_private.b.allocated += src->bc_private.b.allocated; + dst->bc_private.b.firstblock = src->bc_private.b.firstblock; + + src->bc_private.b.allocated = 0; +} + +STATIC int +xfs_bmbt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + xfs_alloc_arg_t args; /* block allocation args */ + int error; /* error return value */ + + memset(&args, 0, sizeof(args)); + args.tp = cur->bc_tp; + args.mp = cur->bc_mp; + args.fsbno = cur->bc_private.b.firstblock; + args.firstblock = args.fsbno; + + if (args.fsbno == NULLFSBLOCK) { + args.fsbno = be64_to_cpu(start->l); + args.type = XFS_ALLOCTYPE_START_BNO; + /* + * Make sure there is sufficient room left in the AG to + * complete a full tree split for an extent insert. If + * we are converting the middle part of an extent then + * we may need space for two tree splits. + * + * We are relying on the caller to make the correct block + * reservation for this operation to succeed. If the + * reservation amount is insufficient then we may fail a + * block allocation here and corrupt the filesystem. + */ + args.minleft = xfs_trans_get_block_res(args.tp); + } else if (cur->bc_private.b.flist->xbf_low) { + args.type = XFS_ALLOCTYPE_START_BNO; + } else { + args.type = XFS_ALLOCTYPE_NEAR_BNO; + } + + args.minlen = args.maxlen = args.prod = 1; + args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; + if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) { + error = ENOSPC; + goto error0; + } + error = xfs_alloc_vextent(&args); + if (error) + goto error0; + + if (args.fsbno == NULLFSBLOCK && args.minleft) { + /* + * Could not find an AG with enough free space to satisfy + * a full btree split. Try again without minleft and if + * successful activate the lowspace algorithm. + */ + args.fsbno = 0; + args.type = XFS_ALLOCTYPE_FIRST_AG; + args.minleft = 0; + error = xfs_alloc_vextent(&args); + if (error) + goto error0; + cur->bc_private.b.flist->xbf_low = 1; + } + if (args.fsbno == NULLFSBLOCK) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + ASSERT(args.len == 1); + cur->bc_private.b.firstblock = args.fsbno; + cur->bc_private.b.allocated++; + cur->bc_private.b.ip->i_d.di_nblocks++; + xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); + xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip, + XFS_TRANS_DQ_BCOUNT, 1L); + + new->l = cpu_to_be64(args.fsbno); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + + error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +STATIC int +xfs_bmbt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_trans *tp = cur->bc_tp; + xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); + + xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp); + ip->i_d.di_nblocks--; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_binval(tp, bp); + return 0; +} + +STATIC int +xfs_bmbt_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + if (level == cur->bc_nlevels - 1) { + struct xfs_ifork *ifp; + + ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, + cur->bc_private.b.whichfork); + + return xfs_bmbt_maxrecs(cur->bc_mp, + ifp->if_broot_bytes, level == 0) / 2; + } + + return cur->bc_mp->m_bmap_dmnr[level != 0]; +} + +int +xfs_bmbt_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + if (level == cur->bc_nlevels - 1) { + struct xfs_ifork *ifp; + + ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, + cur->bc_private.b.whichfork); + + return xfs_bmbt_maxrecs(cur->bc_mp, + ifp->if_broot_bytes, level == 0); + } + + return cur->bc_mp->m_bmap_dmxr[level != 0]; + +} + +/* + * Get the maximum records we could store in the on-disk format. + * + * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but + * for the root node this checks the available space in the dinode fork + * so that we can resize the in-memory buffer to match it. After a + * resize to the maximum size this function returns the same value + * as xfs_bmbt_get_maxrecs for the root node, too. + */ +STATIC int +xfs_bmbt_get_dmaxrecs( + struct xfs_btree_cur *cur, + int level) +{ + if (level != cur->bc_nlevels - 1) + return cur->bc_mp->m_bmap_dmxr[level != 0]; + return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0); +} + +STATIC void +xfs_bmbt_init_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + key->bmbt.br_startoff = + cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt)); +} + +STATIC void +xfs_bmbt_init_rec_from_key( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + ASSERT(key->bmbt.br_startoff != 0); + + xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff), + 0, 0, XFS_EXT_NORM); +} + +STATIC void +xfs_bmbt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b); +} + +STATIC void +xfs_bmbt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + ptr->l = 0; +} + +STATIC __int64_t +xfs_bmbt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) - + cur->bc_rec.b.br_startoff; +} + +static bool +xfs_bmbt_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + unsigned int level; + + switch (block->bb_magic) { + case cpu_to_be32(XFS_BMAP_CRC_MAGIC): + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid)) + return false; + if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn) + return false; + /* + * XXX: need a better way of verifying the owner here. Right now + * just make sure there has been one set. + */ + if (be64_to_cpu(block->bb_u.l.bb_owner) == 0) + return false; + /* fall through */ + case cpu_to_be32(XFS_BMAP_MAGIC): + break; + default: + return false; + } + + /* + * numrecs and level verification. + * + * We don't know what fork we belong to, so just verify that the level + * is less than the maximum of the two. Later checks will be more + * precise. + */ + level = be16_to_cpu(block->bb_level); + if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) + return false; + if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) + return false; + + /* sibling pointer verification */ + if (!block->bb_u.l.bb_leftsib || + (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) && + !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) + return false; + if (!block->bb_u.l.bb_rightsib || + (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) && + !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) + return false; + + return true; +} + +static void +xfs_bmbt_read_verify( + struct xfs_buf *bp) +{ + if (!xfs_btree_lblock_verify_crc(bp)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_bmbt_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_verifier_error(bp); + } +} + +static void +xfs_bmbt_write_verify( + struct xfs_buf *bp) +{ + if (!xfs_bmbt_verify(bp)) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + xfs_btree_lblock_calc_crc(bp); +} + +const struct xfs_buf_ops xfs_bmbt_buf_ops = { + .verify_read = xfs_bmbt_read_verify, + .verify_write = xfs_bmbt_write_verify, +}; + + +#if defined(DEBUG) || defined(XFS_WARN) +STATIC int +xfs_bmbt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return be64_to_cpu(k1->bmbt.br_startoff) < + be64_to_cpu(k2->bmbt.br_startoff); +} + +STATIC int +xfs_bmbt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + return xfs_bmbt_disk_get_startoff(&r1->bmbt) + + xfs_bmbt_disk_get_blockcount(&r1->bmbt) <= + xfs_bmbt_disk_get_startoff(&r2->bmbt); +} +#endif /* DEBUG */ + +static const struct xfs_btree_ops xfs_bmbt_ops = { + .rec_len = sizeof(xfs_bmbt_rec_t), + .key_len = sizeof(xfs_bmbt_key_t), + + .dup_cursor = xfs_bmbt_dup_cursor, + .update_cursor = xfs_bmbt_update_cursor, + .alloc_block = xfs_bmbt_alloc_block, + .free_block = xfs_bmbt_free_block, + .get_maxrecs = xfs_bmbt_get_maxrecs, + .get_minrecs = xfs_bmbt_get_minrecs, + .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, + .init_key_from_rec = xfs_bmbt_init_key_from_rec, + .init_rec_from_key = xfs_bmbt_init_rec_from_key, + .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, + .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, + .key_diff = xfs_bmbt_key_diff, + .buf_ops = &xfs_bmbt_buf_ops, +#if defined(DEBUG) || defined(XFS_WARN) + .keys_inorder = xfs_bmbt_keys_inorder, + .recs_inorder = xfs_bmbt_recs_inorder, +#endif +}; + +/* + * Allocate a new bmap btree cursor. + */ +struct xfs_btree_cur * /* new bmap btree cursor */ +xfs_bmbt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* inode owning the btree */ + int whichfork) /* data or attr fork */ +{ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_btree_cur *cur; + + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; + cur->bc_btnum = XFS_BTNUM_BMAP; + cur->bc_blocklog = mp->m_sb.sb_blocklog; + + cur->bc_ops = &xfs_bmbt_ops; + cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE; + if (xfs_sb_version_hascrc(&mp->m_sb)) + cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; + + cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); + cur->bc_private.b.ip = ip; + cur->bc_private.b.firstblock = NULLFSBLOCK; + cur->bc_private.b.flist = NULL; + cur->bc_private.b.allocated = 0; + cur->bc_private.b.flags = 0; + cur->bc_private.b.whichfork = whichfork; + + return cur; +} + +/* + * Calculate number of records in a bmap btree block. + */ +int +xfs_bmbt_maxrecs( + struct xfs_mount *mp, + int blocklen, + int leaf) +{ + blocklen -= XFS_BMBT_BLOCK_LEN(mp); + + if (leaf) + return blocklen / sizeof(xfs_bmbt_rec_t); + return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); +} + +/* + * Calculate number of records in a bmap btree inode root. + */ +int +xfs_bmdr_maxrecs( + int blocklen, + int leaf) +{ + blocklen -= sizeof(xfs_bmdr_block_t); + + if (leaf) + return blocklen / sizeof(xfs_bmdr_rec_t); + return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t)); +} + +/* + * Change the owner of a btree format fork fo the inode passed in. Change it to + * the owner of that is passed in so that we can change owners before or after + * we switch forks between inodes. The operation that the caller is doing will + * determine whether is needs to change owner before or after the switch. + * + * For demand paged transactional modification, the fork switch should be done + * after reading in all the blocks, modifying them and pinning them in the + * transaction. For modification when the buffers are already pinned in memory, + * the fork switch can be done before changing the owner as we won't need to + * validate the owner until the btree buffers are unpinned and writes can occur + * again. + * + * For recovery based ownership change, there is no transactional context and + * so a buffer list must be supplied so that we can record the buffers that we + * modified for the caller to issue IO on. + */ +int +xfs_bmbt_change_owner( + struct xfs_trans *tp, + struct xfs_inode *ip, + int whichfork, + xfs_ino_t new_owner, + struct list_head *buffer_list) +{ + struct xfs_btree_cur *cur; + int error; + + ASSERT(tp || buffer_list); + ASSERT(!(tp && buffer_list)); + if (whichfork == XFS_DATA_FORK) + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE); + else + ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE); + + cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); + if (!cur) + return ENOMEM; + + error = xfs_btree_change_owner(cur, new_owner, buffer_list); + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + return error; +} diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c new file mode 100644 index 000000000000..036b4fd34bf7 --- /dev/null +++ b/fs/xfs/libxfs/xfs_btree.c @@ -0,0 +1,3989 @@ +/* + * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_buf_item.h" +#include "xfs_btree.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_cksum.h" + +/* + * Cursor allocation zone. + */ +kmem_zone_t *xfs_btree_cur_zone; + +/* + * Btree magic numbers. + */ +static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { + { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, + XFS_FIBT_MAGIC }, + { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, + XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } +}; +#define xfs_btree_magic(cur) \ + xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] + + +STATIC int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lblock( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_block *block, /* btree long form block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp) /* buffer for block, if any */ +{ + int lblock_ok = 1; /* block passes checks */ + struct xfs_mount *mp; /* file system mount point */ + + mp = cur->bc_mp; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + lblock_ok = lblock_ok && + uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) && + block->bb_u.l.bb_blkno == cpu_to_be64( + bp ? bp->b_bn : XFS_BUF_DADDR_NULL); + } + + lblock_ok = lblock_ok && + be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && + be16_to_cpu(block->bb_level) == level && + be16_to_cpu(block->bb_numrecs) <= + cur->bc_ops->get_maxrecs(cur, level) && + block->bb_u.l.bb_leftsib && + (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || + XFS_FSB_SANITY_CHECK(mp, + be64_to_cpu(block->bb_u.l.bb_leftsib))) && + block->bb_u.l.bb_rightsib && + (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || + XFS_FSB_SANITY_CHECK(mp, + be64_to_cpu(block->bb_u.l.bb_rightsib))); + + if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, + XFS_ERRTAG_BTREE_CHECK_LBLOCK, + XFS_RANDOM_BTREE_CHECK_LBLOCK))) { + if (bp) + trace_xfs_btree_corrupt(bp, _RET_IP_); + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return EFSCORRUPTED; + } + return 0; +} + +STATIC int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_sblock( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_block *block, /* btree short form block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp) /* buffer containing block */ +{ + struct xfs_mount *mp; /* file system mount point */ + struct xfs_buf *agbp; /* buffer for ag. freespace struct */ + struct xfs_agf *agf; /* ag. freespace structure */ + xfs_agblock_t agflen; /* native ag. freespace length */ + int sblock_ok = 1; /* block passes checks */ + + mp = cur->bc_mp; + agbp = cur->bc_private.a.agbp; + agf = XFS_BUF_TO_AGF(agbp); + agflen = be32_to_cpu(agf->agf_length); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + sblock_ok = sblock_ok && + uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) && + block->bb_u.s.bb_blkno == cpu_to_be64( + bp ? bp->b_bn : XFS_BUF_DADDR_NULL); + } + + sblock_ok = sblock_ok && + be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && + be16_to_cpu(block->bb_level) == level && + be16_to_cpu(block->bb_numrecs) <= + cur->bc_ops->get_maxrecs(cur, level) && + (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || + be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && + block->bb_u.s.bb_leftsib && + (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || + be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && + block->bb_u.s.bb_rightsib; + + if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp, + XFS_ERRTAG_BTREE_CHECK_SBLOCK, + XFS_RANDOM_BTREE_CHECK_SBLOCK))) { + if (bp) + trace_xfs_btree_corrupt(bp, _RET_IP_); + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return EFSCORRUPTED; + } + return 0; +} + +/* + * Debug routine: check that block header is ok. + */ +int +xfs_btree_check_block( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_block *block, /* generic btree block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp) /* buffer containing block, if any */ +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return xfs_btree_check_lblock(cur, block, level, bp); + else + return xfs_btree_check_sblock(cur, block, level, bp); +} + +/* + * Check that (long) pointer is ok. + */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lptr( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_dfsbno_t bno, /* btree block disk address */ + int level) /* btree block level */ +{ + XFS_WANT_CORRUPTED_RETURN( + level > 0 && + bno != NULLDFSBNO && + XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); + return 0; +} + +#ifdef DEBUG +/* + * Check that (short) pointer is ok. + */ +STATIC int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_sptr( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* btree block disk address */ + int level) /* btree block level */ +{ + xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; + + XFS_WANT_CORRUPTED_RETURN( + level > 0 && + bno != NULLAGBLOCK && + bno != 0 && + bno < agblocks); + return 0; +} + +/* + * Check that block ptr is ok. + */ +STATIC int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_ptr( + struct xfs_btree_cur *cur, /* btree cursor */ + union xfs_btree_ptr *ptr, /* btree block disk address */ + int index, /* offset from ptr to check */ + int level) /* btree block level */ +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + return xfs_btree_check_lptr(cur, + be64_to_cpu((&ptr->l)[index]), level); + } else { + return xfs_btree_check_sptr(cur, + be32_to_cpu((&ptr->s)[index]), level); + } +} +#endif + +/* + * Calculate CRC on the whole btree block and stuff it into the + * long-form btree header. + * + * Prior to calculting the CRC, pull the LSN out of the buffer log item and put + * it into the buffer so recovery knows what the last modifcation was that made + * it to disk. + */ +void +xfs_btree_lblock_calc_crc( + struct xfs_buf *bp) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_buf_log_item *bip = bp->b_fspriv; + + if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + return; + if (bip) + block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); + xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); +} + +bool +xfs_btree_lblock_verify_crc( + struct xfs_buf *bp) +{ + if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); + + return true; +} + +/* + * Calculate CRC on the whole btree block and stuff it into the + * short-form btree header. + * + * Prior to calculting the CRC, pull the LSN out of the buffer log item and put + * it into the buffer so recovery knows what the last modifcation was that made + * it to disk. + */ +void +xfs_btree_sblock_calc_crc( + struct xfs_buf *bp) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_buf_log_item *bip = bp->b_fspriv; + + if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + return; + if (bip) + block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); + xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); +} + +bool +xfs_btree_sblock_verify_crc( + struct xfs_buf *bp) +{ + if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); + + return true; +} + +/* + * Delete the btree cursor. + */ +void +xfs_btree_del_cursor( + xfs_btree_cur_t *cur, /* btree cursor */ + int error) /* del because of error */ +{ + int i; /* btree level */ + + /* + * Clear the buffer pointers, and release the buffers. + * If we're doing this in the face of an error, we + * need to make sure to inspect all of the entries + * in the bc_bufs array for buffers to be unlocked. + * This is because some of the btree code works from + * level n down to 0, and if we get an error along + * the way we won't have initialized all the entries + * down to 0. + */ + for (i = 0; i < cur->bc_nlevels; i++) { + if (cur->bc_bufs[i]) + xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); + else if (!error) + break; + } + /* + * Can't free a bmap cursor without having dealt with the + * allocated indirect blocks' accounting. + */ + ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || + cur->bc_private.b.allocated == 0); + /* + * Free the cursor. + */ + kmem_zone_free(xfs_btree_cur_zone, cur); +} + +/* + * Duplicate the btree cursor. + * Allocate a new one, copy the record, re-get the buffers. + */ +int /* error */ +xfs_btree_dup_cursor( + xfs_btree_cur_t *cur, /* input cursor */ + xfs_btree_cur_t **ncur) /* output cursor */ +{ + xfs_buf_t *bp; /* btree block's buffer pointer */ + int error; /* error return value */ + int i; /* level number of btree block */ + xfs_mount_t *mp; /* mount structure for filesystem */ + xfs_btree_cur_t *new; /* new cursor value */ + xfs_trans_t *tp; /* transaction pointer, can be NULL */ + + tp = cur->bc_tp; + mp = cur->bc_mp; + + /* + * Allocate a new cursor like the old one. + */ + new = cur->bc_ops->dup_cursor(cur); + + /* + * Copy the record currently in the cursor. + */ + new->bc_rec = cur->bc_rec; + + /* + * For each level current, re-get the buffer and copy the ptr value. + */ + for (i = 0; i < new->bc_nlevels; i++) { + new->bc_ptrs[i] = cur->bc_ptrs[i]; + new->bc_ra[i] = cur->bc_ra[i]; + bp = cur->bc_bufs[i]; + if (bp) { + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + XFS_BUF_ADDR(bp), mp->m_bsize, + 0, &bp, + cur->bc_ops->buf_ops); + if (error) { + xfs_btree_del_cursor(new, error); + *ncur = NULL; + return error; + } + } + new->bc_bufs[i] = bp; + } + *ncur = new; + return 0; +} + +/* + * XFS btree block layout and addressing: + * + * There are two types of blocks in the btree: leaf and non-leaf blocks. + * + * The leaf record start with a header then followed by records containing + * the values. A non-leaf block also starts with the same header, and + * then first contains lookup keys followed by an equal number of pointers + * to the btree blocks at the previous level. + * + * +--------+-------+-------+-------+-------+-------+-------+ + * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | + * +--------+-------+-------+-------+-------+-------+-------+ + * + * +--------+-------+-------+-------+-------+-------+-------+ + * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | + * +--------+-------+-------+-------+-------+-------+-------+ + * + * The header is called struct xfs_btree_block for reasons better left unknown + * and comes in different versions for short (32bit) and long (64bit) block + * pointers. The record and key structures are defined by the btree instances + * and opaque to the btree core. The block pointers are simple disk endian + * integers, available in a short (32bit) and long (64bit) variant. + * + * The helpers below calculate the offset of a given record, key or pointer + * into a btree block (xfs_btree_*_offset) or return a pointer to the given + * record, key or pointer (xfs_btree_*_addr). Note that all addressing + * inside the btree block is done using indices starting at one, not zero! + */ + +/* + * Return size of the btree block header for this btree instance. + */ +static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) + return XFS_BTREE_LBLOCK_CRC_LEN; + return XFS_BTREE_LBLOCK_LEN; + } + if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) + return XFS_BTREE_SBLOCK_CRC_LEN; + return XFS_BTREE_SBLOCK_LEN; +} + +/* + * Return size of btree block pointers for this btree instance. + */ +static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) +{ + return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? + sizeof(__be64) : sizeof(__be32); +} + +/* + * Calculate offset of the n-th record in a btree block. + */ +STATIC size_t +xfs_btree_rec_offset( + struct xfs_btree_cur *cur, + int n) +{ + return xfs_btree_block_len(cur) + + (n - 1) * cur->bc_ops->rec_len; +} + +/* + * Calculate offset of the n-th key in a btree block. + */ +STATIC size_t +xfs_btree_key_offset( + struct xfs_btree_cur *cur, + int n) +{ + return xfs_btree_block_len(cur) + + (n - 1) * cur->bc_ops->key_len; +} + +/* + * Calculate offset of the n-th block pointer in a btree block. + */ +STATIC size_t +xfs_btree_ptr_offset( + struct xfs_btree_cur *cur, + int n, + int level) +{ + return xfs_btree_block_len(cur) + + cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + + (n - 1) * xfs_btree_ptr_len(cur); +} + +/* + * Return a pointer to the n-th record in the btree block. + */ +STATIC union xfs_btree_rec * +xfs_btree_rec_addr( + struct xfs_btree_cur *cur, + int n, + struct xfs_btree_block *block) +{ + return (union xfs_btree_rec *) + ((char *)block + xfs_btree_rec_offset(cur, n)); +} + +/* + * Return a pointer to the n-th key in the btree block. + */ +STATIC union xfs_btree_key * +xfs_btree_key_addr( + struct xfs_btree_cur *cur, + int n, + struct xfs_btree_block *block) +{ + return (union xfs_btree_key *) + ((char *)block + xfs_btree_key_offset(cur, n)); +} + +/* + * Return a pointer to the n-th block pointer in the btree block. + */ +STATIC union xfs_btree_ptr * +xfs_btree_ptr_addr( + struct xfs_btree_cur *cur, + int n, + struct xfs_btree_block *block) +{ + int level = xfs_btree_get_level(block); + + ASSERT(block->bb_level != 0); + + return (union xfs_btree_ptr *) + ((char *)block + xfs_btree_ptr_offset(cur, n, level)); +} + +/* + * Get the root block which is stored in the inode. + * + * For now this btree implementation assumes the btree root is always + * stored in the if_broot field of an inode fork. + */ +STATIC struct xfs_btree_block * +xfs_btree_get_iroot( + struct xfs_btree_cur *cur) +{ + struct xfs_ifork *ifp; + + ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); + return (struct xfs_btree_block *)ifp->if_broot; +} + +/* + * Retrieve the block pointer from the cursor at the given level. + * This may be an inode btree root or from a buffer. + */ +STATIC struct xfs_btree_block * /* generic btree block pointer */ +xfs_btree_get_block( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level in btree */ + struct xfs_buf **bpp) /* buffer containing the block */ +{ + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level == cur->bc_nlevels - 1)) { + *bpp = NULL; + return xfs_btree_get_iroot(cur); + } + + *bpp = cur->bc_bufs[level]; + return XFS_BUF_TO_BLOCK(*bpp); +} + +/* + * Get a buffer for the block, return it with no data read. + * Long-form addressing. + */ +xfs_buf_t * /* buffer for fsbno */ +xfs_btree_get_bufl( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_fsblock_t fsbno, /* file system block number */ + uint lock) /* lock flags for get_buf */ +{ + xfs_daddr_t d; /* real disk block address */ + + ASSERT(fsbno != NULLFSBLOCK); + d = XFS_FSB_TO_DADDR(mp, fsbno); + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); +} + +/* + * Get a buffer for the block, return it with no data read. + * Short-form addressing. + */ +xfs_buf_t * /* buffer for agno/agbno */ +xfs_btree_get_bufs( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t agbno, /* allocation group block number */ + uint lock) /* lock flags for get_buf */ +{ + xfs_daddr_t d; /* real disk block address */ + + ASSERT(agno != NULLAGNUMBER); + ASSERT(agbno != NULLAGBLOCK); + d = XFS_AGB_TO_DADDR(mp, agno, agbno); + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); +} + +/* + * Check for the cursor referring to the last block at the given level. + */ +int /* 1=is last block, 0=not last block */ +xfs_btree_islastblock( + xfs_btree_cur_t *cur, /* btree cursor */ + int level) /* level to check */ +{ + struct xfs_btree_block *block; /* generic btree block pointer */ + xfs_buf_t *bp; /* buffer containing block */ + + block = xfs_btree_get_block(cur, level, &bp); + xfs_btree_check_block(cur, block, level, bp); + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO); + else + return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); +} + +/* + * Change the cursor to point to the first record at the given level. + * Other levels are unaffected. + */ +STATIC int /* success=1, failure=0 */ +xfs_btree_firstrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level) /* level to change */ +{ + struct xfs_btree_block *block; /* generic btree block pointer */ + xfs_buf_t *bp; /* buffer containing block */ + + /* + * Get the block pointer for this level. + */ + block = xfs_btree_get_block(cur, level, &bp); + xfs_btree_check_block(cur, block, level, bp); + /* + * It's empty, there is no such record. + */ + if (!block->bb_numrecs) + return 0; + /* + * Set the ptr value to 1, that's the first record/key. + */ + cur->bc_ptrs[level] = 1; + return 1; +} + +/* + * Change the cursor to point to the last record in the current block + * at the given level. Other levels are unaffected. + */ +STATIC int /* success=1, failure=0 */ +xfs_btree_lastrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level) /* level to change */ +{ + struct xfs_btree_block *block; /* generic btree block pointer */ + xfs_buf_t *bp; /* buffer containing block */ + + /* + * Get the block pointer for this level. + */ + block = xfs_btree_get_block(cur, level, &bp); + xfs_btree_check_block(cur, block, level, bp); + /* + * It's empty, there is no such record. + */ + if (!block->bb_numrecs) + return 0; + /* + * Set the ptr value to numrecs, that's the last record/key. + */ + cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); + return 1; +} + +/* + * Compute first and last byte offsets for the fields given. + * Interprets the offsets table, which contains struct field offsets. + */ +void +xfs_btree_offsets( + __int64_t fields, /* bitmask of fields */ + const short *offsets, /* table of field offsets */ + int nbits, /* number of bits to inspect */ + int *first, /* output: first byte offset */ + int *last) /* output: last byte offset */ +{ + int i; /* current bit number */ + __int64_t imask; /* mask for current bit number */ + + ASSERT(fields != 0); + /* + * Find the lowest bit, so the first byte offset. + */ + for (i = 0, imask = 1LL; ; i++, imask <<= 1) { + if (imask & fields) { + *first = offsets[i]; + break; + } + } + /* + * Find the highest bit, so the last byte offset. + */ + for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) { + if (imask & fields) { + *last = offsets[i + 1] - 1; + break; + } + } +} + +/* + * Get a buffer for the block, return it read in. + * Long-form addressing. + */ +int +xfs_btree_read_bufl( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_fsblock_t fsbno, /* file system block number */ + uint lock, /* lock flags for read_buf */ + struct xfs_buf **bpp, /* buffer for fsbno */ + int refval, /* ref count value for buffer */ + const struct xfs_buf_ops *ops) +{ + struct xfs_buf *bp; /* return value */ + xfs_daddr_t d; /* real disk block address */ + int error; + + ASSERT(fsbno != NULLFSBLOCK); + d = XFS_FSB_TO_DADDR(mp, fsbno); + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, + mp->m_bsize, lock, &bp, ops); + if (error) + return error; + if (bp) + xfs_buf_set_ref(bp, refval); + *bpp = bp; + return 0; +} + +/* + * Read-ahead the block, don't wait for it, don't return a buffer. + * Long-form addressing. + */ +/* ARGSUSED */ +void +xfs_btree_reada_bufl( + struct xfs_mount *mp, /* file system mount point */ + xfs_fsblock_t fsbno, /* file system block number */ + xfs_extlen_t count, /* count of filesystem blocks */ + const struct xfs_buf_ops *ops) +{ + xfs_daddr_t d; + + ASSERT(fsbno != NULLFSBLOCK); + d = XFS_FSB_TO_DADDR(mp, fsbno); + xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); +} + +/* + * Read-ahead the block, don't wait for it, don't return a buffer. + * Short-form addressing. + */ +/* ARGSUSED */ +void +xfs_btree_reada_bufs( + struct xfs_mount *mp, /* file system mount point */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t agbno, /* allocation group block number */ + xfs_extlen_t count, /* count of filesystem blocks */ + const struct xfs_buf_ops *ops) +{ + xfs_daddr_t d; + + ASSERT(agno != NULLAGNUMBER); + ASSERT(agbno != NULLAGBLOCK); + d = XFS_AGB_TO_DADDR(mp, agno, agbno); + xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); +} + +STATIC int +xfs_btree_readahead_lblock( + struct xfs_btree_cur *cur, + int lr, + struct xfs_btree_block *block) +{ + int rval = 0; + xfs_dfsbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); + xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); + + if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { + xfs_btree_reada_bufl(cur->bc_mp, left, 1, + cur->bc_ops->buf_ops); + rval++; + } + + if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { + xfs_btree_reada_bufl(cur->bc_mp, right, 1, + cur->bc_ops->buf_ops); + rval++; + } + + return rval; +} + +STATIC int +xfs_btree_readahead_sblock( + struct xfs_btree_cur *cur, + int lr, + struct xfs_btree_block *block) +{ + int rval = 0; + xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); + xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); + + + if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + left, 1, cur->bc_ops->buf_ops); + rval++; + } + + if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + right, 1, cur->bc_ops->buf_ops); + rval++; + } + + return rval; +} + +/* + * Read-ahead btree blocks, at the given level. + * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. + */ +STATIC int +xfs_btree_readahead( + struct xfs_btree_cur *cur, /* btree cursor */ + int lev, /* level in btree */ + int lr) /* left/right bits */ +{ + struct xfs_btree_block *block; + + /* + * No readahead needed if we are at the root level and the + * btree root is stored in the inode. + */ + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (lev == cur->bc_nlevels - 1)) + return 0; + + if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) + return 0; + + cur->bc_ra[lev] |= lr; + block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return xfs_btree_readahead_lblock(cur, lr, block); + return xfs_btree_readahead_sblock(cur, lr, block); +} + +STATIC xfs_daddr_t +xfs_btree_ptr_to_daddr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); + + return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); + } else { + ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); + ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); + + return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, + be32_to_cpu(ptr->s)); + } +} + +/* + * Readahead @count btree blocks at the given @ptr location. + * + * We don't need to care about long or short form btrees here as we have a + * method of converting the ptr directly to a daddr available to us. + */ +STATIC void +xfs_btree_readahead_ptr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + xfs_extlen_t count) +{ + xfs_buf_readahead(cur->bc_mp->m_ddev_targp, + xfs_btree_ptr_to_daddr(cur, ptr), + cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); +} + +/* + * Set the buffer for level "lev" in the cursor to bp, releasing + * any previous buffer. + */ +STATIC void +xfs_btree_setbuf( + xfs_btree_cur_t *cur, /* btree cursor */ + int lev, /* level in btree */ + xfs_buf_t *bp) /* new buffer to set */ +{ + struct xfs_btree_block *b; /* btree block */ + + if (cur->bc_bufs[lev]) + xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); + cur->bc_bufs[lev] = bp; + cur->bc_ra[lev] = 0; + + b = XFS_BUF_TO_BLOCK(bp); + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)) + cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; + if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)) + cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; + } else { + if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) + cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; + if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) + cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; + } +} + +STATIC int +xfs_btree_ptr_is_null( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return ptr->l == cpu_to_be64(NULLDFSBNO); + else + return ptr->s == cpu_to_be32(NULLAGBLOCK); +} + +STATIC void +xfs_btree_set_ptr_null( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + ptr->l = cpu_to_be64(NULLDFSBNO); + else + ptr->s = cpu_to_be32(NULLAGBLOCK); +} + +/* + * Get/set/init sibling pointers + */ +STATIC void +xfs_btree_get_sibling( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_ptr *ptr, + int lr) +{ + ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (lr == XFS_BB_RIGHTSIB) + ptr->l = block->bb_u.l.bb_rightsib; + else + ptr->l = block->bb_u.l.bb_leftsib; + } else { + if (lr == XFS_BB_RIGHTSIB) + ptr->s = block->bb_u.s.bb_rightsib; + else + ptr->s = block->bb_u.s.bb_leftsib; + } +} + +STATIC void +xfs_btree_set_sibling( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_ptr *ptr, + int lr) +{ + ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (lr == XFS_BB_RIGHTSIB) + block->bb_u.l.bb_rightsib = ptr->l; + else + block->bb_u.l.bb_leftsib = ptr->l; + } else { + if (lr == XFS_BB_RIGHTSIB) + block->bb_u.s.bb_rightsib = ptr->s; + else + block->bb_u.s.bb_leftsib = ptr->s; + } +} + +void +xfs_btree_init_block_int( + struct xfs_mount *mp, + struct xfs_btree_block *buf, + xfs_daddr_t blkno, + __u32 magic, + __u16 level, + __u16 numrecs, + __u64 owner, + unsigned int flags) +{ + buf->bb_magic = cpu_to_be32(magic); + buf->bb_level = cpu_to_be16(level); + buf->bb_numrecs = cpu_to_be16(numrecs); + + if (flags & XFS_BTREE_LONG_PTRS) { + buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); + buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); + if (flags & XFS_BTREE_CRC_BLOCKS) { + buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); + buf->bb_u.l.bb_owner = cpu_to_be64(owner); + uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid); + buf->bb_u.l.bb_pad = 0; + buf->bb_u.l.bb_lsn = 0; + } + } else { + /* owner is a 32 bit value on short blocks */ + __u32 __owner = (__u32)owner; + + buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); + buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); + if (flags & XFS_BTREE_CRC_BLOCKS) { + buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); + buf->bb_u.s.bb_owner = cpu_to_be32(__owner); + uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid); + buf->bb_u.s.bb_lsn = 0; + } + } +} + +void +xfs_btree_init_block( + struct xfs_mount *mp, + struct xfs_buf *bp, + __u32 magic, + __u16 level, + __u16 numrecs, + __u64 owner, + unsigned int flags) +{ + xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, + magic, level, numrecs, owner, flags); +} + +STATIC void +xfs_btree_init_block_cur( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int level, + int numrecs) +{ + __u64 owner; + + /* + * we can pull the owner from the cursor right now as the different + * owners align directly with the pointer size of the btree. This may + * change in future, but is safe for current users of the generic btree + * code. + */ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + owner = cur->bc_private.b.ip->i_ino; + else + owner = cur->bc_private.a.agno; + + xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, + xfs_btree_magic(cur), level, numrecs, + owner, cur->bc_flags); +} + +/* + * Return true if ptr is the last record in the btree and + * we need to track updates to this record. The decision + * will be further refined in the update_lastrec method. + */ +STATIC int +xfs_btree_is_lastrec( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + int level) +{ + union xfs_btree_ptr ptr; + + if (level > 0) + return 0; + if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) + return 0; + + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + if (!xfs_btree_ptr_is_null(cur, &ptr)) + return 0; + return 1; +} + +STATIC void +xfs_btree_buf_to_ptr( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, + XFS_BUF_ADDR(bp))); + else { + ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp, + XFS_BUF_ADDR(bp))); + } +} + +STATIC void +xfs_btree_set_refs( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + switch (cur->bc_btnum) { + case XFS_BTNUM_BNO: + case XFS_BTNUM_CNT: + xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); + break; + case XFS_BTNUM_INO: + case XFS_BTNUM_FINO: + xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); + break; + case XFS_BTNUM_BMAP: + xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); + break; + default: + ASSERT(0); + } +} + +STATIC int +xfs_btree_get_buf_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int flags, + struct xfs_btree_block **block, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = cur->bc_mp; + xfs_daddr_t d; + + /* need to sort out how callers deal with failures first */ + ASSERT(!(flags & XBF_TRYLOCK)); + + d = xfs_btree_ptr_to_daddr(cur, ptr); + *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, + mp->m_bsize, flags); + + if (!*bpp) + return ENOMEM; + + (*bpp)->b_ops = cur->bc_ops->buf_ops; + *block = XFS_BUF_TO_BLOCK(*bpp); + return 0; +} + +/* + * Read in the buffer at the given ptr and return the buffer and + * the block pointer within the buffer. + */ +STATIC int +xfs_btree_read_buf_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int flags, + struct xfs_btree_block **block, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = cur->bc_mp; + xfs_daddr_t d; + int error; + + /* need to sort out how callers deal with failures first */ + ASSERT(!(flags & XBF_TRYLOCK)); + + d = xfs_btree_ptr_to_daddr(cur, ptr); + error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, + mp->m_bsize, flags, bpp, + cur->bc_ops->buf_ops); + if (error) + return error; + + xfs_btree_set_refs(cur, *bpp); + *block = XFS_BUF_TO_BLOCK(*bpp); + return 0; +} + +/* + * Copy keys from one btree block to another. + */ +STATIC void +xfs_btree_copy_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *dst_key, + union xfs_btree_key *src_key, + int numkeys) +{ + ASSERT(numkeys >= 0); + memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); +} + +/* + * Copy records from one btree block to another. + */ +STATIC void +xfs_btree_copy_recs( + struct xfs_btree_cur *cur, + union xfs_btree_rec *dst_rec, + union xfs_btree_rec *src_rec, + int numrecs) +{ + ASSERT(numrecs >= 0); + memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); +} + +/* + * Copy block pointers from one btree block to another. + */ +STATIC void +xfs_btree_copy_ptrs( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *dst_ptr, + union xfs_btree_ptr *src_ptr, + int numptrs) +{ + ASSERT(numptrs >= 0); + memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); +} + +/* + * Shift keys one index left/right inside a single btree block. + */ +STATIC void +xfs_btree_shift_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + int dir, + int numkeys) +{ + char *dst_key; + + ASSERT(numkeys >= 0); + ASSERT(dir == 1 || dir == -1); + + dst_key = (char *)key + (dir * cur->bc_ops->key_len); + memmove(dst_key, key, numkeys * cur->bc_ops->key_len); +} + +/* + * Shift records one index left/right inside a single btree block. + */ +STATIC void +xfs_btree_shift_recs( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + int dir, + int numrecs) +{ + char *dst_rec; + + ASSERT(numrecs >= 0); + ASSERT(dir == 1 || dir == -1); + + dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); + memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); +} + +/* + * Shift block pointers one index left/right inside a single btree block. + */ +STATIC void +xfs_btree_shift_ptrs( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int dir, + int numptrs) +{ + char *dst_ptr; + + ASSERT(numptrs >= 0); + ASSERT(dir == 1 || dir == -1); + + dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); + memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); +} + +/* + * Log key values from the btree block. + */ +STATIC void +xfs_btree_log_keys( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int first, + int last) +{ + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); + + if (bp) { + xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); + xfs_trans_log_buf(cur->bc_tp, bp, + xfs_btree_key_offset(cur, first), + xfs_btree_key_offset(cur, last + 1) - 1); + } else { + xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, + xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + +/* + * Log record values from the btree block. + */ +void +xfs_btree_log_recs( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int first, + int last) +{ + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); + + xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); + xfs_trans_log_buf(cur->bc_tp, bp, + xfs_btree_rec_offset(cur, first), + xfs_btree_rec_offset(cur, last + 1) - 1); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + +/* + * Log block pointer fields from a btree block (nonleaf). + */ +STATIC void +xfs_btree_log_ptrs( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_buf *bp, /* buffer containing btree block */ + int first, /* index of first pointer to log */ + int last) /* index of last pointer to log */ +{ + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); + + if (bp) { + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + int level = xfs_btree_get_level(block); + + xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); + xfs_trans_log_buf(cur->bc_tp, bp, + xfs_btree_ptr_offset(cur, first, level), + xfs_btree_ptr_offset(cur, last + 1, level) - 1); + } else { + xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, + xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + +/* + * Log fields from a btree block header. + */ +void +xfs_btree_log_block( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_buf *bp, /* buffer containing btree block */ + int fields) /* mask of fields: XFS_BB_... */ +{ + int first; /* first byte offset logged */ + int last; /* last byte offset logged */ + static const short soffsets[] = { /* table of offsets (short) */ + offsetof(struct xfs_btree_block, bb_magic), + offsetof(struct xfs_btree_block, bb_level), + offsetof(struct xfs_btree_block, bb_numrecs), + offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), + offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), + offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), + offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), + offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), + offsetof(struct xfs_btree_block, bb_u.s.bb_owner), + offsetof(struct xfs_btree_block, bb_u.s.bb_crc), + XFS_BTREE_SBLOCK_CRC_LEN + }; + static const short loffsets[] = { /* table of offsets (long) */ + offsetof(struct xfs_btree_block, bb_magic), + offsetof(struct xfs_btree_block, bb_level), + offsetof(struct xfs_btree_block, bb_numrecs), + offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), + offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), + offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), + offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), + offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), + offsetof(struct xfs_btree_block, bb_u.l.bb_owner), + offsetof(struct xfs_btree_block, bb_u.l.bb_crc), + offsetof(struct xfs_btree_block, bb_u.l.bb_pad), + XFS_BTREE_LBLOCK_CRC_LEN + }; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBI(cur, bp, fields); + + if (bp) { + int nbits; + + if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { + /* + * We don't log the CRC when updating a btree + * block but instead recreate it during log + * recovery. As the log buffers have checksums + * of their own this is safe and avoids logging a crc + * update in a lot of places. + */ + if (fields == XFS_BB_ALL_BITS) + fields = XFS_BB_ALL_BITS_CRC; + nbits = XFS_BB_NUM_BITS_CRC; + } else { + nbits = XFS_BB_NUM_BITS; + } + xfs_btree_offsets(fields, + (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? + loffsets : soffsets, + nbits, &first, &last); + xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); + xfs_trans_log_buf(cur->bc_tp, bp, first, last); + } else { + xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, + xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + +/* + * Increment cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. + */ +int /* error */ +xfs_btree_increment( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; + union xfs_btree_ptr ptr; + struct xfs_buf *bp; + int error; /* error return value */ + int lev; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + ASSERT(level < cur->bc_nlevels); + + /* Read-ahead to the right at this level. */ + xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); + + /* Get a pointer to the btree block. */ + block = xfs_btree_get_block(cur, level, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; +#endif + + /* We're done if we remain in the block after the increment. */ + if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) + goto out1; + + /* Fail if we just went off the right edge of the tree. */ + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + if (xfs_btree_ptr_is_null(cur, &ptr)) + goto out0; + + XFS_BTREE_STATS_INC(cur, increment); + + /* + * March up the tree incrementing pointers. + * Stop when we don't go off the right edge of a block. + */ + for (lev = level + 1; lev < cur->bc_nlevels; lev++) { + block = xfs_btree_get_block(cur, lev, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, lev, bp); + if (error) + goto error0; +#endif + + if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) + break; + + /* Read-ahead the right block for the next loop. */ + xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); + } + + /* + * If we went off the root then we are either seriously + * confused or have the tree root in an inode. + */ + if (lev == cur->bc_nlevels) { + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + goto out0; + ASSERT(0); + error = EFSCORRUPTED; + goto error0; + } + ASSERT(lev < cur->bc_nlevels); + + /* + * Now walk back down the tree, fixing up the cursor's buffer + * pointers and key numbers. + */ + for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { + union xfs_btree_ptr *ptrp; + + ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + --lev; + error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); + if (error) + goto error0; + + xfs_btree_setbuf(cur, lev, bp); + cur->bc_ptrs[lev] = 1; + } +out1: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* + * Decrement cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. + */ +int /* error */ +xfs_btree_decrement( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; + xfs_buf_t *bp; + int error; /* error return value */ + int lev; + union xfs_btree_ptr ptr; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + ASSERT(level < cur->bc_nlevels); + + /* Read-ahead to the left at this level. */ + xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); + + /* We're done if we remain in the block after the decrement. */ + if (--cur->bc_ptrs[level] > 0) + goto out1; + + /* Get a pointer to the btree block. */ + block = xfs_btree_get_block(cur, level, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; +#endif + + /* Fail if we just went off the left edge of the tree. */ + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); + if (xfs_btree_ptr_is_null(cur, &ptr)) + goto out0; + + XFS_BTREE_STATS_INC(cur, decrement); + + /* + * March up the tree decrementing pointers. + * Stop when we don't go off the left edge of a block. + */ + for (lev = level + 1; lev < cur->bc_nlevels; lev++) { + if (--cur->bc_ptrs[lev] > 0) + break; + /* Read-ahead the left block for the next loop. */ + xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); + } + + /* + * If we went off the root then we are seriously confused. + * or the root of the tree is in an inode. + */ + if (lev == cur->bc_nlevels) { + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + goto out0; + ASSERT(0); + error = EFSCORRUPTED; + goto error0; + } + ASSERT(lev < cur->bc_nlevels); + + /* + * Now walk back down the tree, fixing up the cursor's buffer + * pointers and key numbers. + */ + for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { + union xfs_btree_ptr *ptrp; + + ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + --lev; + error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); + if (error) + goto error0; + xfs_btree_setbuf(cur, lev, bp); + cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); + } +out1: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +STATIC int +xfs_btree_lookup_get_block( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level in the btree */ + union xfs_btree_ptr *pp, /* ptr to btree block */ + struct xfs_btree_block **blkp) /* return btree block */ +{ + struct xfs_buf *bp; /* buffer pointer for btree block */ + int error = 0; + + /* special case the root block if in an inode */ + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level == cur->bc_nlevels - 1)) { + *blkp = xfs_btree_get_iroot(cur); + return 0; + } + + /* + * If the old buffer at this level for the disk address we are + * looking for re-use it. + * + * Otherwise throw it away and get a new one. + */ + bp = cur->bc_bufs[level]; + if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) { + *blkp = XFS_BUF_TO_BLOCK(bp); + return 0; + } + + error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); + if (error) + return error; + + xfs_btree_setbuf(cur, level, bp); + return 0; +} + +/* + * Get current search key. For level 0 we don't actually have a key + * structure so we make one up from the record. For all other levels + * we just return the right key. + */ +STATIC union xfs_btree_key * +xfs_lookup_get_search_key( + struct xfs_btree_cur *cur, + int level, + int keyno, + struct xfs_btree_block *block, + union xfs_btree_key *kp) +{ + if (level == 0) { + cur->bc_ops->init_key_from_rec(kp, + xfs_btree_rec_addr(cur, keyno, block)); + return kp; + } + + return xfs_btree_key_addr(cur, keyno, block); +} + +/* + * Lookup the record. The cursor is made to point to it, based on dir. + * stat is set to 0 if can't find any such record, 1 for success. + */ +int /* error */ +xfs_btree_lookup( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_lookup_t dir, /* <=, ==, or >= */ + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; /* current btree block */ + __int64_t diff; /* difference for the current key */ + int error; /* error return value */ + int keyno; /* current key number */ + int level; /* level in the btree */ + union xfs_btree_ptr *pp; /* ptr to btree block */ + union xfs_btree_ptr ptr; /* ptr to btree block */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, dir); + + XFS_BTREE_STATS_INC(cur, lookup); + + block = NULL; + keyno = 0; + + /* initialise start pointer from cursor */ + cur->bc_ops->init_ptr_from_cur(cur, &ptr); + pp = &ptr; + + /* + * Iterate over each level in the btree, starting at the root. + * For each level above the leaves, find the key we need, based + * on the lookup record, then follow the corresponding block + * pointer down to the next level. + */ + for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { + /* Get the block we need to do the lookup on. */ + error = xfs_btree_lookup_get_block(cur, level, pp, &block); + if (error) + goto error0; + + if (diff == 0) { + /* + * If we already had a key match at a higher level, we + * know we need to use the first entry in this block. + */ + keyno = 1; + } else { + /* Otherwise search this block. Do a binary search. */ + + int high; /* high entry number */ + int low; /* low entry number */ + + /* Set low and high entry numbers, 1-based. */ + low = 1; + high = xfs_btree_get_numrecs(block); + if (!high) { + /* Block is empty, must be an empty leaf. */ + ASSERT(level == 0 && cur->bc_nlevels == 1); + + cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + /* Binary search the block. */ + while (low <= high) { + union xfs_btree_key key; + union xfs_btree_key *kp; + + XFS_BTREE_STATS_INC(cur, compare); + + /* keyno is average of low and high. */ + keyno = (low + high) >> 1; + + /* Get current search key */ + kp = xfs_lookup_get_search_key(cur, level, + keyno, block, &key); + + /* + * Compute difference to get next direction: + * - less than, move right + * - greater than, move left + * - equal, we're done + */ + diff = cur->bc_ops->key_diff(cur, kp); + if (diff < 0) + low = keyno + 1; + else if (diff > 0) + high = keyno - 1; + else + break; + } + } + + /* + * If there are more levels, set up for the next level + * by getting the block number and filling in the cursor. + */ + if (level > 0) { + /* + * If we moved left, need the previous key number, + * unless there isn't one. + */ + if (diff > 0 && --keyno < 1) + keyno = 1; + pp = xfs_btree_ptr_addr(cur, keyno, block); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, pp, 0, level); + if (error) + goto error0; +#endif + cur->bc_ptrs[level] = keyno; + } + } + + /* Done with the search. See if we need to adjust the results. */ + if (dir != XFS_LOOKUP_LE && diff < 0) { + keyno++; + /* + * If ge search and we went off the end of the block, but it's + * not the last block, we're in the wrong block. + */ + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + if (dir == XFS_LOOKUP_GE && + keyno > xfs_btree_get_numrecs(block) && + !xfs_btree_ptr_is_null(cur, &ptr)) { + int i; + + cur->bc_ptrs[0] = keyno; + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + } + } else if (dir == XFS_LOOKUP_LE && diff > 0) + keyno--; + cur->bc_ptrs[0] = keyno; + + /* Return if we succeeded or not. */ + if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) + *stat = 0; + else if (dir != XFS_LOOKUP_EQ || diff == 0) + *stat = 1; + else + *stat = 0; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* + * Update keys at all levels from here to the root along the cursor's path. + */ +STATIC int +xfs_btree_updkey( + struct xfs_btree_cur *cur, + union xfs_btree_key *keyp, + int level) +{ + struct xfs_btree_block *block; + struct xfs_buf *bp; + union xfs_btree_key *kp; + int ptr; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGIK(cur, level, keyp); + + ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1); + + /* + * Go up the tree from this level toward the root. + * At each level, update the key value to the value input. + * Stop when we reach a level where the cursor isn't pointing + * at the first entry in the block. + */ + for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { +#ifdef DEBUG + int error; +#endif + block = xfs_btree_get_block(cur, level, &bp); +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } +#endif + ptr = cur->bc_ptrs[level]; + kp = xfs_btree_key_addr(cur, ptr, block); + xfs_btree_copy_keys(cur, kp, keyp, 1); + xfs_btree_log_keys(cur, bp, ptr, ptr); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} + +/* + * Update the record referred to by cur to the value in the + * given record. This either works (return 0) or gets an + * EFSCORRUPTED error. + */ +int +xfs_btree_update( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + struct xfs_btree_block *block; + struct xfs_buf *bp; + int error; + int ptr; + union xfs_btree_rec *rp; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGR(cur, rec); + + /* Pick up the current block. */ + block = xfs_btree_get_block(cur, 0, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, 0, bp); + if (error) + goto error0; +#endif + /* Get the address of the rec to be updated. */ + ptr = cur->bc_ptrs[0]; + rp = xfs_btree_rec_addr(cur, ptr, block); + + /* Fill in the new contents and log them. */ + xfs_btree_copy_recs(cur, rp, rec, 1); + xfs_btree_log_recs(cur, bp, ptr, ptr); + + /* + * If we are tracking the last record in the tree and + * we are at the far right edge of the tree, update it. + */ + if (xfs_btree_is_lastrec(cur, block, 0)) { + cur->bc_ops->update_lastrec(cur, block, rec, + ptr, LASTREC_UPDATE); + } + + /* Updating first rec in leaf. Pass new key value up to our parent. */ + if (ptr == 1) { + union xfs_btree_key key; + + cur->bc_ops->init_key_from_rec(&key, rec); + error = xfs_btree_updkey(cur, &key, 1); + if (error) + goto error0; + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* + * Move 1 record left from cur/level if possible. + * Update cur to reflect the new path. + */ +STATIC int /* error */ +xfs_btree_lshift( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + union xfs_btree_key key; /* btree key */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + int lrecs; /* left record count */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + int rrecs; /* right record count */ + union xfs_btree_ptr lptr; /* left btree pointer */ + union xfs_btree_key *rkp = NULL; /* right btree key */ + union xfs_btree_ptr *rpp = NULL; /* right address pointer */ + union xfs_btree_rec *rrp = NULL; /* right record pointer */ + int error; /* error return value */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 1) + goto out0; + + /* Set up variables for this block as "right". */ + right = xfs_btree_get_block(cur, level, &rbp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, right, level, rbp); + if (error) + goto error0; +#endif + + /* If we've got no left sibling then we can't shift an entry left. */ + xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); + if (xfs_btree_ptr_is_null(cur, &lptr)) + goto out0; + + /* + * If the cursor entry is the one that would be moved, don't + * do it... it's too complicated. + */ + if (cur->bc_ptrs[level] <= 1) + goto out0; + + /* Set up the left neighbor as "left". */ + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); + if (error) + goto error0; + + /* If it's full, it can't take another entry. */ + lrecs = xfs_btree_get_numrecs(left); + if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) + goto out0; + + rrecs = xfs_btree_get_numrecs(right); + + /* + * We add one entry to the left side and remove one for the right side. + * Account for it here, the changes will be updated on disk and logged + * later. + */ + lrecs++; + rrecs--; + + XFS_BTREE_STATS_INC(cur, lshift); + XFS_BTREE_STATS_ADD(cur, moves, 1); + + /* + * If non-leaf, copy a key and a ptr to the left block. + * Log the changes to the left block. + */ + if (level > 0) { + /* It's a non-leaf. Move keys and pointers. */ + union xfs_btree_key *lkp; /* left btree key */ + union xfs_btree_ptr *lpp; /* left address pointer */ + + lkp = xfs_btree_key_addr(cur, lrecs, left); + rkp = xfs_btree_key_addr(cur, 1, right); + + lpp = xfs_btree_ptr_addr(cur, lrecs, left); + rpp = xfs_btree_ptr_addr(cur, 1, right); +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, rpp, 0, level); + if (error) + goto error0; +#endif + xfs_btree_copy_keys(cur, lkp, rkp, 1); + xfs_btree_copy_ptrs(cur, lpp, rpp, 1); + + xfs_btree_log_keys(cur, lbp, lrecs, lrecs); + xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); + + ASSERT(cur->bc_ops->keys_inorder(cur, + xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); + } else { + /* It's a leaf. Move records. */ + union xfs_btree_rec *lrp; /* left record pointer */ + + lrp = xfs_btree_rec_addr(cur, lrecs, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_copy_recs(cur, lrp, rrp, 1); + xfs_btree_log_recs(cur, lbp, lrecs, lrecs); + + ASSERT(cur->bc_ops->recs_inorder(cur, + xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); + } + + xfs_btree_set_numrecs(left, lrecs); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); + + xfs_btree_set_numrecs(right, rrecs); + xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); + + /* + * Slide the contents of right down one entry. + */ + XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); + if (level > 0) { + /* It's a nonleaf. operate on keys and ptrs */ +#ifdef DEBUG + int i; /* loop index */ + + for (i = 0; i < rrecs; i++) { + error = xfs_btree_check_ptr(cur, rpp, i + 1, level); + if (error) + goto error0; + } +#endif + xfs_btree_shift_keys(cur, + xfs_btree_key_addr(cur, 2, right), + -1, rrecs); + xfs_btree_shift_ptrs(cur, + xfs_btree_ptr_addr(cur, 2, right), + -1, rrecs); + + xfs_btree_log_keys(cur, rbp, 1, rrecs); + xfs_btree_log_ptrs(cur, rbp, 1, rrecs); + } else { + /* It's a leaf. operate on records */ + xfs_btree_shift_recs(cur, + xfs_btree_rec_addr(cur, 2, right), + -1, rrecs); + xfs_btree_log_recs(cur, rbp, 1, rrecs); + + /* + * If it's the first record in the block, we'll need a key + * structure to pass up to the next level (updkey). + */ + cur->bc_ops->init_key_from_rec(&key, + xfs_btree_rec_addr(cur, 1, right)); + rkp = &key; + } + + /* Update the parent key values of right. */ + error = xfs_btree_updkey(cur, rkp, level + 1); + if (error) + goto error0; + + /* Slide the cursor value left one. */ + cur->bc_ptrs[level]--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* + * Move 1 record right from cur/level if possible. + * Update cur to reflect the new path. + */ +STATIC int /* error */ +xfs_btree_rshift( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + union xfs_btree_key key; /* btree key */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + struct xfs_btree_cur *tcur; /* temporary btree cursor */ + union xfs_btree_ptr rptr; /* right block pointer */ + union xfs_btree_key *rkp; /* right btree key */ + int rrecs; /* right record count */ + int lrecs; /* left record count */ + int error; /* error return value */ + int i; /* loop counter */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level == cur->bc_nlevels - 1)) + goto out0; + + /* Set up variables for this block as "left". */ + left = xfs_btree_get_block(cur, level, &lbp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, left, level, lbp); + if (error) + goto error0; +#endif + + /* If we've got no right sibling then we can't shift an entry right. */ + xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); + if (xfs_btree_ptr_is_null(cur, &rptr)) + goto out0; + + /* + * If the cursor entry is the one that would be moved, don't + * do it... it's too complicated. + */ + lrecs = xfs_btree_get_numrecs(left); + if (cur->bc_ptrs[level] >= lrecs) + goto out0; + + /* Set up the right neighbor as "right". */ + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); + if (error) + goto error0; + + /* If it's full, it can't take another entry. */ + rrecs = xfs_btree_get_numrecs(right); + if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) + goto out0; + + XFS_BTREE_STATS_INC(cur, rshift); + XFS_BTREE_STATS_ADD(cur, moves, rrecs); + + /* + * Make a hole at the start of the right neighbor block, then + * copy the last left block entry to the hole. + */ + if (level > 0) { + /* It's a nonleaf. make a hole in the keys and ptrs */ + union xfs_btree_key *lkp; + union xfs_btree_ptr *lpp; + union xfs_btree_ptr *rpp; + + lkp = xfs_btree_key_addr(cur, lrecs, left); + lpp = xfs_btree_ptr_addr(cur, lrecs, left); + rkp = xfs_btree_key_addr(cur, 1, right); + rpp = xfs_btree_ptr_addr(cur, 1, right); + +#ifdef DEBUG + for (i = rrecs - 1; i >= 0; i--) { + error = xfs_btree_check_ptr(cur, rpp, i, level); + if (error) + goto error0; + } +#endif + + xfs_btree_shift_keys(cur, rkp, 1, rrecs); + xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, lpp, 0, level); + if (error) + goto error0; +#endif + + /* Now put the new data in, and log it. */ + xfs_btree_copy_keys(cur, rkp, lkp, 1); + xfs_btree_copy_ptrs(cur, rpp, lpp, 1); + + xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); + xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); + + ASSERT(cur->bc_ops->keys_inorder(cur, rkp, + xfs_btree_key_addr(cur, 2, right))); + } else { + /* It's a leaf. make a hole in the records */ + union xfs_btree_rec *lrp; + union xfs_btree_rec *rrp; + + lrp = xfs_btree_rec_addr(cur, lrecs, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_shift_recs(cur, rrp, 1, rrecs); + + /* Now put the new data in, and log it. */ + xfs_btree_copy_recs(cur, rrp, lrp, 1); + xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); + + cur->bc_ops->init_key_from_rec(&key, rrp); + rkp = &key; + + ASSERT(cur->bc_ops->recs_inorder(cur, rrp, + xfs_btree_rec_addr(cur, 2, right))); + } + + /* + * Decrement and log left's numrecs, bump and log right's numrecs. + */ + xfs_btree_set_numrecs(left, --lrecs); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); + + xfs_btree_set_numrecs(right, ++rrecs); + xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); + + /* + * Using a temporary cursor, update the parent key values of the + * block on the right. + */ + error = xfs_btree_dup_cursor(cur, &tcur); + if (error) + goto error0; + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_increment(tcur, level, &i); + if (error) + goto error1; + + error = xfs_btree_updkey(tcur, rkp, level + 1); + if (error) + goto error1; + + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + +error1: + XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Split cur/level block in half. + * Return new block number and the key to its first + * record (to be inserted into parent). + */ +STATIC int /* error */ +xfs_btree_split( + struct xfs_btree_cur *cur, + int level, + union xfs_btree_ptr *ptrp, + union xfs_btree_key *key, + struct xfs_btree_cur **curp, + int *stat) /* success/failure */ +{ + union xfs_btree_ptr lptr; /* left sibling block ptr */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + union xfs_btree_ptr rptr; /* right sibling block ptr */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + union xfs_btree_ptr rrptr; /* right-right sibling ptr */ + struct xfs_buf *rrbp; /* right-right buffer pointer */ + struct xfs_btree_block *rrblock; /* right-right btree block */ + int lrecs; + int rrecs; + int src_index; + int error; /* error return value */ +#ifdef DEBUG + int i; +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key); + + XFS_BTREE_STATS_INC(cur, split); + + /* Set up left block (current one). */ + left = xfs_btree_get_block(cur, level, &lbp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, left, level, lbp); + if (error) + goto error0; +#endif + + xfs_btree_buf_to_ptr(cur, lbp, &lptr); + + /* Allocate the new block. If we can't do it, we're toast. Give up. */ + error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); + if (error) + goto error0; + if (*stat == 0) + goto out0; + XFS_BTREE_STATS_INC(cur, alloc); + + /* Set up the new block as "right". */ + error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); + if (error) + goto error0; + + /* Fill in the btree header for the new right block. */ + xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0); + + /* + * Split the entries between the old and the new block evenly. + * Make sure that if there's an odd number of entries now, that + * each new block will have the same number of entries. + */ + lrecs = xfs_btree_get_numrecs(left); + rrecs = lrecs / 2; + if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) + rrecs++; + src_index = (lrecs - rrecs + 1); + + XFS_BTREE_STATS_ADD(cur, moves, rrecs); + + /* + * Copy btree block entries from the left block over to the + * new block, the right. Update the right block and log the + * changes. + */ + if (level > 0) { + /* It's a non-leaf. Move keys and pointers. */ + union xfs_btree_key *lkp; /* left btree key */ + union xfs_btree_ptr *lpp; /* left address pointer */ + union xfs_btree_key *rkp; /* right btree key */ + union xfs_btree_ptr *rpp; /* right address pointer */ + + lkp = xfs_btree_key_addr(cur, src_index, left); + lpp = xfs_btree_ptr_addr(cur, src_index, left); + rkp = xfs_btree_key_addr(cur, 1, right); + rpp = xfs_btree_ptr_addr(cur, 1, right); + +#ifdef DEBUG + for (i = src_index; i < rrecs; i++) { + error = xfs_btree_check_ptr(cur, lpp, i, level); + if (error) + goto error0; + } +#endif + + xfs_btree_copy_keys(cur, rkp, lkp, rrecs); + xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); + + xfs_btree_log_keys(cur, rbp, 1, rrecs); + xfs_btree_log_ptrs(cur, rbp, 1, rrecs); + + /* Grab the keys to the entries moved to the right block */ + xfs_btree_copy_keys(cur, key, rkp, 1); + } else { + /* It's a leaf. Move records. */ + union xfs_btree_rec *lrp; /* left record pointer */ + union xfs_btree_rec *rrp; /* right record pointer */ + + lrp = xfs_btree_rec_addr(cur, src_index, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_copy_recs(cur, rrp, lrp, rrecs); + xfs_btree_log_recs(cur, rbp, 1, rrecs); + + cur->bc_ops->init_key_from_rec(key, + xfs_btree_rec_addr(cur, 1, right)); + } + + + /* + * Find the left block number by looking in the buffer. + * Adjust numrecs, sibling pointers. + */ + xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); + xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); + xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); + xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); + + lrecs -= rrecs; + xfs_btree_set_numrecs(left, lrecs); + xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); + + xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); + + /* + * If there's a block to the new block's right, make that block + * point back to right instead of to left. + */ + if (!xfs_btree_ptr_is_null(cur, &rrptr)) { + error = xfs_btree_read_buf_block(cur, &rrptr, + 0, &rrblock, &rrbp); + if (error) + goto error0; + xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); + xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); + } + /* + * If the cursor is really in the right block, move it there. + * If it's just pointing past the last entry in left, then we'll + * insert there, so don't change anything in that case. + */ + if (cur->bc_ptrs[level] > lrecs + 1) { + xfs_btree_setbuf(cur, level, rbp); + cur->bc_ptrs[level] -= lrecs; + } + /* + * If there are more levels, we'll need another cursor which refers + * the right block, no matter where this cursor was. + */ + if (level + 1 < cur->bc_nlevels) { + error = xfs_btree_dup_cursor(cur, curp); + if (error) + goto error0; + (*curp)->bc_ptrs[level + 1]++; + } + *ptrp = rptr; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* + * Copy the old inode root contents into a real block and make the + * broot point to it. + */ +int /* error */ +xfs_btree_new_iroot( + struct xfs_btree_cur *cur, /* btree cursor */ + int *logflags, /* logging flags for inode */ + int *stat) /* return status - 0 fail */ +{ + struct xfs_buf *cbp; /* buffer for cblock */ + struct xfs_btree_block *block; /* btree block */ + struct xfs_btree_block *cblock; /* child btree block */ + union xfs_btree_key *ckp; /* child key pointer */ + union xfs_btree_ptr *cpp; /* child ptr pointer */ + union xfs_btree_key *kp; /* pointer to btree key */ + union xfs_btree_ptr *pp; /* pointer to block addr */ + union xfs_btree_ptr nptr; /* new block addr */ + int level; /* btree level */ + int error; /* error return code */ +#ifdef DEBUG + int i; /* loop counter */ +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, newroot); + + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + + level = cur->bc_nlevels - 1; + + block = xfs_btree_get_iroot(cur); + pp = xfs_btree_ptr_addr(cur, 1, block); + + /* Allocate the new block. If we can't do it, we're toast. Give up. */ + error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); + if (error) + goto error0; + if (*stat == 0) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; + } + XFS_BTREE_STATS_INC(cur, alloc); + + /* Copy the root into a real block. */ + error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); + if (error) + goto error0; + + /* + * we can't just memcpy() the root in for CRC enabled btree blocks. + * In that case have to also ensure the blkno remains correct + */ + memcpy(cblock, block, xfs_btree_block_len(cur)); + if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn); + else + cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn); + } + + be16_add_cpu(&block->bb_level, 1); + xfs_btree_set_numrecs(block, 1); + cur->bc_nlevels++; + cur->bc_ptrs[level + 1] = 1; + + kp = xfs_btree_key_addr(cur, 1, block); + ckp = xfs_btree_key_addr(cur, 1, cblock); + xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock)); + + cpp = xfs_btree_ptr_addr(cur, 1, cblock); +#ifdef DEBUG + for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { + error = xfs_btree_check_ptr(cur, pp, i, level); + if (error) + goto error0; + } +#endif + xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock)); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, &nptr, 0, level); + if (error) + goto error0; +#endif + xfs_btree_copy_ptrs(cur, pp, &nptr, 1); + + xfs_iroot_realloc(cur->bc_private.b.ip, + 1 - xfs_btree_get_numrecs(cblock), + cur->bc_private.b.whichfork); + + xfs_btree_setbuf(cur, level, cbp); + + /* + * Do all this logging at the end so that + * the root is at the right level. + */ + xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); + xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); + xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); + + *logflags |= + XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); + *stat = 1; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* + * Allocate a new root block, fill it in. + */ +STATIC int /* error */ +xfs_btree_new_root( + struct xfs_btree_cur *cur, /* btree cursor */ + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; /* one half of the old root block */ + struct xfs_buf *bp; /* buffer containing block */ + int error; /* error return value */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + struct xfs_buf *nbp; /* new (root) buffer */ + struct xfs_btree_block *new; /* new (root) btree block */ + int nptr; /* new value for key index, 1 or 2 */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + union xfs_btree_ptr rptr; + union xfs_btree_ptr lptr; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, newroot); + + /* initialise our start point from the cursor */ + cur->bc_ops->init_ptr_from_cur(cur, &rptr); + + /* Allocate the new block. If we can't do it, we're toast. Give up. */ + error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); + if (error) + goto error0; + if (*stat == 0) + goto out0; + XFS_BTREE_STATS_INC(cur, alloc); + + /* Set up the new block. */ + error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); + if (error) + goto error0; + + /* Set the root in the holding structure increasing the level by 1. */ + cur->bc_ops->set_root(cur, &lptr, 1); + + /* + * At the previous root level there are now two blocks: the old root, + * and the new block generated when it was split. We don't know which + * one the cursor is pointing at, so we set up variables "left" and + * "right" for each case. + */ + block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); + if (error) + goto error0; +#endif + + xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); + if (!xfs_btree_ptr_is_null(cur, &rptr)) { + /* Our block is left, pick up the right block. */ + lbp = bp; + xfs_btree_buf_to_ptr(cur, lbp, &lptr); + left = block; + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); + if (error) + goto error0; + bp = rbp; + nptr = 1; + } else { + /* Our block is right, pick up the left block. */ + rbp = bp; + xfs_btree_buf_to_ptr(cur, rbp, &rptr); + right = block; + xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); + if (error) + goto error0; + bp = lbp; + nptr = 2; + } + /* Fill in the new block's btree header and log it. */ + xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); + xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); + ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && + !xfs_btree_ptr_is_null(cur, &rptr)); + + /* Fill in the key data in the new root. */ + if (xfs_btree_get_level(left) > 0) { + xfs_btree_copy_keys(cur, + xfs_btree_key_addr(cur, 1, new), + xfs_btree_key_addr(cur, 1, left), 1); + xfs_btree_copy_keys(cur, + xfs_btree_key_addr(cur, 2, new), + xfs_btree_key_addr(cur, 1, right), 1); + } else { + cur->bc_ops->init_key_from_rec( + xfs_btree_key_addr(cur, 1, new), + xfs_btree_rec_addr(cur, 1, left)); + cur->bc_ops->init_key_from_rec( + xfs_btree_key_addr(cur, 2, new), + xfs_btree_rec_addr(cur, 1, right)); + } + xfs_btree_log_keys(cur, nbp, 1, 2); + + /* Fill in the pointer data in the new root. */ + xfs_btree_copy_ptrs(cur, + xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); + xfs_btree_copy_ptrs(cur, + xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); + xfs_btree_log_ptrs(cur, nbp, 1, 2); + + /* Fix up the cursor. */ + xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); + cur->bc_ptrs[cur->bc_nlevels] = nptr; + cur->bc_nlevels++; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; +} + +STATIC int +xfs_btree_make_block_unfull( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* btree level */ + int numrecs,/* # of recs in block */ + int *oindex,/* old tree index */ + int *index, /* new tree index */ + union xfs_btree_ptr *nptr, /* new btree ptr */ + struct xfs_btree_cur **ncur, /* new btree cursor */ + union xfs_btree_rec *nrec, /* new record */ + int *stat) +{ + union xfs_btree_key key; /* new btree key value */ + int error = 0; + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 1) { + struct xfs_inode *ip = cur->bc_private.b.ip; + + if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { + /* A root block that can be made bigger. */ + xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); + } else { + /* A root block that needs replacing */ + int logflags = 0; + + error = xfs_btree_new_iroot(cur, &logflags, stat); + if (error || *stat == 0) + return error; + + xfs_trans_log_inode(cur->bc_tp, ip, logflags); + } + + return 0; + } + + /* First, try shifting an entry to the right neighbor. */ + error = xfs_btree_rshift(cur, level, stat); + if (error || *stat) + return error; + + /* Next, try shifting an entry to the left neighbor. */ + error = xfs_btree_lshift(cur, level, stat); + if (error) + return error; + + if (*stat) { + *oindex = *index = cur->bc_ptrs[level]; + return 0; + } + + /* + * Next, try splitting the current block in half. + * + * If this works we have to re-set our variables because we + * could be in a different block now. + */ + error = xfs_btree_split(cur, level, nptr, &key, ncur, stat); + if (error || *stat == 0) + return error; + + + *index = cur->bc_ptrs[level]; + cur->bc_ops->init_rec_from_key(&key, nrec); + return 0; +} + +/* + * Insert one record/level. Return information to the caller + * allowing the next level up to proceed if necessary. + */ +STATIC int +xfs_btree_insrec( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level to insert record at */ + union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ + union xfs_btree_rec *recp, /* i/o: record data inserted */ + struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; /* btree block */ + struct xfs_buf *bp; /* buffer for block */ + union xfs_btree_key key; /* btree key */ + union xfs_btree_ptr nptr; /* new block ptr */ + struct xfs_btree_cur *ncur; /* new btree cursor */ + union xfs_btree_rec nrec; /* new record count */ + int optr; /* old key/record index */ + int ptr; /* key/record index */ + int numrecs;/* number of records */ + int error; /* error return value */ +#ifdef DEBUG + int i; +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp); + + ncur = NULL; + + /* + * If we have an external root pointer, and we've made it to the + * root level, allocate a new root block and we're done. + */ + if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level >= cur->bc_nlevels)) { + error = xfs_btree_new_root(cur, stat); + xfs_btree_set_ptr_null(cur, ptrp); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return error; + } + + /* If we're off the left edge, return failure. */ + ptr = cur->bc_ptrs[level]; + if (ptr == 0) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + /* Make a key out of the record data to be inserted, and save it. */ + cur->bc_ops->init_key_from_rec(&key, recp); + + optr = ptr; + + XFS_BTREE_STATS_INC(cur, insrec); + + /* Get pointers to the btree buffer and block. */ + block = xfs_btree_get_block(cur, level, &bp); + numrecs = xfs_btree_get_numrecs(block); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; + + /* Check that the new entry is being inserted in the right place. */ + if (ptr <= numrecs) { + if (level == 0) { + ASSERT(cur->bc_ops->recs_inorder(cur, recp, + xfs_btree_rec_addr(cur, ptr, block))); + } else { + ASSERT(cur->bc_ops->keys_inorder(cur, &key, + xfs_btree_key_addr(cur, ptr, block))); + } + } +#endif + + /* + * If the block is full, we can't insert the new entry until we + * make the block un-full. + */ + xfs_btree_set_ptr_null(cur, &nptr); + if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { + error = xfs_btree_make_block_unfull(cur, level, numrecs, + &optr, &ptr, &nptr, &ncur, &nrec, stat); + if (error || *stat == 0) + goto error0; + } + + /* + * The current block may have changed if the block was + * previously full and we have just made space in it. + */ + block = xfs_btree_get_block(cur, level, &bp); + numrecs = xfs_btree_get_numrecs(block); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + return error; +#endif + + /* + * At this point we know there's room for our new entry in the block + * we're pointing at. + */ + XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); + + if (level > 0) { + /* It's a nonleaf. make a hole in the keys and ptrs */ + union xfs_btree_key *kp; + union xfs_btree_ptr *pp; + + kp = xfs_btree_key_addr(cur, ptr, block); + pp = xfs_btree_ptr_addr(cur, ptr, block); + +#ifdef DEBUG + for (i = numrecs - ptr; i >= 0; i--) { + error = xfs_btree_check_ptr(cur, pp, i, level); + if (error) + return error; + } +#endif + + xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); + xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, ptrp, 0, level); + if (error) + goto error0; +#endif + + /* Now put the new data in, bump numrecs and log it. */ + xfs_btree_copy_keys(cur, kp, &key, 1); + xfs_btree_copy_ptrs(cur, pp, ptrp, 1); + numrecs++; + xfs_btree_set_numrecs(block, numrecs); + xfs_btree_log_ptrs(cur, bp, ptr, numrecs); + xfs_btree_log_keys(cur, bp, ptr, numrecs); +#ifdef DEBUG + if (ptr < numrecs) { + ASSERT(cur->bc_ops->keys_inorder(cur, kp, + xfs_btree_key_addr(cur, ptr + 1, block))); + } +#endif + } else { + /* It's a leaf. make a hole in the records */ + union xfs_btree_rec *rp; + + rp = xfs_btree_rec_addr(cur, ptr, block); + + xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); + + /* Now put the new data in, bump numrecs and log it. */ + xfs_btree_copy_recs(cur, rp, recp, 1); + xfs_btree_set_numrecs(block, ++numrecs); + xfs_btree_log_recs(cur, bp, ptr, numrecs); +#ifdef DEBUG + if (ptr < numrecs) { + ASSERT(cur->bc_ops->recs_inorder(cur, rp, + xfs_btree_rec_addr(cur, ptr + 1, block))); + } +#endif + } + + /* Log the new number of records in the btree header. */ + xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); + + /* If we inserted at the start of a block, update the parents' keys. */ + if (optr == 1) { + error = xfs_btree_updkey(cur, &key, level + 1); + if (error) + goto error0; + } + + /* + * If we are tracking the last record in the tree and + * we are at the far right edge of the tree, update it. + */ + if (xfs_btree_is_lastrec(cur, block, level)) { + cur->bc_ops->update_lastrec(cur, block, recp, + ptr, LASTREC_INSREC); + } + + /* + * Return the new block number, if any. + * If there is one, give back a record value and a cursor too. + */ + *ptrp = nptr; + if (!xfs_btree_ptr_is_null(cur, &nptr)) { + *recp = nrec; + *curp = ncur; + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* + * Insert the record at the point referenced by cur. + * + * A multi-level split of the tree on insert will invalidate the original + * cursor. All callers of this function should assume that the cursor is + * no longer valid and revalidate it. + */ +int +xfs_btree_insert( + struct xfs_btree_cur *cur, + int *stat) +{ + int error; /* error return value */ + int i; /* result value, 0 for failure */ + int level; /* current level number in btree */ + union xfs_btree_ptr nptr; /* new block number (split result) */ + struct xfs_btree_cur *ncur; /* new cursor (split result) */ + struct xfs_btree_cur *pcur; /* previous level's cursor */ + union xfs_btree_rec rec; /* record to insert */ + + level = 0; + ncur = NULL; + pcur = cur; + + xfs_btree_set_ptr_null(cur, &nptr); + cur->bc_ops->init_rec_from_cur(cur, &rec); + + /* + * Loop going up the tree, starting at the leaf level. + * Stop when we don't get a split block, that must mean that + * the insert is finished with this level. + */ + do { + /* + * Insert nrec/nptr into this level of the tree. + * Note if we fail, nptr will be null. + */ + error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i); + if (error) { + if (pcur != cur) + xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); + goto error0; + } + + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + level++; + + /* + * See if the cursor we just used is trash. + * Can't trash the caller's cursor, but otherwise we should + * if ncur is a new cursor or we're about to be done. + */ + if (pcur != cur && + (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { + /* Save the state from the cursor before we trash it */ + if (cur->bc_ops->update_cursor) + cur->bc_ops->update_cursor(pcur, cur); + cur->bc_nlevels = pcur->bc_nlevels; + xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); + } + /* If we got a new cursor, switch to it. */ + if (ncur) { + pcur = ncur; + ncur = NULL; + } + } while (!xfs_btree_ptr_is_null(cur, &nptr)); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = i; + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* + * Try to merge a non-leaf block back into the inode root. + * + * Note: the killroot names comes from the fact that we're effectively + * killing the old root block. But because we can't just delete the + * inode we have to copy the single block it was pointing to into the + * inode. + */ +STATIC int +xfs_btree_kill_iroot( + struct xfs_btree_cur *cur) +{ + int whichfork = cur->bc_private.b.whichfork; + struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_btree_block *block; + struct xfs_btree_block *cblock; + union xfs_btree_key *kp; + union xfs_btree_key *ckp; + union xfs_btree_ptr *pp; + union xfs_btree_ptr *cpp; + struct xfs_buf *cbp; + int level; + int index; + int numrecs; +#ifdef DEBUG + union xfs_btree_ptr ptr; + int i; +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(cur->bc_nlevels > 1); + + /* + * Don't deal with the root block needs to be a leaf case. + * We're just going to turn the thing back into extents anyway. + */ + level = cur->bc_nlevels - 1; + if (level == 1) + goto out0; + + /* + * Give up if the root has multiple children. + */ + block = xfs_btree_get_iroot(cur); + if (xfs_btree_get_numrecs(block) != 1) + goto out0; + + cblock = xfs_btree_get_block(cur, level - 1, &cbp); + numrecs = xfs_btree_get_numrecs(cblock); + + /* + * Only do this if the next level will fit. + * Then the data must be copied up to the inode, + * instead of freeing the root you free the next level. + */ + if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) + goto out0; + + XFS_BTREE_STATS_INC(cur, killroot); + +#ifdef DEBUG + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); + ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); +#endif + + index = numrecs - cur->bc_ops->get_maxrecs(cur, level); + if (index) { + xfs_iroot_realloc(cur->bc_private.b.ip, index, + cur->bc_private.b.whichfork); + block = ifp->if_broot; + } + + be16_add_cpu(&block->bb_numrecs, index); + ASSERT(block->bb_numrecs == cblock->bb_numrecs); + + kp = xfs_btree_key_addr(cur, 1, block); + ckp = xfs_btree_key_addr(cur, 1, cblock); + xfs_btree_copy_keys(cur, kp, ckp, numrecs); + + pp = xfs_btree_ptr_addr(cur, 1, block); + cpp = xfs_btree_ptr_addr(cur, 1, cblock); +#ifdef DEBUG + for (i = 0; i < numrecs; i++) { + int error; + + error = xfs_btree_check_ptr(cur, cpp, i, level - 1); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + } +#endif + xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); + + cur->bc_ops->free_block(cur, cbp); + XFS_BTREE_STATS_INC(cur, free); + + cur->bc_bufs[level - 1] = NULL; + be16_add_cpu(&block->bb_level, -1); + xfs_trans_log_inode(cur->bc_tp, ip, + XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + cur->bc_nlevels--; +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} + +/* + * Kill the current root node, and replace it with it's only child node. + */ +STATIC int +xfs_btree_kill_root( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int level, + union xfs_btree_ptr *newroot) +{ + int error; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, killroot); + + /* + * Update the root pointer, decreasing the level by 1 and then + * free the old root. + */ + cur->bc_ops->set_root(cur, newroot, -1); + + error = cur->bc_ops->free_block(cur, bp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + + XFS_BTREE_STATS_INC(cur, free); + + cur->bc_bufs[level] = NULL; + cur->bc_ra[level] = 0; + cur->bc_nlevels--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} + +STATIC int +xfs_btree_dec_cursor( + struct xfs_btree_cur *cur, + int level, + int *stat) +{ + int error; + int i; + + if (level > 0) { + error = xfs_btree_decrement(cur, level, &i); + if (error) + return error; + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +} + +/* + * Single level of the btree record deletion routine. + * Delete record pointed to by cur/level. + * Remove the record from its block then rebalance the tree. + * Return 0 for error, 1 for done, 2 to go on to the next level. + */ +STATIC int /* error */ +xfs_btree_delrec( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level removing record from */ + int *stat) /* fail/done/go-on */ +{ + struct xfs_btree_block *block; /* btree block */ + union xfs_btree_ptr cptr; /* current block ptr */ + struct xfs_buf *bp; /* buffer for block */ + int error; /* error return value */ + int i; /* loop counter */ + union xfs_btree_key key; /* storage for keyp */ + union xfs_btree_key *keyp = &key; /* passed to the next level */ + union xfs_btree_ptr lptr; /* left sibling block ptr */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + int lrecs = 0; /* left record count */ + int ptr; /* key/record index */ + union xfs_btree_ptr rptr; /* right sibling block ptr */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + struct xfs_btree_block *rrblock; /* right-right btree block */ + struct xfs_buf *rrbp; /* right-right buffer pointer */ + int rrecs = 0; /* right record count */ + struct xfs_btree_cur *tcur; /* temporary btree cursor */ + int numrecs; /* temporary numrec count */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + tcur = NULL; + + /* Get the index of the entry being deleted, check for nothing there. */ + ptr = cur->bc_ptrs[level]; + if (ptr == 0) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + /* Get the buffer & block containing the record or key/ptr. */ + block = xfs_btree_get_block(cur, level, &bp); + numrecs = xfs_btree_get_numrecs(block); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; +#endif + + /* Fail if we're off the end of the block. */ + if (ptr > numrecs) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + XFS_BTREE_STATS_INC(cur, delrec); + XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); + + /* Excise the entries being deleted. */ + if (level > 0) { + /* It's a nonleaf. operate on keys and ptrs */ + union xfs_btree_key *lkp; + union xfs_btree_ptr *lpp; + + lkp = xfs_btree_key_addr(cur, ptr + 1, block); + lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); + +#ifdef DEBUG + for (i = 0; i < numrecs - ptr; i++) { + error = xfs_btree_check_ptr(cur, lpp, i, level); + if (error) + goto error0; + } +#endif + + if (ptr < numrecs) { + xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); + xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); + xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); + xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); + } + + /* + * If it's the first record in the block, we'll need to pass a + * key up to the next level (updkey). + */ + if (ptr == 1) + keyp = xfs_btree_key_addr(cur, 1, block); + } else { + /* It's a leaf. operate on records */ + if (ptr < numrecs) { + xfs_btree_shift_recs(cur, + xfs_btree_rec_addr(cur, ptr + 1, block), + -1, numrecs - ptr); + xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); + } + + /* + * If it's the first record in the block, we'll need a key + * structure to pass up to the next level (updkey). + */ + if (ptr == 1) { + cur->bc_ops->init_key_from_rec(&key, + xfs_btree_rec_addr(cur, 1, block)); + keyp = &key; + } + } + + /* + * Decrement and log the number of entries in the block. + */ + xfs_btree_set_numrecs(block, --numrecs); + xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); + + /* + * If we are tracking the last record in the tree and + * we are at the far right edge of the tree, update it. + */ + if (xfs_btree_is_lastrec(cur, block, level)) { + cur->bc_ops->update_lastrec(cur, block, NULL, + ptr, LASTREC_DELREC); + } + + /* + * We're at the root level. First, shrink the root block in-memory. + * Try to get rid of the next level down. If we can't then there's + * nothing left to do. + */ + if (level == cur->bc_nlevels - 1) { + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + xfs_iroot_realloc(cur->bc_private.b.ip, -1, + cur->bc_private.b.whichfork); + + error = xfs_btree_kill_iroot(cur); + if (error) + goto error0; + + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + *stat = 1; + return 0; + } + + /* + * If this is the root level, and there's only one entry left, + * and it's NOT the leaf level, then we can get rid of this + * level. + */ + if (numrecs == 1 && level > 0) { + union xfs_btree_ptr *pp; + /* + * pp is still set to the first pointer in the block. + * Make it the new root of the btree. + */ + pp = xfs_btree_ptr_addr(cur, 1, block); + error = xfs_btree_kill_root(cur, bp, level, pp); + if (error) + goto error0; + } else if (level > 0) { + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + } + *stat = 1; + return 0; + } + + /* + * If we deleted the leftmost entry in the block, update the + * key values above us in the tree. + */ + if (ptr == 1) { + error = xfs_btree_updkey(cur, keyp, level + 1); + if (error) + goto error0; + } + + /* + * If the number of records remaining in the block is at least + * the minimum, we're done. + */ + if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + + /* + * Otherwise, we have to move some records around to keep the + * tree balanced. Look at the left and right sibling blocks to + * see if we can re-balance by moving only one record. + */ + xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); + xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); + + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + /* + * One child of root, need to get a chance to copy its contents + * into the root and delete it. Can't go up to next level, + * there's nothing to delete there. + */ + if (xfs_btree_ptr_is_null(cur, &rptr) && + xfs_btree_ptr_is_null(cur, &lptr) && + level == cur->bc_nlevels - 2) { + error = xfs_btree_kill_iroot(cur); + if (!error) + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + } + + ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || + !xfs_btree_ptr_is_null(cur, &lptr)); + + /* + * Duplicate the cursor so our btree manipulations here won't + * disrupt the next level up. + */ + error = xfs_btree_dup_cursor(cur, &tcur); + if (error) + goto error0; + + /* + * If there's a right sibling, see if it's ok to shift an entry + * out of it. + */ + if (!xfs_btree_ptr_is_null(cur, &rptr)) { + /* + * Move the temp cursor to the last entry in the next block. + * Actually any entry but the first would suffice. + */ + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_increment(tcur, level, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + /* Grab a pointer to the block. */ + right = xfs_btree_get_block(tcur, level, &rbp); +#ifdef DEBUG + error = xfs_btree_check_block(tcur, right, level, rbp); + if (error) + goto error0; +#endif + /* Grab the current block number, for future use. */ + xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); + + /* + * If right block is full enough so that removing one entry + * won't make it too empty, and left-shifting an entry out + * of right to us works, we're done. + */ + if (xfs_btree_get_numrecs(right) - 1 >= + cur->bc_ops->get_minrecs(tcur, level)) { + error = xfs_btree_lshift(tcur, level, &i); + if (error) + goto error0; + if (i) { + ASSERT(xfs_btree_get_numrecs(block) >= + cur->bc_ops->get_minrecs(tcur, level)); + + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + } + + /* + * Otherwise, grab the number of records in right for + * future reference, and fix up the temp cursor to point + * to our block again (last record). + */ + rrecs = xfs_btree_get_numrecs(right); + if (!xfs_btree_ptr_is_null(cur, &lptr)) { + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_decrement(tcur, level, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + } + + /* + * If there's a left sibling, see if it's ok to shift an entry + * out of it. + */ + if (!xfs_btree_ptr_is_null(cur, &lptr)) { + /* + * Move the temp cursor to the first entry in the + * previous block. + */ + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_decrement(tcur, level, &i); + if (error) + goto error0; + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + /* Grab a pointer to the block. */ + left = xfs_btree_get_block(tcur, level, &lbp); +#ifdef DEBUG + error = xfs_btree_check_block(cur, left, level, lbp); + if (error) + goto error0; +#endif + /* Grab the current block number, for future use. */ + xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); + + /* + * If left block is full enough so that removing one entry + * won't make it too empty, and right-shifting an entry out + * of left to us works, we're done. + */ + if (xfs_btree_get_numrecs(left) - 1 >= + cur->bc_ops->get_minrecs(tcur, level)) { + error = xfs_btree_rshift(tcur, level, &i); + if (error) + goto error0; + if (i) { + ASSERT(xfs_btree_get_numrecs(block) >= + cur->bc_ops->get_minrecs(tcur, level)); + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + if (level == 0) + cur->bc_ptrs[0]++; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + } + } + + /* + * Otherwise, grab the number of records in right for + * future reference. + */ + lrecs = xfs_btree_get_numrecs(left); + } + + /* Delete the temp cursor, we're done with it. */ + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + + /* If here, we need to do a join to keep the tree balanced. */ + ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); + + if (!xfs_btree_ptr_is_null(cur, &lptr) && + lrecs + xfs_btree_get_numrecs(block) <= + cur->bc_ops->get_maxrecs(cur, level)) { + /* + * Set "right" to be the starting block, + * "left" to be the left neighbor. + */ + rptr = cptr; + right = block; + rbp = bp; + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); + if (error) + goto error0; + + /* + * If that won't work, see if we can join with the right neighbor block. + */ + } else if (!xfs_btree_ptr_is_null(cur, &rptr) && + rrecs + xfs_btree_get_numrecs(block) <= + cur->bc_ops->get_maxrecs(cur, level)) { + /* + * Set "left" to be the starting block, + * "right" to be the right neighbor. + */ + lptr = cptr; + left = block; + lbp = bp; + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); + if (error) + goto error0; + + /* + * Otherwise, we can't fix the imbalance. + * Just return. This is probably a logic error, but it's not fatal. + */ + } else { + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + + rrecs = xfs_btree_get_numrecs(right); + lrecs = xfs_btree_get_numrecs(left); + + /* + * We're now going to join "left" and "right" by moving all the stuff + * in "right" to "left" and deleting "right". + */ + XFS_BTREE_STATS_ADD(cur, moves, rrecs); + if (level > 0) { + /* It's a non-leaf. Move keys and pointers. */ + union xfs_btree_key *lkp; /* left btree key */ + union xfs_btree_ptr *lpp; /* left address pointer */ + union xfs_btree_key *rkp; /* right btree key */ + union xfs_btree_ptr *rpp; /* right address pointer */ + + lkp = xfs_btree_key_addr(cur, lrecs + 1, left); + lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); + rkp = xfs_btree_key_addr(cur, 1, right); + rpp = xfs_btree_ptr_addr(cur, 1, right); +#ifdef DEBUG + for (i = 1; i < rrecs; i++) { + error = xfs_btree_check_ptr(cur, rpp, i, level); + if (error) + goto error0; + } +#endif + xfs_btree_copy_keys(cur, lkp, rkp, rrecs); + xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); + + xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); + xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); + } else { + /* It's a leaf. Move records. */ + union xfs_btree_rec *lrp; /* left record pointer */ + union xfs_btree_rec *rrp; /* right record pointer */ + + lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_copy_recs(cur, lrp, rrp, rrecs); + xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); + } + + XFS_BTREE_STATS_INC(cur, join); + + /* + * Fix up the number of records and right block pointer in the + * surviving block, and log it. + */ + xfs_btree_set_numrecs(left, lrecs + rrecs); + xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), + xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); + + /* If there is a right sibling, point it to the remaining block. */ + xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); + if (!xfs_btree_ptr_is_null(cur, &cptr)) { + error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); + if (error) + goto error0; + xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); + xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); + } + + /* Free the deleted block. */ + error = cur->bc_ops->free_block(cur, rbp); + if (error) + goto error0; + XFS_BTREE_STATS_INC(cur, free); + + /* + * If we joined with the left neighbor, set the buffer in the + * cursor to the left block, and fix up the index. + */ + if (bp != lbp) { + cur->bc_bufs[level] = lbp; + cur->bc_ptrs[level] += lrecs; + cur->bc_ra[level] = 0; + } + /* + * If we joined with the right neighbor and there's a level above + * us, increment the cursor at that level. + */ + else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || + (level + 1 < cur->bc_nlevels)) { + error = xfs_btree_increment(cur, level + 1, &i); + if (error) + goto error0; + } + + /* + * Readjust the ptr at this level if it's not a leaf, since it's + * still pointing at the deletion point, which makes the cursor + * inconsistent. If this makes the ptr 0, the caller fixes it up. + * We can't use decrement because it would change the next level up. + */ + if (level > 0) + cur->bc_ptrs[level]--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + /* Return value means the next level up has something to do. */ + *stat = 2; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (tcur) + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Delete the record pointed to by cur. + * The cursor refers to the place where the record was (could be inserted) + * when the operation returns. + */ +int /* error */ +xfs_btree_delete( + struct xfs_btree_cur *cur, + int *stat) /* success/failure */ +{ + int error; /* error return value */ + int level; + int i; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + /* + * Go up the tree, starting at leaf level. + * + * If 2 is returned then a join was done; go to the next level. + * Otherwise we are done. + */ + for (level = 0, i = 2; i == 2; level++) { + error = xfs_btree_delrec(cur, level, &i); + if (error) + goto error0; + } + + if (i == 0) { + for (level = 1; level < cur->bc_nlevels; level++) { + if (cur->bc_ptrs[level] == 0) { + error = xfs_btree_decrement(cur, level, &i); + if (error) + goto error0; + break; + } + } + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = i; + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* + * Get the data from the pointed-to record. + */ +int /* error */ +xfs_btree_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + union xfs_btree_rec **recp, /* output: btree record */ + int *stat) /* output: success/failure */ +{ + struct xfs_btree_block *block; /* btree block */ + struct xfs_buf *bp; /* buffer pointer */ + int ptr; /* record number */ +#ifdef DEBUG + int error; /* error return value */ +#endif + + ptr = cur->bc_ptrs[0]; + block = xfs_btree_get_block(cur, 0, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, 0, bp); + if (error) + return error; +#endif + + /* + * Off the right end or left end, return failure. + */ + if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { + *stat = 0; + return 0; + } + + /* + * Point to the record and extract its data. + */ + *recp = xfs_btree_rec_addr(cur, ptr, block); + *stat = 1; + return 0; +} + +/* + * Change the owner of a btree. + * + * The mechanism we use here is ordered buffer logging. Because we don't know + * how many buffers were are going to need to modify, we don't really want to + * have to make transaction reservations for the worst case of every buffer in a + * full size btree as that may be more space that we can fit in the log.... + * + * We do the btree walk in the most optimal manner possible - we have sibling + * pointers so we can just walk all the blocks on each level from left to right + * in a single pass, and then move to the next level and do the same. We can + * also do readahead on the sibling pointers to get IO moving more quickly, + * though for slow disks this is unlikely to make much difference to performance + * as the amount of CPU work we have to do before moving to the next block is + * relatively small. + * + * For each btree block that we load, modify the owner appropriately, set the + * buffer as an ordered buffer and log it appropriately. We need to ensure that + * we mark the region we change dirty so that if the buffer is relogged in + * a subsequent transaction the changes we make here as an ordered buffer are + * correctly relogged in that transaction. If we are in recovery context, then + * just queue the modified buffer as delayed write buffer so the transaction + * recovery completion writes the changes to disk. + */ +static int +xfs_btree_block_change_owner( + struct xfs_btree_cur *cur, + int level, + __uint64_t new_owner, + struct list_head *buffer_list) +{ + struct xfs_btree_block *block; + struct xfs_buf *bp; + union xfs_btree_ptr rptr; + + /* do right sibling readahead */ + xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); + + /* modify the owner */ + block = xfs_btree_get_block(cur, level, &bp); + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + block->bb_u.l.bb_owner = cpu_to_be64(new_owner); + else + block->bb_u.s.bb_owner = cpu_to_be32(new_owner); + + /* + * If the block is a root block hosted in an inode, we might not have a + * buffer pointer here and we shouldn't attempt to log the change as the + * information is already held in the inode and discarded when the root + * block is formatted into the on-disk inode fork. We still change it, + * though, so everything is consistent in memory. + */ + if (bp) { + if (cur->bc_tp) { + xfs_trans_ordered_buf(cur->bc_tp, bp); + xfs_btree_log_block(cur, bp, XFS_BB_OWNER); + } else { + xfs_buf_delwri_queue(bp, buffer_list); + } + } else { + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(level == cur->bc_nlevels - 1); + } + + /* now read rh sibling block for next iteration */ + xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); + if (xfs_btree_ptr_is_null(cur, &rptr)) + return ENOENT; + + return xfs_btree_lookup_get_block(cur, level, &rptr, &block); +} + +int +xfs_btree_change_owner( + struct xfs_btree_cur *cur, + __uint64_t new_owner, + struct list_head *buffer_list) +{ + union xfs_btree_ptr lptr; + int level; + struct xfs_btree_block *block = NULL; + int error = 0; + + cur->bc_ops->init_ptr_from_cur(cur, &lptr); + + /* for each level */ + for (level = cur->bc_nlevels - 1; level >= 0; level--) { + /* grab the left hand block */ + error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); + if (error) + return error; + + /* readahead the left most block for the next level down */ + if (level > 0) { + union xfs_btree_ptr *ptr; + + ptr = xfs_btree_ptr_addr(cur, 1, block); + xfs_btree_readahead_ptr(cur, ptr, 1); + + /* save for the next iteration of the loop */ + lptr = *ptr; + } + + /* for each buffer in the level */ + do { + error = xfs_btree_block_change_owner(cur, level, + new_owner, + buffer_list); + } while (!error); + + if (error != ENOENT) + return error; + } + + return 0; +} diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c new file mode 100644 index 000000000000..a1a4e3e47a1e --- /dev/null +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -0,0 +1,2665 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_alloc.h" +#include "xfs_bmap.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_cksum.h" +#include "xfs_buf_item.h" + +/* + * xfs_da_btree.c + * + * Routines to implement directories as Btrees of hashed names. + */ + +/*======================================================================== + * Function prototypes for the kernel. + *========================================================================*/ + +/* + * Routines used for growing the Btree. + */ +STATIC int xfs_da3_root_split(xfs_da_state_t *state, + xfs_da_state_blk_t *existing_root, + xfs_da_state_blk_t *new_child); +STATIC int xfs_da3_node_split(xfs_da_state_t *state, + xfs_da_state_blk_t *existing_blk, + xfs_da_state_blk_t *split_blk, + xfs_da_state_blk_t *blk_to_add, + int treelevel, + int *result); +STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state, + xfs_da_state_blk_t *node_blk_1, + xfs_da_state_blk_t *node_blk_2); +STATIC void xfs_da3_node_add(xfs_da_state_t *state, + xfs_da_state_blk_t *old_node_blk, + xfs_da_state_blk_t *new_node_blk); + +/* + * Routines used for shrinking the Btree. + */ +STATIC int xfs_da3_root_join(xfs_da_state_t *state, + xfs_da_state_blk_t *root_blk); +STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval); +STATIC void xfs_da3_node_remove(xfs_da_state_t *state, + xfs_da_state_blk_t *drop_blk); +STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state, + xfs_da_state_blk_t *src_node_blk, + xfs_da_state_blk_t *dst_node_blk); + +/* + * Utility routines. + */ +STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state, + xfs_da_state_blk_t *drop_blk, + xfs_da_state_blk_t *save_blk); + + +kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ + +/* + * Allocate a dir-state structure. + * We don't put them on the stack since they're large. + */ +xfs_da_state_t * +xfs_da_state_alloc(void) +{ + return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS); +} + +/* + * Kill the altpath contents of a da-state structure. + */ +STATIC void +xfs_da_state_kill_altpath(xfs_da_state_t *state) +{ + int i; + + for (i = 0; i < state->altpath.active; i++) + state->altpath.blk[i].bp = NULL; + state->altpath.active = 0; +} + +/* + * Free a da-state structure. + */ +void +xfs_da_state_free(xfs_da_state_t *state) +{ + xfs_da_state_kill_altpath(state); +#ifdef DEBUG + memset((char *)state, 0, sizeof(*state)); +#endif /* DEBUG */ + kmem_zone_free(xfs_da_state_zone, state); +} + +static bool +xfs_da3_node_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_da_intnode *hdr = bp->b_addr; + struct xfs_da3_icnode_hdr ichdr; + const struct xfs_dir_ops *ops; + + ops = xfs_dir_get_ops(mp, NULL); + + ops->node_hdr_from_disk(&ichdr, hdr); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + + if (ichdr.magic != XFS_DA3_NODE_MAGIC) + return false; + + if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid)) + return false; + if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) + return false; + } else { + if (ichdr.magic != XFS_DA_NODE_MAGIC) + return false; + } + if (ichdr.level == 0) + return false; + if (ichdr.level > XFS_DA_NODE_MAXDEPTH) + return false; + if (ichdr.count == 0) + return false; + + /* + * we don't know if the node is for and attribute or directory tree, + * so only fail if the count is outside both bounds + */ + if (ichdr.count > mp->m_dir_geo->node_ents && + ichdr.count > mp->m_attr_geo->node_ents) + return false; + + /* XXX: hash order check? */ + + return true; +} + +static void +xfs_da3_node_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + + if (!xfs_da3_node_verify(bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (bip) + hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); + + xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF); +} + +/* + * leaf/node format detection on trees is sketchy, so a node read can be done on + * leaf level blocks when detection identifies the tree as a node format tree + * incorrectly. In this case, we need to swap the verifier to match the correct + * format of the block being read. + */ +static void +xfs_da3_node_read_verify( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DA3_NODE_MAGIC: + if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { + xfs_buf_ioerror(bp, EFSBADCRC); + break; + } + /* fall through */ + case XFS_DA_NODE_MAGIC: + if (!xfs_da3_node_verify(bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + break; + } + return; + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: + bp->b_ops = &xfs_attr3_leaf_buf_ops; + bp->b_ops->verify_read(bp); + return; + case XFS_DIR2_LEAFN_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: + bp->b_ops = &xfs_dir3_leafn_buf_ops; + bp->b_ops->verify_read(bp); + return; + default: + break; + } + + /* corrupt block */ + xfs_verifier_error(bp); +} + +const struct xfs_buf_ops xfs_da3_node_buf_ops = { + .verify_read = xfs_da3_node_read_verify, + .verify_write = xfs_da3_node_write_verify, +}; + +int +xfs_da3_node_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp, + int which_fork) +{ + int err; + + err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, + which_fork, &xfs_da3_node_buf_ops); + if (!err && tp) { + struct xfs_da_blkinfo *info = (*bpp)->b_addr; + int type; + + switch (be16_to_cpu(info->magic)) { + case XFS_DA_NODE_MAGIC: + case XFS_DA3_NODE_MAGIC: + type = XFS_BLFT_DA_NODE_BUF; + break; + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: + type = XFS_BLFT_ATTR_LEAF_BUF; + break; + case XFS_DIR2_LEAFN_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: + type = XFS_BLFT_DIR_LEAFN_BUF; + break; + default: + type = 0; + ASSERT(0); + break; + } + xfs_trans_buf_set_type(tp, *bpp, type); + } + return err; +} + +/*======================================================================== + * Routines used for growing the Btree. + *========================================================================*/ + +/* + * Create the initial contents of an intermediate node. + */ +int +xfs_da3_node_create( + struct xfs_da_args *args, + xfs_dablk_t blkno, + int level, + struct xfs_buf **bpp, + int whichfork) +{ + struct xfs_da_intnode *node; + struct xfs_trans *tp = args->trans; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_da3_icnode_hdr ichdr = {0}; + struct xfs_buf *bp; + int error; + struct xfs_inode *dp = args->dp; + + trace_xfs_da_node_create(args); + ASSERT(level <= XFS_DA_NODE_MAXDEPTH); + + error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork); + if (error) + return error; + bp->b_ops = &xfs_da3_node_buf_ops; + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); + node = bp->b_addr; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + + ichdr.magic = XFS_DA3_NODE_MAGIC; + hdr3->info.blkno = cpu_to_be64(bp->b_bn); + hdr3->info.owner = cpu_to_be64(args->dp->i_ino); + uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_uuid); + } else { + ichdr.magic = XFS_DA_NODE_MAGIC; + } + ichdr.level = level; + + dp->d_ops->node_hdr_to_disk(node, &ichdr); + xfs_trans_log_buf(tp, bp, + XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); + + *bpp = bp; + return 0; +} + +/* + * Split a leaf node, rebalance, then possibly split + * intermediate nodes, rebalance, etc. + */ +int /* error */ +xfs_da3_split( + struct xfs_da_state *state) +{ + struct xfs_da_state_blk *oldblk; + struct xfs_da_state_blk *newblk; + struct xfs_da_state_blk *addblk; + struct xfs_da_intnode *node; + struct xfs_buf *bp; + int max; + int action = 0; + int error; + int i; + + trace_xfs_da_split(state->args); + + /* + * Walk back up the tree splitting/inserting/adjusting as necessary. + * If we need to insert and there isn't room, split the node, then + * decide which fragment to insert the new block from below into. + * Note that we may split the root this way, but we need more fixup. + */ + max = state->path.active - 1; + ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH)); + ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC || + state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC); + + addblk = &state->path.blk[max]; /* initial dummy value */ + for (i = max; (i >= 0) && addblk; state->path.active--, i--) { + oldblk = &state->path.blk[i]; + newblk = &state->altpath.blk[i]; + + /* + * If a leaf node then + * Allocate a new leaf node, then rebalance across them. + * else if an intermediate node then + * We split on the last layer, must we split the node? + */ + switch (oldblk->magic) { + case XFS_ATTR_LEAF_MAGIC: + error = xfs_attr3_leaf_split(state, oldblk, newblk); + if ((error != 0) && (error != ENOSPC)) { + return error; /* GROT: attr is inconsistent */ + } + if (!error) { + addblk = newblk; + break; + } + /* + * Entry wouldn't fit, split the leaf again. + */ + state->extravalid = 1; + if (state->inleaf) { + state->extraafter = 0; /* before newblk */ + trace_xfs_attr_leaf_split_before(state->args); + error = xfs_attr3_leaf_split(state, oldblk, + &state->extrablk); + } else { + state->extraafter = 1; /* after newblk */ + trace_xfs_attr_leaf_split_after(state->args); + error = xfs_attr3_leaf_split(state, newblk, + &state->extrablk); + } + if (error) + return error; /* GROT: attr inconsistent */ + addblk = newblk; + break; + case XFS_DIR2_LEAFN_MAGIC: + error = xfs_dir2_leafn_split(state, oldblk, newblk); + if (error) + return error; + addblk = newblk; + break; + case XFS_DA_NODE_MAGIC: + error = xfs_da3_node_split(state, oldblk, newblk, addblk, + max - i, &action); + addblk->bp = NULL; + if (error) + return error; /* GROT: dir is inconsistent */ + /* + * Record the newly split block for the next time thru? + */ + if (action) + addblk = newblk; + else + addblk = NULL; + break; + } + + /* + * Update the btree to show the new hashval for this child. + */ + xfs_da3_fixhashpath(state, &state->path); + } + if (!addblk) + return 0; + + /* + * Split the root node. + */ + ASSERT(state->path.active == 0); + oldblk = &state->path.blk[0]; + error = xfs_da3_root_split(state, oldblk, addblk); + if (error) { + addblk->bp = NULL; + return error; /* GROT: dir is inconsistent */ + } + + /* + * Update pointers to the node which used to be block 0 and + * just got bumped because of the addition of a new root node. + * There might be three blocks involved if a double split occurred, + * and the original block 0 could be at any position in the list. + * + * Note: the magic numbers and sibling pointers are in the same + * physical place for both v2 and v3 headers (by design). Hence it + * doesn't matter which version of the xfs_da_intnode structure we use + * here as the result will be the same using either structure. + */ + node = oldblk->bp->b_addr; + if (node->hdr.info.forw) { + if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) { + bp = addblk->bp; + } else { + ASSERT(state->extravalid); + bp = state->extrablk.bp; + } + node = bp->b_addr; + node->hdr.info.back = cpu_to_be32(oldblk->blkno); + xfs_trans_log_buf(state->args->trans, bp, + XFS_DA_LOGRANGE(node, &node->hdr.info, + sizeof(node->hdr.info))); + } + node = oldblk->bp->b_addr; + if (node->hdr.info.back) { + if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) { + bp = addblk->bp; + } else { + ASSERT(state->extravalid); + bp = state->extrablk.bp; + } + node = bp->b_addr; + node->hdr.info.forw = cpu_to_be32(oldblk->blkno); + xfs_trans_log_buf(state->args->trans, bp, + XFS_DA_LOGRANGE(node, &node->hdr.info, + sizeof(node->hdr.info))); + } + addblk->bp = NULL; + return 0; +} + +/* + * Split the root. We have to create a new root and point to the two + * parts (the split old root) that we just created. Copy block zero to + * the EOF, extending the inode in process. + */ +STATIC int /* error */ +xfs_da3_root_split( + struct xfs_da_state *state, + struct xfs_da_state_blk *blk1, + struct xfs_da_state_blk *blk2) +{ + struct xfs_da_intnode *node; + struct xfs_da_intnode *oldroot; + struct xfs_da_node_entry *btree; + struct xfs_da3_icnode_hdr nodehdr; + struct xfs_da_args *args; + struct xfs_buf *bp; + struct xfs_inode *dp; + struct xfs_trans *tp; + struct xfs_mount *mp; + struct xfs_dir2_leaf *leaf; + xfs_dablk_t blkno; + int level; + int error; + int size; + + trace_xfs_da_root_split(state->args); + + /* + * Copy the existing (incorrect) block from the root node position + * to a free space somewhere. + */ + args = state->args; + error = xfs_da_grow_inode(args, &blkno); + if (error) + return error; + + dp = args->dp; + tp = args->trans; + mp = state->mp; + error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork); + if (error) + return error; + node = bp->b_addr; + oldroot = blk1->bp->b_addr; + if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || + oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { + struct xfs_da3_icnode_hdr nodehdr; + + dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); + btree = dp->d_ops->node_tree_p(oldroot); + size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); + level = nodehdr.level; + + /* + * we are about to copy oldroot to bp, so set up the type + * of bp while we know exactly what it will be. + */ + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); + } else { + struct xfs_dir3_icleaf_hdr leafhdr; + struct xfs_dir2_leaf_entry *ents; + + leaf = (xfs_dir2_leaf_t *)oldroot; + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + + ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || + leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); + size = (int)((char *)&ents[leafhdr.count] - (char *)leaf); + level = 0; + + /* + * we are about to copy oldroot to bp, so set up the type + * of bp while we know exactly what it will be. + */ + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF); + } + + /* + * we can copy most of the information in the node from one block to + * another, but for CRC enabled headers we have to make sure that the + * block specific identifiers are kept intact. We update the buffer + * directly for this. + */ + memcpy(node, oldroot, size); + if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || + oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { + struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node; + + node3->hdr.info.blkno = cpu_to_be64(bp->b_bn); + } + xfs_trans_log_buf(tp, bp, 0, size - 1); + + bp->b_ops = blk1->bp->b_ops; + xfs_trans_buf_copy_type(bp, blk1->bp); + blk1->bp = bp; + blk1->blkno = blkno; + + /* + * Set up the new root node. + */ + error = xfs_da3_node_create(args, + (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0, + level + 1, &bp, args->whichfork); + if (error) + return error; + + node = bp->b_addr; + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); + btree[0].hashval = cpu_to_be32(blk1->hashval); + btree[0].before = cpu_to_be32(blk1->blkno); + btree[1].hashval = cpu_to_be32(blk2->hashval); + btree[1].before = cpu_to_be32(blk2->blkno); + nodehdr.count = 2; + dp->d_ops->node_hdr_to_disk(node, &nodehdr); + +#ifdef DEBUG + if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { + ASSERT(blk1->blkno >= args->geo->leafblk && + blk1->blkno < args->geo->freeblk); + ASSERT(blk2->blkno >= args->geo->leafblk && + blk2->blkno < args->geo->freeblk); + } +#endif + + /* Header is already logged by xfs_da_node_create */ + xfs_trans_log_buf(tp, bp, + XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2)); + + return 0; +} + +/* + * Split the node, rebalance, then add the new entry. + */ +STATIC int /* error */ +xfs_da3_node_split( + struct xfs_da_state *state, + struct xfs_da_state_blk *oldblk, + struct xfs_da_state_blk *newblk, + struct xfs_da_state_blk *addblk, + int treelevel, + int *result) +{ + struct xfs_da_intnode *node; + struct xfs_da3_icnode_hdr nodehdr; + xfs_dablk_t blkno; + int newcount; + int error; + int useextra; + struct xfs_inode *dp = state->args->dp; + + trace_xfs_da_node_split(state->args); + + node = oldblk->bp->b_addr; + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + + /* + * With V2 dirs the extra block is data or freespace. + */ + useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK; + newcount = 1 + useextra; + /* + * Do we have to split the node? + */ + if (nodehdr.count + newcount > state->args->geo->node_ents) { + /* + * Allocate a new node, add to the doubly linked chain of + * nodes, then move some of our excess entries into it. + */ + error = xfs_da_grow_inode(state->args, &blkno); + if (error) + return error; /* GROT: dir is inconsistent */ + + error = xfs_da3_node_create(state->args, blkno, treelevel, + &newblk->bp, state->args->whichfork); + if (error) + return error; /* GROT: dir is inconsistent */ + newblk->blkno = blkno; + newblk->magic = XFS_DA_NODE_MAGIC; + xfs_da3_node_rebalance(state, oldblk, newblk); + error = xfs_da3_blk_link(state, oldblk, newblk); + if (error) + return error; + *result = 1; + } else { + *result = 0; + } + + /* + * Insert the new entry(s) into the correct block + * (updating last hashval in the process). + * + * xfs_da3_node_add() inserts BEFORE the given index, + * and as a result of using node_lookup_int() we always + * point to a valid entry (not after one), but a split + * operation always results in a new block whose hashvals + * FOLLOW the current block. + * + * If we had double-split op below us, then add the extra block too. + */ + node = oldblk->bp->b_addr; + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + if (oldblk->index <= nodehdr.count) { + oldblk->index++; + xfs_da3_node_add(state, oldblk, addblk); + if (useextra) { + if (state->extraafter) + oldblk->index++; + xfs_da3_node_add(state, oldblk, &state->extrablk); + state->extravalid = 0; + } + } else { + newblk->index++; + xfs_da3_node_add(state, newblk, addblk); + if (useextra) { + if (state->extraafter) + newblk->index++; + xfs_da3_node_add(state, newblk, &state->extrablk); + state->extravalid = 0; + } + } + + return 0; +} + +/* + * Balance the btree elements between two intermediate nodes, + * usually one full and one empty. + * + * NOTE: if blk2 is empty, then it will get the upper half of blk1. + */ +STATIC void +xfs_da3_node_rebalance( + struct xfs_da_state *state, + struct xfs_da_state_blk *blk1, + struct xfs_da_state_blk *blk2) +{ + struct xfs_da_intnode *node1; + struct xfs_da_intnode *node2; + struct xfs_da_intnode *tmpnode; + struct xfs_da_node_entry *btree1; + struct xfs_da_node_entry *btree2; + struct xfs_da_node_entry *btree_s; + struct xfs_da_node_entry *btree_d; + struct xfs_da3_icnode_hdr nodehdr1; + struct xfs_da3_icnode_hdr nodehdr2; + struct xfs_trans *tp; + int count; + int tmp; + int swap = 0; + struct xfs_inode *dp = state->args->dp; + + trace_xfs_da_node_rebalance(state->args); + + node1 = blk1->bp->b_addr; + node2 = blk2->bp->b_addr; + dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); + dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); + btree1 = dp->d_ops->node_tree_p(node1); + btree2 = dp->d_ops->node_tree_p(node2); + + /* + * Figure out how many entries need to move, and in which direction. + * Swap the nodes around if that makes it simpler. + */ + if (nodehdr1.count > 0 && nodehdr2.count > 0 && + ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || + (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) < + be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) { + tmpnode = node1; + node1 = node2; + node2 = tmpnode; + dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); + dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); + btree1 = dp->d_ops->node_tree_p(node1); + btree2 = dp->d_ops->node_tree_p(node2); + swap = 1; + } + + count = (nodehdr1.count - nodehdr2.count) / 2; + if (count == 0) + return; + tp = state->args->trans; + /* + * Two cases: high-to-low and low-to-high. + */ + if (count > 0) { + /* + * Move elements in node2 up to make a hole. + */ + tmp = nodehdr2.count; + if (tmp > 0) { + tmp *= (uint)sizeof(xfs_da_node_entry_t); + btree_s = &btree2[0]; + btree_d = &btree2[count]; + memmove(btree_d, btree_s, tmp); + } + + /* + * Move the req'd B-tree elements from high in node1 to + * low in node2. + */ + nodehdr2.count += count; + tmp = count * (uint)sizeof(xfs_da_node_entry_t); + btree_s = &btree1[nodehdr1.count - count]; + btree_d = &btree2[0]; + memcpy(btree_d, btree_s, tmp); + nodehdr1.count -= count; + } else { + /* + * Move the req'd B-tree elements from low in node2 to + * high in node1. + */ + count = -count; + tmp = count * (uint)sizeof(xfs_da_node_entry_t); + btree_s = &btree2[0]; + btree_d = &btree1[nodehdr1.count]; + memcpy(btree_d, btree_s, tmp); + nodehdr1.count += count; + + xfs_trans_log_buf(tp, blk1->bp, + XFS_DA_LOGRANGE(node1, btree_d, tmp)); + + /* + * Move elements in node2 down to fill the hole. + */ + tmp = nodehdr2.count - count; + tmp *= (uint)sizeof(xfs_da_node_entry_t); + btree_s = &btree2[count]; + btree_d = &btree2[0]; + memmove(btree_d, btree_s, tmp); + nodehdr2.count -= count; + } + + /* + * Log header of node 1 and all current bits of node 2. + */ + dp->d_ops->node_hdr_to_disk(node1, &nodehdr1); + xfs_trans_log_buf(tp, blk1->bp, + XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size)); + + dp->d_ops->node_hdr_to_disk(node2, &nodehdr2); + xfs_trans_log_buf(tp, blk2->bp, + XFS_DA_LOGRANGE(node2, &node2->hdr, + dp->d_ops->node_hdr_size + + (sizeof(btree2[0]) * nodehdr2.count))); + + /* + * Record the last hashval from each block for upward propagation. + * (note: don't use the swapped node pointers) + */ + if (swap) { + node1 = blk1->bp->b_addr; + node2 = blk2->bp->b_addr; + dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); + dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); + btree1 = dp->d_ops->node_tree_p(node1); + btree2 = dp->d_ops->node_tree_p(node2); + } + blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval); + blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval); + + /* + * Adjust the expected index for insertion. + */ + if (blk1->index >= nodehdr1.count) { + blk2->index = blk1->index - nodehdr1.count; + blk1->index = nodehdr1.count + 1; /* make it invalid */ + } +} + +/* + * Add a new entry to an intermediate node. + */ +STATIC void +xfs_da3_node_add( + struct xfs_da_state *state, + struct xfs_da_state_blk *oldblk, + struct xfs_da_state_blk *newblk) +{ + struct xfs_da_intnode *node; + struct xfs_da3_icnode_hdr nodehdr; + struct xfs_da_node_entry *btree; + int tmp; + struct xfs_inode *dp = state->args->dp; + + trace_xfs_da_node_add(state->args); + + node = oldblk->bp->b_addr; + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); + + ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count); + ASSERT(newblk->blkno != 0); + if (state->args->whichfork == XFS_DATA_FORK) + ASSERT(newblk->blkno >= state->args->geo->leafblk && + newblk->blkno < state->args->geo->freeblk); + + /* + * We may need to make some room before we insert the new node. + */ + tmp = 0; + if (oldblk->index < nodehdr.count) { + tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree); + memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp); + } + btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval); + btree[oldblk->index].before = cpu_to_be32(newblk->blkno); + xfs_trans_log_buf(state->args->trans, oldblk->bp, + XFS_DA_LOGRANGE(node, &btree[oldblk->index], + tmp + sizeof(*btree))); + + nodehdr.count += 1; + dp->d_ops->node_hdr_to_disk(node, &nodehdr); + xfs_trans_log_buf(state->args->trans, oldblk->bp, + XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); + + /* + * Copy the last hash value from the oldblk to propagate upwards. + */ + oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); +} + +/*======================================================================== + * Routines used for shrinking the Btree. + *========================================================================*/ + +/* + * Deallocate an empty leaf node, remove it from its parent, + * possibly deallocating that block, etc... + */ +int +xfs_da3_join( + struct xfs_da_state *state) +{ + struct xfs_da_state_blk *drop_blk; + struct xfs_da_state_blk *save_blk; + int action = 0; + int error; + + trace_xfs_da_join(state->args); + + drop_blk = &state->path.blk[ state->path.active-1 ]; + save_blk = &state->altpath.blk[ state->path.active-1 ]; + ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC); + ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC || + drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); + + /* + * Walk back up the tree joining/deallocating as necessary. + * When we stop dropping blocks, break out. + */ + for ( ; state->path.active >= 2; drop_blk--, save_blk--, + state->path.active--) { + /* + * See if we can combine the block with a neighbor. + * (action == 0) => no options, just leave + * (action == 1) => coalesce, then unlink + * (action == 2) => block empty, unlink it + */ + switch (drop_blk->magic) { + case XFS_ATTR_LEAF_MAGIC: + error = xfs_attr3_leaf_toosmall(state, &action); + if (error) + return error; + if (action == 0) + return 0; + xfs_attr3_leaf_unbalance(state, drop_blk, save_blk); + break; + case XFS_DIR2_LEAFN_MAGIC: + error = xfs_dir2_leafn_toosmall(state, &action); + if (error) + return error; + if (action == 0) + return 0; + xfs_dir2_leafn_unbalance(state, drop_blk, save_blk); + break; + case XFS_DA_NODE_MAGIC: + /* + * Remove the offending node, fixup hashvals, + * check for a toosmall neighbor. + */ + xfs_da3_node_remove(state, drop_blk); + xfs_da3_fixhashpath(state, &state->path); + error = xfs_da3_node_toosmall(state, &action); + if (error) + return error; + if (action == 0) + return 0; + xfs_da3_node_unbalance(state, drop_blk, save_blk); + break; + } + xfs_da3_fixhashpath(state, &state->altpath); + error = xfs_da3_blk_unlink(state, drop_blk, save_blk); + xfs_da_state_kill_altpath(state); + if (error) + return error; + error = xfs_da_shrink_inode(state->args, drop_blk->blkno, + drop_blk->bp); + drop_blk->bp = NULL; + if (error) + return error; + } + /* + * We joined all the way to the top. If it turns out that + * we only have one entry in the root, make the child block + * the new root. + */ + xfs_da3_node_remove(state, drop_blk); + xfs_da3_fixhashpath(state, &state->path); + error = xfs_da3_root_join(state, &state->path.blk[0]); + return error; +} + +#ifdef DEBUG +static void +xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level) +{ + __be16 magic = blkinfo->magic; + + if (level == 1) { + ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) || + magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || + magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); + } else { + ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || + magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); + } + ASSERT(!blkinfo->forw); + ASSERT(!blkinfo->back); +} +#else /* !DEBUG */ +#define xfs_da_blkinfo_onlychild_validate(blkinfo, level) +#endif /* !DEBUG */ + +/* + * We have only one entry in the root. Copy the only remaining child of + * the old root to block 0 as the new root node. + */ +STATIC int +xfs_da3_root_join( + struct xfs_da_state *state, + struct xfs_da_state_blk *root_blk) +{ + struct xfs_da_intnode *oldroot; + struct xfs_da_args *args; + xfs_dablk_t child; + struct xfs_buf *bp; + struct xfs_da3_icnode_hdr oldroothdr; + struct xfs_da_node_entry *btree; + int error; + struct xfs_inode *dp = state->args->dp; + + trace_xfs_da_root_join(state->args); + + ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); + + args = state->args; + oldroot = root_blk->bp->b_addr; + dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot); + ASSERT(oldroothdr.forw == 0); + ASSERT(oldroothdr.back == 0); + + /* + * If the root has more than one child, then don't do anything. + */ + if (oldroothdr.count > 1) + return 0; + + /* + * Read in the (only) child block, then copy those bytes into + * the root block's buffer and free the original child block. + */ + btree = dp->d_ops->node_tree_p(oldroot); + child = be32_to_cpu(btree[0].before); + ASSERT(child != 0); + error = xfs_da3_node_read(args->trans, dp, child, -1, &bp, + args->whichfork); + if (error) + return error; + xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level); + + /* + * This could be copying a leaf back into the root block in the case of + * there only being a single leaf block left in the tree. Hence we have + * to update the b_ops pointer as well to match the buffer type change + * that could occur. For dir3 blocks we also need to update the block + * number in the buffer header. + */ + memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize); + root_blk->bp->b_ops = bp->b_ops; + xfs_trans_buf_copy_type(root_blk->bp, bp); + if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) { + struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr; + da3->blkno = cpu_to_be64(root_blk->bp->b_bn); + } + xfs_trans_log_buf(args->trans, root_blk->bp, 0, + args->geo->blksize - 1); + error = xfs_da_shrink_inode(args, child, bp); + return error; +} + +/* + * Check a node block and its neighbors to see if the block should be + * collapsed into one or the other neighbor. Always keep the block + * with the smaller block number. + * If the current block is over 50% full, don't try to join it, return 0. + * If the block is empty, fill in the state structure and return 2. + * If it can be collapsed, fill in the state structure and return 1. + * If nothing can be done, return 0. + */ +STATIC int +xfs_da3_node_toosmall( + struct xfs_da_state *state, + int *action) +{ + struct xfs_da_intnode *node; + struct xfs_da_state_blk *blk; + struct xfs_da_blkinfo *info; + xfs_dablk_t blkno; + struct xfs_buf *bp; + struct xfs_da3_icnode_hdr nodehdr; + int count; + int forward; + int error; + int retval; + int i; + struct xfs_inode *dp = state->args->dp; + + trace_xfs_da_node_toosmall(state->args); + + /* + * Check for the degenerate case of the block being over 50% full. + * If so, it's not worth even looking to see if we might be able + * to coalesce with a sibling. + */ + blk = &state->path.blk[ state->path.active-1 ]; + info = blk->bp->b_addr; + node = (xfs_da_intnode_t *)info; + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + if (nodehdr.count > (state->args->geo->node_ents >> 1)) { + *action = 0; /* blk over 50%, don't try to join */ + return 0; /* blk over 50%, don't try to join */ + } + + /* + * Check for the degenerate case of the block being empty. + * If the block is empty, we'll simply delete it, no need to + * coalesce it with a sibling block. We choose (arbitrarily) + * to merge with the forward block unless it is NULL. + */ + if (nodehdr.count == 0) { + /* + * Make altpath point to the block we want to keep and + * path point to the block we want to drop (this one). + */ + forward = (info->forw != 0); + memcpy(&state->altpath, &state->path, sizeof(state->path)); + error = xfs_da3_path_shift(state, &state->altpath, forward, + 0, &retval); + if (error) + return error; + if (retval) { + *action = 0; + } else { + *action = 2; + } + return 0; + } + + /* + * Examine each sibling block to see if we can coalesce with + * at least 25% free space to spare. We need to figure out + * whether to merge with the forward or the backward block. + * We prefer coalescing with the lower numbered sibling so as + * to shrink a directory over time. + */ + count = state->args->geo->node_ents; + count -= state->args->geo->node_ents >> 2; + count -= nodehdr.count; + + /* start with smaller blk num */ + forward = nodehdr.forw < nodehdr.back; + for (i = 0; i < 2; forward = !forward, i++) { + struct xfs_da3_icnode_hdr thdr; + if (forward) + blkno = nodehdr.forw; + else + blkno = nodehdr.back; + if (blkno == 0) + continue; + error = xfs_da3_node_read(state->args->trans, dp, + blkno, -1, &bp, state->args->whichfork); + if (error) + return error; + + node = bp->b_addr; + dp->d_ops->node_hdr_from_disk(&thdr, node); + xfs_trans_brelse(state->args->trans, bp); + + if (count - thdr.count >= 0) + break; /* fits with at least 25% to spare */ + } + if (i >= 2) { + *action = 0; + return 0; + } + + /* + * Make altpath point to the block we want to keep (the lower + * numbered block) and path point to the block we want to drop. + */ + memcpy(&state->altpath, &state->path, sizeof(state->path)); + if (blkno < blk->blkno) { + error = xfs_da3_path_shift(state, &state->altpath, forward, + 0, &retval); + } else { + error = xfs_da3_path_shift(state, &state->path, forward, + 0, &retval); + } + if (error) + return error; + if (retval) { + *action = 0; + return 0; + } + *action = 1; + return 0; +} + +/* + * Pick up the last hashvalue from an intermediate node. + */ +STATIC uint +xfs_da3_node_lasthash( + struct xfs_inode *dp, + struct xfs_buf *bp, + int *count) +{ + struct xfs_da_intnode *node; + struct xfs_da_node_entry *btree; + struct xfs_da3_icnode_hdr nodehdr; + + node = bp->b_addr; + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + if (count) + *count = nodehdr.count; + if (!nodehdr.count) + return 0; + btree = dp->d_ops->node_tree_p(node); + return be32_to_cpu(btree[nodehdr.count - 1].hashval); +} + +/* + * Walk back up the tree adjusting hash values as necessary, + * when we stop making changes, return. + */ +void +xfs_da3_fixhashpath( + struct xfs_da_state *state, + struct xfs_da_state_path *path) +{ + struct xfs_da_state_blk *blk; + struct xfs_da_intnode *node; + struct xfs_da_node_entry *btree; + xfs_dahash_t lasthash=0; + int level; + int count; + struct xfs_inode *dp = state->args->dp; + + trace_xfs_da_fixhashpath(state->args); + + level = path->active-1; + blk = &path->blk[ level ]; + switch (blk->magic) { + case XFS_ATTR_LEAF_MAGIC: + lasthash = xfs_attr_leaf_lasthash(blk->bp, &count); + if (count == 0) + return; + break; + case XFS_DIR2_LEAFN_MAGIC: + lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count); + if (count == 0) + return; + break; + case XFS_DA_NODE_MAGIC: + lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count); + if (count == 0) + return; + break; + } + for (blk--, level--; level >= 0; blk--, level--) { + struct xfs_da3_icnode_hdr nodehdr; + + node = blk->bp->b_addr; + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); + if (be32_to_cpu(btree[blk->index].hashval) == lasthash) + break; + blk->hashval = lasthash; + btree[blk->index].hashval = cpu_to_be32(lasthash); + xfs_trans_log_buf(state->args->trans, blk->bp, + XFS_DA_LOGRANGE(node, &btree[blk->index], + sizeof(*btree))); + + lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval); + } +} + +/* + * Remove an entry from an intermediate node. + */ +STATIC void +xfs_da3_node_remove( + struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk) +{ + struct xfs_da_intnode *node; + struct xfs_da3_icnode_hdr nodehdr; + struct xfs_da_node_entry *btree; + int index; + int tmp; + struct xfs_inode *dp = state->args->dp; + + trace_xfs_da_node_remove(state->args); + + node = drop_blk->bp->b_addr; + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + ASSERT(drop_blk->index < nodehdr.count); + ASSERT(drop_blk->index >= 0); + + /* + * Copy over the offending entry, or just zero it out. + */ + index = drop_blk->index; + btree = dp->d_ops->node_tree_p(node); + if (index < nodehdr.count - 1) { + tmp = nodehdr.count - index - 1; + tmp *= (uint)sizeof(xfs_da_node_entry_t); + memmove(&btree[index], &btree[index + 1], tmp); + xfs_trans_log_buf(state->args->trans, drop_blk->bp, + XFS_DA_LOGRANGE(node, &btree[index], tmp)); + index = nodehdr.count - 1; + } + memset(&btree[index], 0, sizeof(xfs_da_node_entry_t)); + xfs_trans_log_buf(state->args->trans, drop_blk->bp, + XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); + nodehdr.count -= 1; + dp->d_ops->node_hdr_to_disk(node, &nodehdr); + xfs_trans_log_buf(state->args->trans, drop_blk->bp, + XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); + + /* + * Copy the last hash value from the block to propagate upwards. + */ + drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval); +} + +/* + * Unbalance the elements between two intermediate nodes, + * move all Btree elements from one node into another. + */ +STATIC void +xfs_da3_node_unbalance( + struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk, + struct xfs_da_state_blk *save_blk) +{ + struct xfs_da_intnode *drop_node; + struct xfs_da_intnode *save_node; + struct xfs_da_node_entry *drop_btree; + struct xfs_da_node_entry *save_btree; + struct xfs_da3_icnode_hdr drop_hdr; + struct xfs_da3_icnode_hdr save_hdr; + struct xfs_trans *tp; + int sindex; + int tmp; + struct xfs_inode *dp = state->args->dp; + + trace_xfs_da_node_unbalance(state->args); + + drop_node = drop_blk->bp->b_addr; + save_node = save_blk->bp->b_addr; + dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node); + dp->d_ops->node_hdr_from_disk(&save_hdr, save_node); + drop_btree = dp->d_ops->node_tree_p(drop_node); + save_btree = dp->d_ops->node_tree_p(save_node); + tp = state->args->trans; + + /* + * If the dying block has lower hashvals, then move all the + * elements in the remaining block up to make a hole. + */ + if ((be32_to_cpu(drop_btree[0].hashval) < + be32_to_cpu(save_btree[0].hashval)) || + (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) < + be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) { + /* XXX: check this - is memmove dst correct? */ + tmp = save_hdr.count * sizeof(xfs_da_node_entry_t); + memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp); + + sindex = 0; + xfs_trans_log_buf(tp, save_blk->bp, + XFS_DA_LOGRANGE(save_node, &save_btree[0], + (save_hdr.count + drop_hdr.count) * + sizeof(xfs_da_node_entry_t))); + } else { + sindex = save_hdr.count; + xfs_trans_log_buf(tp, save_blk->bp, + XFS_DA_LOGRANGE(save_node, &save_btree[sindex], + drop_hdr.count * sizeof(xfs_da_node_entry_t))); + } + + /* + * Move all the B-tree elements from drop_blk to save_blk. + */ + tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t); + memcpy(&save_btree[sindex], &drop_btree[0], tmp); + save_hdr.count += drop_hdr.count; + + dp->d_ops->node_hdr_to_disk(save_node, &save_hdr); + xfs_trans_log_buf(tp, save_blk->bp, + XFS_DA_LOGRANGE(save_node, &save_node->hdr, + dp->d_ops->node_hdr_size)); + + /* + * Save the last hashval in the remaining block for upward propagation. + */ + save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval); +} + +/*======================================================================== + * Routines used for finding things in the Btree. + *========================================================================*/ + +/* + * Walk down the Btree looking for a particular filename, filling + * in the state structure as we go. + * + * We will set the state structure to point to each of the elements + * in each of the nodes where either the hashval is or should be. + * + * We support duplicate hashval's so for each entry in the current + * node that could contain the desired hashval, descend. This is a + * pruned depth-first tree search. + */ +int /* error */ +xfs_da3_node_lookup_int( + struct xfs_da_state *state, + int *result) +{ + struct xfs_da_state_blk *blk; + struct xfs_da_blkinfo *curr; + struct xfs_da_intnode *node; + struct xfs_da_node_entry *btree; + struct xfs_da3_icnode_hdr nodehdr; + struct xfs_da_args *args; + xfs_dablk_t blkno; + xfs_dahash_t hashval; + xfs_dahash_t btreehashval; + int probe; + int span; + int max; + int error; + int retval; + struct xfs_inode *dp = state->args->dp; + + args = state->args; + + /* + * Descend thru the B-tree searching each level for the right + * node to use, until the right hashval is found. + */ + blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0; + for (blk = &state->path.blk[0], state->path.active = 1; + state->path.active <= XFS_DA_NODE_MAXDEPTH; + blk++, state->path.active++) { + /* + * Read the next node down in the tree. + */ + blk->blkno = blkno; + error = xfs_da3_node_read(args->trans, args->dp, blkno, + -1, &blk->bp, args->whichfork); + if (error) { + blk->blkno = 0; + state->path.active--; + return error; + } + curr = blk->bp->b_addr; + blk->magic = be16_to_cpu(curr->magic); + + if (blk->magic == XFS_ATTR_LEAF_MAGIC || + blk->magic == XFS_ATTR3_LEAF_MAGIC) { + blk->magic = XFS_ATTR_LEAF_MAGIC; + blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); + break; + } + + if (blk->magic == XFS_DIR2_LEAFN_MAGIC || + blk->magic == XFS_DIR3_LEAFN_MAGIC) { + blk->magic = XFS_DIR2_LEAFN_MAGIC; + blk->hashval = xfs_dir2_leafn_lasthash(args->dp, + blk->bp, NULL); + break; + } + + blk->magic = XFS_DA_NODE_MAGIC; + + + /* + * Search an intermediate node for a match. + */ + node = blk->bp->b_addr; + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); + + max = nodehdr.count; + blk->hashval = be32_to_cpu(btree[max - 1].hashval); + + /* + * Binary search. (note: small blocks will skip loop) + */ + probe = span = max / 2; + hashval = args->hashval; + while (span > 4) { + span /= 2; + btreehashval = be32_to_cpu(btree[probe].hashval); + if (btreehashval < hashval) + probe += span; + else if (btreehashval > hashval) + probe -= span; + else + break; + } + ASSERT((probe >= 0) && (probe < max)); + ASSERT((span <= 4) || + (be32_to_cpu(btree[probe].hashval) == hashval)); + + /* + * Since we may have duplicate hashval's, find the first + * matching hashval in the node. + */ + while (probe > 0 && + be32_to_cpu(btree[probe].hashval) >= hashval) { + probe--; + } + while (probe < max && + be32_to_cpu(btree[probe].hashval) < hashval) { + probe++; + } + + /* + * Pick the right block to descend on. + */ + if (probe == max) { + blk->index = max - 1; + blkno = be32_to_cpu(btree[max - 1].before); + } else { + blk->index = probe; + blkno = be32_to_cpu(btree[probe].before); + } + } + + /* + * A leaf block that ends in the hashval that we are interested in + * (final hashval == search hashval) means that the next block may + * contain more entries with the same hashval, shift upward to the + * next leaf and keep searching. + */ + for (;;) { + if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { + retval = xfs_dir2_leafn_lookup_int(blk->bp, args, + &blk->index, state); + } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { + retval = xfs_attr3_leaf_lookup_int(blk->bp, args); + blk->index = args->index; + args->blkno = blk->blkno; + } else { + ASSERT(0); + return EFSCORRUPTED; + } + if (((retval == ENOENT) || (retval == ENOATTR)) && + (blk->hashval == args->hashval)) { + error = xfs_da3_path_shift(state, &state->path, 1, 1, + &retval); + if (error) + return error; + if (retval == 0) { + continue; + } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { + /* path_shift() gives ENOENT */ + retval = ENOATTR; + } + } + break; + } + *result = retval; + return 0; +} + +/*======================================================================== + * Utility routines. + *========================================================================*/ + +/* + * Compare two intermediate nodes for "order". + */ +STATIC int +xfs_da3_node_order( + struct xfs_inode *dp, + struct xfs_buf *node1_bp, + struct xfs_buf *node2_bp) +{ + struct xfs_da_intnode *node1; + struct xfs_da_intnode *node2; + struct xfs_da_node_entry *btree1; + struct xfs_da_node_entry *btree2; + struct xfs_da3_icnode_hdr node1hdr; + struct xfs_da3_icnode_hdr node2hdr; + + node1 = node1_bp->b_addr; + node2 = node2_bp->b_addr; + dp->d_ops->node_hdr_from_disk(&node1hdr, node1); + dp->d_ops->node_hdr_from_disk(&node2hdr, node2); + btree1 = dp->d_ops->node_tree_p(node1); + btree2 = dp->d_ops->node_tree_p(node2); + + if (node1hdr.count > 0 && node2hdr.count > 0 && + ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || + (be32_to_cpu(btree2[node2hdr.count - 1].hashval) < + be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) { + return 1; + } + return 0; +} + +/* + * Link a new block into a doubly linked list of blocks (of whatever type). + */ +int /* error */ +xfs_da3_blk_link( + struct xfs_da_state *state, + struct xfs_da_state_blk *old_blk, + struct xfs_da_state_blk *new_blk) +{ + struct xfs_da_blkinfo *old_info; + struct xfs_da_blkinfo *new_info; + struct xfs_da_blkinfo *tmp_info; + struct xfs_da_args *args; + struct xfs_buf *bp; + int before = 0; + int error; + struct xfs_inode *dp = state->args->dp; + + /* + * Set up environment. + */ + args = state->args; + ASSERT(args != NULL); + old_info = old_blk->bp->b_addr; + new_info = new_blk->bp->b_addr; + ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || + old_blk->magic == XFS_DIR2_LEAFN_MAGIC || + old_blk->magic == XFS_ATTR_LEAF_MAGIC); + + switch (old_blk->magic) { + case XFS_ATTR_LEAF_MAGIC: + before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp); + break; + case XFS_DIR2_LEAFN_MAGIC: + before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp); + break; + case XFS_DA_NODE_MAGIC: + before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp); + break; + } + + /* + * Link blocks in appropriate order. + */ + if (before) { + /* + * Link new block in before existing block. + */ + trace_xfs_da_link_before(args); + new_info->forw = cpu_to_be32(old_blk->blkno); + new_info->back = old_info->back; + if (old_info->back) { + error = xfs_da3_node_read(args->trans, dp, + be32_to_cpu(old_info->back), + -1, &bp, args->whichfork); + if (error) + return error; + ASSERT(bp != NULL); + tmp_info = bp->b_addr; + ASSERT(tmp_info->magic == old_info->magic); + ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno); + tmp_info->forw = cpu_to_be32(new_blk->blkno); + xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); + } + old_info->back = cpu_to_be32(new_blk->blkno); + } else { + /* + * Link new block in after existing block. + */ + trace_xfs_da_link_after(args); + new_info->forw = old_info->forw; + new_info->back = cpu_to_be32(old_blk->blkno); + if (old_info->forw) { + error = xfs_da3_node_read(args->trans, dp, + be32_to_cpu(old_info->forw), + -1, &bp, args->whichfork); + if (error) + return error; + ASSERT(bp != NULL); + tmp_info = bp->b_addr; + ASSERT(tmp_info->magic == old_info->magic); + ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno); + tmp_info->back = cpu_to_be32(new_blk->blkno); + xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); + } + old_info->forw = cpu_to_be32(new_blk->blkno); + } + + xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); + xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); + return 0; +} + +/* + * Unlink a block from a doubly linked list of blocks. + */ +STATIC int /* error */ +xfs_da3_blk_unlink( + struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk, + struct xfs_da_state_blk *save_blk) +{ + struct xfs_da_blkinfo *drop_info; + struct xfs_da_blkinfo *save_info; + struct xfs_da_blkinfo *tmp_info; + struct xfs_da_args *args; + struct xfs_buf *bp; + int error; + + /* + * Set up environment. + */ + args = state->args; + ASSERT(args != NULL); + save_info = save_blk->bp->b_addr; + drop_info = drop_blk->bp->b_addr; + ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || + save_blk->magic == XFS_DIR2_LEAFN_MAGIC || + save_blk->magic == XFS_ATTR_LEAF_MAGIC); + ASSERT(save_blk->magic == drop_blk->magic); + ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) || + (be32_to_cpu(save_info->back) == drop_blk->blkno)); + ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) || + (be32_to_cpu(drop_info->back) == save_blk->blkno)); + + /* + * Unlink the leaf block from the doubly linked chain of leaves. + */ + if (be32_to_cpu(save_info->back) == drop_blk->blkno) { + trace_xfs_da_unlink_back(args); + save_info->back = drop_info->back; + if (drop_info->back) { + error = xfs_da3_node_read(args->trans, args->dp, + be32_to_cpu(drop_info->back), + -1, &bp, args->whichfork); + if (error) + return error; + ASSERT(bp != NULL); + tmp_info = bp->b_addr; + ASSERT(tmp_info->magic == save_info->magic); + ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno); + tmp_info->forw = cpu_to_be32(save_blk->blkno); + xfs_trans_log_buf(args->trans, bp, 0, + sizeof(*tmp_info) - 1); + } + } else { + trace_xfs_da_unlink_forward(args); + save_info->forw = drop_info->forw; + if (drop_info->forw) { + error = xfs_da3_node_read(args->trans, args->dp, + be32_to_cpu(drop_info->forw), + -1, &bp, args->whichfork); + if (error) + return error; + ASSERT(bp != NULL); + tmp_info = bp->b_addr; + ASSERT(tmp_info->magic == save_info->magic); + ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno); + tmp_info->back = cpu_to_be32(save_blk->blkno); + xfs_trans_log_buf(args->trans, bp, 0, + sizeof(*tmp_info) - 1); + } + } + + xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); + return 0; +} + +/* + * Move a path "forward" or "!forward" one block at the current level. + * + * This routine will adjust a "path" to point to the next block + * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the + * Btree, including updating pointers to the intermediate nodes between + * the new bottom and the root. + */ +int /* error */ +xfs_da3_path_shift( + struct xfs_da_state *state, + struct xfs_da_state_path *path, + int forward, + int release, + int *result) +{ + struct xfs_da_state_blk *blk; + struct xfs_da_blkinfo *info; + struct xfs_da_intnode *node; + struct xfs_da_args *args; + struct xfs_da_node_entry *btree; + struct xfs_da3_icnode_hdr nodehdr; + xfs_dablk_t blkno = 0; + int level; + int error; + struct xfs_inode *dp = state->args->dp; + + trace_xfs_da_path_shift(state->args); + + /* + * Roll up the Btree looking for the first block where our + * current index is not at the edge of the block. Note that + * we skip the bottom layer because we want the sibling block. + */ + args = state->args; + ASSERT(args != NULL); + ASSERT(path != NULL); + ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); + level = (path->active-1) - 1; /* skip bottom layer in path */ + for (blk = &path->blk[level]; level >= 0; blk--, level--) { + node = blk->bp->b_addr; + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); + + if (forward && (blk->index < nodehdr.count - 1)) { + blk->index++; + blkno = be32_to_cpu(btree[blk->index].before); + break; + } else if (!forward && (blk->index > 0)) { + blk->index--; + blkno = be32_to_cpu(btree[blk->index].before); + break; + } + } + if (level < 0) { + *result = ENOENT; /* we're out of our tree */ + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); + return 0; + } + + /* + * Roll down the edge of the subtree until we reach the + * same depth we were at originally. + */ + for (blk++, level++; level < path->active; blk++, level++) { + /* + * Release the old block. + * (if it's dirty, trans won't actually let go) + */ + if (release) + xfs_trans_brelse(args->trans, blk->bp); + + /* + * Read the next child block. + */ + blk->blkno = blkno; + error = xfs_da3_node_read(args->trans, dp, blkno, -1, + &blk->bp, args->whichfork); + if (error) + return error; + info = blk->bp->b_addr; + ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || + info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || + info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) || + info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || + info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); + + + /* + * Note: we flatten the magic number to a single type so we + * don't have to compare against crc/non-crc types elsewhere. + */ + switch (be16_to_cpu(info->magic)) { + case XFS_DA_NODE_MAGIC: + case XFS_DA3_NODE_MAGIC: + blk->magic = XFS_DA_NODE_MAGIC; + node = (xfs_da_intnode_t *)info; + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); + blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); + if (forward) + blk->index = 0; + else + blk->index = nodehdr.count - 1; + blkno = be32_to_cpu(btree[blk->index].before); + break; + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: + blk->magic = XFS_ATTR_LEAF_MAGIC; + ASSERT(level == path->active-1); + blk->index = 0; + blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); + break; + case XFS_DIR2_LEAFN_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: + blk->magic = XFS_DIR2_LEAFN_MAGIC; + ASSERT(level == path->active-1); + blk->index = 0; + blk->hashval = xfs_dir2_leafn_lasthash(args->dp, + blk->bp, NULL); + break; + default: + ASSERT(0); + break; + } + } + *result = 0; + return 0; +} + + +/*======================================================================== + * Utility routines. + *========================================================================*/ + +/* + * Implement a simple hash on a character string. + * Rotate the hash value by 7 bits, then XOR each character in. + * This is implemented with some source-level loop unrolling. + */ +xfs_dahash_t +xfs_da_hashname(const __uint8_t *name, int namelen) +{ + xfs_dahash_t hash; + + /* + * Do four characters at a time as long as we can. + */ + for (hash = 0; namelen >= 4; namelen -= 4, name += 4) + hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^ + (name[3] << 0) ^ rol32(hash, 7 * 4); + + /* + * Now do the rest of the characters. + */ + switch (namelen) { + case 3: + return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^ + rol32(hash, 7 * 3); + case 2: + return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2); + case 1: + return (name[0] << 0) ^ rol32(hash, 7 * 1); + default: /* case 0: */ + return hash; + } +} + +enum xfs_dacmp +xfs_da_compname( + struct xfs_da_args *args, + const unsigned char *name, + int len) +{ + return (args->namelen == len && memcmp(args->name, name, len) == 0) ? + XFS_CMP_EXACT : XFS_CMP_DIFFERENT; +} + +static xfs_dahash_t +xfs_default_hashname( + struct xfs_name *name) +{ + return xfs_da_hashname(name->name, name->len); +} + +const struct xfs_nameops xfs_default_nameops = { + .hashname = xfs_default_hashname, + .compname = xfs_da_compname +}; + +int +xfs_da_grow_inode_int( + struct xfs_da_args *args, + xfs_fileoff_t *bno, + int count) +{ + struct xfs_trans *tp = args->trans; + struct xfs_inode *dp = args->dp; + int w = args->whichfork; + xfs_drfsbno_t nblks = dp->i_d.di_nblocks; + struct xfs_bmbt_irec map, *mapp; + int nmap, error, got, i, mapi; + + /* + * Find a spot in the file space to put the new block. + */ + error = xfs_bmap_first_unused(tp, dp, count, bno, w); + if (error) + return error; + + /* + * Try mapping it in one filesystem block. + */ + nmap = 1; + ASSERT(args->firstblock != NULL); + error = xfs_bmapi_write(tp, dp, *bno, count, + xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, + args->firstblock, args->total, &map, &nmap, + args->flist); + if (error) + return error; + + ASSERT(nmap <= 1); + if (nmap == 1) { + mapp = ↦ + mapi = 1; + } else if (nmap == 0 && count > 1) { + xfs_fileoff_t b; + int c; + + /* + * If we didn't get it and the block might work if fragmented, + * try without the CONTIG flag. Loop until we get it all. + */ + mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); + for (b = *bno, mapi = 0; b < *bno + count; ) { + nmap = MIN(XFS_BMAP_MAX_NMAP, count); + c = (int)(*bno + count - b); + error = xfs_bmapi_write(tp, dp, b, c, + xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, + args->firstblock, args->total, + &mapp[mapi], &nmap, args->flist); + if (error) + goto out_free_map; + if (nmap < 1) + break; + mapi += nmap; + b = mapp[mapi - 1].br_startoff + + mapp[mapi - 1].br_blockcount; + } + } else { + mapi = 0; + mapp = NULL; + } + + /* + * Count the blocks we got, make sure it matches the total. + */ + for (i = 0, got = 0; i < mapi; i++) + got += mapp[i].br_blockcount; + if (got != count || mapp[0].br_startoff != *bno || + mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != + *bno + count) { + error = ENOSPC; + goto out_free_map; + } + + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; + +out_free_map: + if (mapp != &map) + kmem_free(mapp); + return error; +} + +/* + * Add a block to the btree ahead of the file. + * Return the new block number to the caller. + */ +int +xfs_da_grow_inode( + struct xfs_da_args *args, + xfs_dablk_t *new_blkno) +{ + xfs_fileoff_t bno; + int error; + + trace_xfs_da_grow_inode(args); + + bno = args->geo->leafblk; + error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount); + if (!error) + *new_blkno = (xfs_dablk_t)bno; + return error; +} + +/* + * Ick. We need to always be able to remove a btree block, even + * if there's no space reservation because the filesystem is full. + * This is called if xfs_bunmapi on a btree block fails due to ENOSPC. + * It swaps the target block with the last block in the file. The + * last block in the file can always be removed since it can't cause + * a bmap btree split to do that. + */ +STATIC int +xfs_da3_swap_lastblock( + struct xfs_da_args *args, + xfs_dablk_t *dead_blknop, + struct xfs_buf **dead_bufp) +{ + struct xfs_da_blkinfo *dead_info; + struct xfs_da_blkinfo *sib_info; + struct xfs_da_intnode *par_node; + struct xfs_da_intnode *dead_node; + struct xfs_dir2_leaf *dead_leaf2; + struct xfs_da_node_entry *btree; + struct xfs_da3_icnode_hdr par_hdr; + struct xfs_inode *dp; + struct xfs_trans *tp; + struct xfs_mount *mp; + struct xfs_buf *dead_buf; + struct xfs_buf *last_buf; + struct xfs_buf *sib_buf; + struct xfs_buf *par_buf; + xfs_dahash_t dead_hash; + xfs_fileoff_t lastoff; + xfs_dablk_t dead_blkno; + xfs_dablk_t last_blkno; + xfs_dablk_t sib_blkno; + xfs_dablk_t par_blkno; + int error; + int w; + int entno; + int level; + int dead_level; + + trace_xfs_da_swap_lastblock(args); + + dead_buf = *dead_bufp; + dead_blkno = *dead_blknop; + tp = args->trans; + dp = args->dp; + w = args->whichfork; + ASSERT(w == XFS_DATA_FORK); + mp = dp->i_mount; + lastoff = args->geo->freeblk; + error = xfs_bmap_last_before(tp, dp, &lastoff, w); + if (error) + return error; + if (unlikely(lastoff == 0)) { + XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW, + mp); + return EFSCORRUPTED; + } + /* + * Read the last block in the btree space. + */ + last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount; + error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w); + if (error) + return error; + /* + * Copy the last block into the dead buffer and log it. + */ + memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize); + xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1); + dead_info = dead_buf->b_addr; + /* + * Get values from the moved block. + */ + if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { + struct xfs_dir3_icleaf_hdr leafhdr; + struct xfs_dir2_leaf_entry *ents; + + dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; + dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2); + ents = dp->d_ops->leaf_ents_p(dead_leaf2); + dead_level = 0; + dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval); + } else { + struct xfs_da3_icnode_hdr deadhdr; + + dead_node = (xfs_da_intnode_t *)dead_info; + dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node); + btree = dp->d_ops->node_tree_p(dead_node); + dead_level = deadhdr.level; + dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval); + } + sib_buf = par_buf = NULL; + /* + * If the moved block has a left sibling, fix up the pointers. + */ + if ((sib_blkno = be32_to_cpu(dead_info->back))) { + error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); + if (error) + goto done; + sib_info = sib_buf->b_addr; + if (unlikely( + be32_to_cpu(sib_info->forw) != last_blkno || + sib_info->magic != dead_info->magic)) { + XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)", + XFS_ERRLEVEL_LOW, mp); + error = EFSCORRUPTED; + goto done; + } + sib_info->forw = cpu_to_be32(dead_blkno); + xfs_trans_log_buf(tp, sib_buf, + XFS_DA_LOGRANGE(sib_info, &sib_info->forw, + sizeof(sib_info->forw))); + sib_buf = NULL; + } + /* + * If the moved block has a right sibling, fix up the pointers. + */ + if ((sib_blkno = be32_to_cpu(dead_info->forw))) { + error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); + if (error) + goto done; + sib_info = sib_buf->b_addr; + if (unlikely( + be32_to_cpu(sib_info->back) != last_blkno || + sib_info->magic != dead_info->magic)) { + XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)", + XFS_ERRLEVEL_LOW, mp); + error = EFSCORRUPTED; + goto done; + } + sib_info->back = cpu_to_be32(dead_blkno); + xfs_trans_log_buf(tp, sib_buf, + XFS_DA_LOGRANGE(sib_info, &sib_info->back, + sizeof(sib_info->back))); + sib_buf = NULL; + } + par_blkno = args->geo->leafblk; + level = -1; + /* + * Walk down the tree looking for the parent of the moved block. + */ + for (;;) { + error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); + if (error) + goto done; + par_node = par_buf->b_addr; + dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); + if (level >= 0 && level != par_hdr.level + 1) { + XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", + XFS_ERRLEVEL_LOW, mp); + error = EFSCORRUPTED; + goto done; + } + level = par_hdr.level; + btree = dp->d_ops->node_tree_p(par_node); + for (entno = 0; + entno < par_hdr.count && + be32_to_cpu(btree[entno].hashval) < dead_hash; + entno++) + continue; + if (entno == par_hdr.count) { + XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", + XFS_ERRLEVEL_LOW, mp); + error = EFSCORRUPTED; + goto done; + } + par_blkno = be32_to_cpu(btree[entno].before); + if (level == dead_level + 1) + break; + xfs_trans_brelse(tp, par_buf); + par_buf = NULL; + } + /* + * We're in the right parent block. + * Look for the right entry. + */ + for (;;) { + for (; + entno < par_hdr.count && + be32_to_cpu(btree[entno].before) != last_blkno; + entno++) + continue; + if (entno < par_hdr.count) + break; + par_blkno = par_hdr.forw; + xfs_trans_brelse(tp, par_buf); + par_buf = NULL; + if (unlikely(par_blkno == 0)) { + XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", + XFS_ERRLEVEL_LOW, mp); + error = EFSCORRUPTED; + goto done; + } + error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); + if (error) + goto done; + par_node = par_buf->b_addr; + dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); + if (par_hdr.level != level) { + XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", + XFS_ERRLEVEL_LOW, mp); + error = EFSCORRUPTED; + goto done; + } + btree = dp->d_ops->node_tree_p(par_node); + entno = 0; + } + /* + * Update the parent entry pointing to the moved block. + */ + btree[entno].before = cpu_to_be32(dead_blkno); + xfs_trans_log_buf(tp, par_buf, + XFS_DA_LOGRANGE(par_node, &btree[entno].before, + sizeof(btree[entno].before))); + *dead_blknop = last_blkno; + *dead_bufp = last_buf; + return 0; +done: + if (par_buf) + xfs_trans_brelse(tp, par_buf); + if (sib_buf) + xfs_trans_brelse(tp, sib_buf); + xfs_trans_brelse(tp, last_buf); + return error; +} + +/* + * Remove a btree block from a directory or attribute. + */ +int +xfs_da_shrink_inode( + xfs_da_args_t *args, + xfs_dablk_t dead_blkno, + struct xfs_buf *dead_buf) +{ + xfs_inode_t *dp; + int done, error, w, count; + xfs_trans_t *tp; + xfs_mount_t *mp; + + trace_xfs_da_shrink_inode(args); + + dp = args->dp; + w = args->whichfork; + tp = args->trans; + mp = dp->i_mount; + count = args->geo->fsbcount; + for (;;) { + /* + * Remove extents. If we get ENOSPC for a dir we have to move + * the last block to the place we want to kill. + */ + error = xfs_bunmapi(tp, dp, dead_blkno, count, + xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, + 0, args->firstblock, args->flist, &done); + if (error == ENOSPC) { + if (w != XFS_DATA_FORK) + break; + error = xfs_da3_swap_lastblock(args, &dead_blkno, + &dead_buf); + if (error) + break; + } else { + break; + } + } + xfs_trans_binval(tp, dead_buf); + return error; +} + +/* + * See if the mapping(s) for this btree block are valid, i.e. + * don't contain holes, are logically contiguous, and cover the whole range. + */ +STATIC int +xfs_da_map_covers_blocks( + int nmap, + xfs_bmbt_irec_t *mapp, + xfs_dablk_t bno, + int count) +{ + int i; + xfs_fileoff_t off; + + for (i = 0, off = bno; i < nmap; i++) { + if (mapp[i].br_startblock == HOLESTARTBLOCK || + mapp[i].br_startblock == DELAYSTARTBLOCK) { + return 0; + } + if (off != mapp[i].br_startoff) { + return 0; + } + off += mapp[i].br_blockcount; + } + return off == bno + count; +} + +/* + * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map. + * + * For the single map case, it is assumed that the caller has provided a pointer + * to a valid xfs_buf_map. For the multiple map case, this function will + * allocate the xfs_buf_map to hold all the maps and replace the caller's single + * map pointer with the allocated map. + */ +static int +xfs_buf_map_from_irec( + struct xfs_mount *mp, + struct xfs_buf_map **mapp, + int *nmaps, + struct xfs_bmbt_irec *irecs, + int nirecs) +{ + struct xfs_buf_map *map; + int i; + + ASSERT(*nmaps == 1); + ASSERT(nirecs >= 1); + + if (nirecs > 1) { + map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), + KM_SLEEP | KM_NOFS); + if (!map) + return ENOMEM; + *mapp = map; + } + + *nmaps = nirecs; + map = *mapp; + for (i = 0; i < *nmaps; i++) { + ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK && + irecs[i].br_startblock != HOLESTARTBLOCK); + map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock); + map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount); + } + return 0; +} + +/* + * Map the block we are given ready for reading. There are three possible return + * values: + * -1 - will be returned if we land in a hole and mappedbno == -2 so the + * caller knows not to execute a subsequent read. + * 0 - if we mapped the block successfully + * >0 - positive error number if there was an error. + */ +static int +xfs_dabuf_map( + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + int whichfork, + struct xfs_buf_map **map, + int *nmaps) +{ + struct xfs_mount *mp = dp->i_mount; + int nfsb; + int error = 0; + struct xfs_bmbt_irec irec; + struct xfs_bmbt_irec *irecs = &irec; + int nirecs; + + ASSERT(map && *map); + ASSERT(*nmaps == 1); + + if (whichfork == XFS_DATA_FORK) + nfsb = mp->m_dir_geo->fsbcount; + else + nfsb = mp->m_attr_geo->fsbcount; + + /* + * Caller doesn't have a mapping. -2 means don't complain + * if we land in a hole. + */ + if (mappedbno == -1 || mappedbno == -2) { + /* + * Optimize the one-block case. + */ + if (nfsb != 1) + irecs = kmem_zalloc(sizeof(irec) * nfsb, + KM_SLEEP | KM_NOFS); + + nirecs = nfsb; + error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, + &nirecs, xfs_bmapi_aflag(whichfork)); + if (error) + goto out; + } else { + irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); + irecs->br_startoff = (xfs_fileoff_t)bno; + irecs->br_blockcount = nfsb; + irecs->br_state = 0; + nirecs = 1; + } + + if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) { + error = mappedbno == -2 ? -1 : EFSCORRUPTED; + if (unlikely(error == EFSCORRUPTED)) { + if (xfs_error_level >= XFS_ERRLEVEL_LOW) { + int i; + xfs_alert(mp, "%s: bno %lld dir: inode %lld", + __func__, (long long)bno, + (long long)dp->i_ino); + for (i = 0; i < *nmaps; i++) { + xfs_alert(mp, +"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d", + i, + (long long)irecs[i].br_startoff, + (long long)irecs[i].br_startblock, + (long long)irecs[i].br_blockcount, + irecs[i].br_state); + } + } + XFS_ERROR_REPORT("xfs_da_do_buf(1)", + XFS_ERRLEVEL_LOW, mp); + } + goto out; + } + error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs); +out: + if (irecs != &irec) + kmem_free(irecs); + return error; +} + +/* + * Get a buffer for the dir/attr block. + */ +int +xfs_da_get_buf( + struct xfs_trans *trans, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp, + int whichfork) +{ + struct xfs_buf *bp; + struct xfs_buf_map map; + struct xfs_buf_map *mapp; + int nmap; + int error; + + *bpp = NULL; + mapp = ↦ + nmap = 1; + error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, + &mapp, &nmap); + if (error) { + /* mapping a hole is not an error, but we don't continue */ + if (error == -1) + error = 0; + goto out_free; + } + + bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp, + mapp, nmap, 0); + error = bp ? bp->b_error : EIO; + if (error) { + xfs_trans_brelse(trans, bp); + goto out_free; + } + + *bpp = bp; + +out_free: + if (mapp != &map) + kmem_free(mapp); + + return error; +} + +/* + * Get a buffer for the dir/attr block, fill in the contents. + */ +int +xfs_da_read_buf( + struct xfs_trans *trans, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp, + int whichfork, + const struct xfs_buf_ops *ops) +{ + struct xfs_buf *bp; + struct xfs_buf_map map; + struct xfs_buf_map *mapp; + int nmap; + int error; + + *bpp = NULL; + mapp = ↦ + nmap = 1; + error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, + &mapp, &nmap); + if (error) { + /* mapping a hole is not an error, but we don't continue */ + if (error == -1) + error = 0; + goto out_free; + } + + error = xfs_trans_read_buf_map(dp->i_mount, trans, + dp->i_mount->m_ddev_targp, + mapp, nmap, 0, &bp, ops); + if (error) + goto out_free; + + if (whichfork == XFS_ATTR_FORK) + xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); + else + xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); + *bpp = bp; +out_free: + if (mapp != &map) + kmem_free(mapp); + + return error; +} + +/* + * Readahead the dir/attr block. + */ +xfs_daddr_t +xfs_da_reada_buf( + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + int whichfork, + const struct xfs_buf_ops *ops) +{ + struct xfs_buf_map map; + struct xfs_buf_map *mapp; + int nmap; + int error; + + mapp = ↦ + nmap = 1; + error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, + &mapp, &nmap); + if (error) { + /* mapping a hole is not an error, but we don't continue */ + if (error == -1) + error = 0; + goto out_free; + } + + mappedbno = mapp[0].bm_bn; + xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops); + +out_free: + if (mapp != &map) + kmem_free(mapp); + + if (error) + return -1; + return mappedbno; +} diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c new file mode 100644 index 000000000000..c9aee52a37e2 --- /dev/null +++ b/fs/xfs/libxfs/xfs_da_format.c @@ -0,0 +1,911 @@ +/* + * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" + +/* + * Shortform directory ops + */ +static int +xfs_dir2_sf_entsize( + struct xfs_dir2_sf_hdr *hdr, + int len) +{ + int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */ + + count += len; /* name */ + count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) : + sizeof(xfs_dir2_ino4_t); /* ino # */ + return count; +} + +static int +xfs_dir3_sf_entsize( + struct xfs_dir2_sf_hdr *hdr, + int len) +{ + return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t); +} + +static struct xfs_dir2_sf_entry * +xfs_dir2_sf_nextentry( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return (struct xfs_dir2_sf_entry *) + ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen)); +} + +static struct xfs_dir2_sf_entry * +xfs_dir3_sf_nextentry( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return (struct xfs_dir2_sf_entry *) + ((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen)); +} + + +/* + * For filetype enabled shortform directories, the file type field is stored at + * the end of the name. Because it's only a single byte, endian conversion is + * not necessary. For non-filetype enable directories, the type is always + * unknown and we never store the value. + */ +static __uint8_t +xfs_dir2_sfe_get_ftype( + struct xfs_dir2_sf_entry *sfep) +{ + return XFS_DIR3_FT_UNKNOWN; +} + +static void +xfs_dir2_sfe_put_ftype( + struct xfs_dir2_sf_entry *sfep, + __uint8_t ftype) +{ + ASSERT(ftype < XFS_DIR3_FT_MAX); +} + +static __uint8_t +xfs_dir3_sfe_get_ftype( + struct xfs_dir2_sf_entry *sfep) +{ + __uint8_t ftype; + + ftype = sfep->name[sfep->namelen]; + if (ftype >= XFS_DIR3_FT_MAX) + return XFS_DIR3_FT_UNKNOWN; + return ftype; +} + +static void +xfs_dir3_sfe_put_ftype( + struct xfs_dir2_sf_entry *sfep, + __uint8_t ftype) +{ + ASSERT(ftype < XFS_DIR3_FT_MAX); + + sfep->name[sfep->namelen] = ftype; +} + +/* + * Inode numbers in short-form directories can come in two versions, + * either 4 bytes or 8 bytes wide. These helpers deal with the + * two forms transparently by looking at the headers i8count field. + * + * For 64-bit inode number the most significant byte must be zero. + */ +static xfs_ino_t +xfs_dir2_sf_get_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_dir2_inou_t *from) +{ + if (hdr->i8count) + return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; + else + return get_unaligned_be32(&from->i4.i); +} + +static void +xfs_dir2_sf_put_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_dir2_inou_t *to, + xfs_ino_t ino) +{ + ASSERT((ino & 0xff00000000000000ULL) == 0); + + if (hdr->i8count) + put_unaligned_be64(ino, &to->i8.i); + else + put_unaligned_be32(ino, &to->i4.i); +} + +static xfs_ino_t +xfs_dir2_sf_get_parent_ino( + struct xfs_dir2_sf_hdr *hdr) +{ + return xfs_dir2_sf_get_ino(hdr, &hdr->parent); +} + +static void +xfs_dir2_sf_put_parent_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); +} + +/* + * In short-form directory entries the inode numbers are stored at variable + * offset behind the entry name. If the entry stores a filetype value, then it + * sits between the name and the inode number. Hence the inode numbers may only + * be accessed through the helpers below. + */ +static xfs_ino_t +xfs_dir2_sfe_get_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return xfs_dir2_sf_get_ino(hdr, + (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]); +} + +static void +xfs_dir2_sfe_put_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, + (xfs_dir2_inou_t *)&sfep->name[sfep->namelen], ino); +} + +static xfs_ino_t +xfs_dir3_sfe_get_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return xfs_dir2_sf_get_ino(hdr, + (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1]); +} + +static void +xfs_dir3_sfe_put_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, + (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1], ino); +} + + +/* + * Directory data block operations + */ + +/* + * For special situations, the dirent size ends up fixed because we always know + * what the size of the entry is. That's true for the "." and "..", and + * therefore we know that they are a fixed size and hence their offsets are + * constant, as is the first entry. + * + * Hence, this calculation is written as a macro to be able to be calculated at + * compile time and so certain offsets can be calculated directly in the + * structure initaliser via the macro. There are two macros - one for dirents + * with ftype and without so there are no unresolvable conditionals in the + * calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power + * of 2 and the compiler doesn't reject it (unlike roundup()). + */ +#define XFS_DIR2_DATA_ENTSIZE(n) \ + round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \ + sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN) + +#define XFS_DIR3_DATA_ENTSIZE(n) \ + round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \ + sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)), \ + XFS_DIR2_DATA_ALIGN) + +static int +xfs_dir2_data_entsize( + int n) +{ + return XFS_DIR2_DATA_ENTSIZE(n); +} + +static int +xfs_dir3_data_entsize( + int n) +{ + return XFS_DIR3_DATA_ENTSIZE(n); +} + +static __uint8_t +xfs_dir2_data_get_ftype( + struct xfs_dir2_data_entry *dep) +{ + return XFS_DIR3_FT_UNKNOWN; +} + +static void +xfs_dir2_data_put_ftype( + struct xfs_dir2_data_entry *dep, + __uint8_t ftype) +{ + ASSERT(ftype < XFS_DIR3_FT_MAX); +} + +static __uint8_t +xfs_dir3_data_get_ftype( + struct xfs_dir2_data_entry *dep) +{ + __uint8_t ftype = dep->name[dep->namelen]; + + ASSERT(ftype < XFS_DIR3_FT_MAX); + if (ftype >= XFS_DIR3_FT_MAX) + return XFS_DIR3_FT_UNKNOWN; + return ftype; +} + +static void +xfs_dir3_data_put_ftype( + struct xfs_dir2_data_entry *dep, + __uint8_t type) +{ + ASSERT(type < XFS_DIR3_FT_MAX); + ASSERT(dep->namelen != 0); + + dep->name[dep->namelen] = type; +} + +/* + * Pointer to an entry's tag word. + */ +static __be16 * +xfs_dir2_data_entry_tag_p( + struct xfs_dir2_data_entry *dep) +{ + return (__be16 *)((char *)dep + + xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); +} + +static __be16 * +xfs_dir3_data_entry_tag_p( + struct xfs_dir2_data_entry *dep) +{ + return (__be16 *)((char *)dep + + xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16)); +} + +/* + * location of . and .. in data space (always block 0) + */ +static struct xfs_dir2_data_entry * +xfs_dir2_data_dot_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_data_dotdot_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR2_DATA_ENTSIZE(1)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_data_first_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR2_DATA_ENTSIZE(1) + + XFS_DIR2_DATA_ENTSIZE(2)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_ftype_data_dotdot_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_ftype_data_first_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1) + + XFS_DIR3_DATA_ENTSIZE(2)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_dot_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_dotdot_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_first_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1) + + XFS_DIR3_DATA_ENTSIZE(2)); +} + +static struct xfs_dir2_data_free * +xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) +{ + return hdr->bestfree; +} + +static struct xfs_dir2_data_free * +xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) +{ + return ((struct xfs_dir3_data_hdr *)hdr)->best_free; +} + +static struct xfs_dir2_data_entry * +xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); +} + +static struct xfs_dir2_data_unused * +xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_unused *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); +} + +static struct xfs_dir2_data_unused * +xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_unused *) + ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); +} + + +/* + * Directory Leaf block operations + */ +static int +xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo) +{ + return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) / + (uint)sizeof(struct xfs_dir2_leaf_entry); +} + +static struct xfs_dir2_leaf_entry * +xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp) +{ + return lp->__ents; +} + +static int +xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo) +{ + return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) / + (uint)sizeof(struct xfs_dir2_leaf_entry); +} + +static struct xfs_dir2_leaf_entry * +xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp) +{ + return ((struct xfs_dir3_leaf *)lp)->__ents; +} + +static void +xfs_dir2_leaf_hdr_from_disk( + struct xfs_dir3_icleaf_hdr *to, + struct xfs_dir2_leaf *from) +{ + to->forw = be32_to_cpu(from->hdr.info.forw); + to->back = be32_to_cpu(from->hdr.info.back); + to->magic = be16_to_cpu(from->hdr.info.magic); + to->count = be16_to_cpu(from->hdr.count); + to->stale = be16_to_cpu(from->hdr.stale); + + ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC || + to->magic == XFS_DIR2_LEAFN_MAGIC); +} + +static void +xfs_dir2_leaf_hdr_to_disk( + struct xfs_dir2_leaf *to, + struct xfs_dir3_icleaf_hdr *from) +{ + ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC || + from->magic == XFS_DIR2_LEAFN_MAGIC); + + to->hdr.info.forw = cpu_to_be32(from->forw); + to->hdr.info.back = cpu_to_be32(from->back); + to->hdr.info.magic = cpu_to_be16(from->magic); + to->hdr.count = cpu_to_be16(from->count); + to->hdr.stale = cpu_to_be16(from->stale); +} + +static void +xfs_dir3_leaf_hdr_from_disk( + struct xfs_dir3_icleaf_hdr *to, + struct xfs_dir2_leaf *from) +{ + struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from; + + to->forw = be32_to_cpu(hdr3->info.hdr.forw); + to->back = be32_to_cpu(hdr3->info.hdr.back); + to->magic = be16_to_cpu(hdr3->info.hdr.magic); + to->count = be16_to_cpu(hdr3->count); + to->stale = be16_to_cpu(hdr3->stale); + + ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC || + to->magic == XFS_DIR3_LEAFN_MAGIC); +} + +static void +xfs_dir3_leaf_hdr_to_disk( + struct xfs_dir2_leaf *to, + struct xfs_dir3_icleaf_hdr *from) +{ + struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to; + + ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC || + from->magic == XFS_DIR3_LEAFN_MAGIC); + + hdr3->info.hdr.forw = cpu_to_be32(from->forw); + hdr3->info.hdr.back = cpu_to_be32(from->back); + hdr3->info.hdr.magic = cpu_to_be16(from->magic); + hdr3->count = cpu_to_be16(from->count); + hdr3->stale = cpu_to_be16(from->stale); +} + + +/* + * Directory/Attribute Node block operations + */ +static struct xfs_da_node_entry * +xfs_da2_node_tree_p(struct xfs_da_intnode *dap) +{ + return dap->__btree; +} + +static struct xfs_da_node_entry * +xfs_da3_node_tree_p(struct xfs_da_intnode *dap) +{ + return ((struct xfs_da3_intnode *)dap)->__btree; +} + +static void +xfs_da2_node_hdr_from_disk( + struct xfs_da3_icnode_hdr *to, + struct xfs_da_intnode *from) +{ + ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + to->forw = be32_to_cpu(from->hdr.info.forw); + to->back = be32_to_cpu(from->hdr.info.back); + to->magic = be16_to_cpu(from->hdr.info.magic); + to->count = be16_to_cpu(from->hdr.__count); + to->level = be16_to_cpu(from->hdr.__level); +} + +static void +xfs_da2_node_hdr_to_disk( + struct xfs_da_intnode *to, + struct xfs_da3_icnode_hdr *from) +{ + ASSERT(from->magic == XFS_DA_NODE_MAGIC); + to->hdr.info.forw = cpu_to_be32(from->forw); + to->hdr.info.back = cpu_to_be32(from->back); + to->hdr.info.magic = cpu_to_be16(from->magic); + to->hdr.__count = cpu_to_be16(from->count); + to->hdr.__level = cpu_to_be16(from->level); +} + +static void +xfs_da3_node_hdr_from_disk( + struct xfs_da3_icnode_hdr *to, + struct xfs_da_intnode *from) +{ + struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from; + + ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); + to->forw = be32_to_cpu(hdr3->info.hdr.forw); + to->back = be32_to_cpu(hdr3->info.hdr.back); + to->magic = be16_to_cpu(hdr3->info.hdr.magic); + to->count = be16_to_cpu(hdr3->__count); + to->level = be16_to_cpu(hdr3->__level); +} + +static void +xfs_da3_node_hdr_to_disk( + struct xfs_da_intnode *to, + struct xfs_da3_icnode_hdr *from) +{ + struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to; + + ASSERT(from->magic == XFS_DA3_NODE_MAGIC); + hdr3->info.hdr.forw = cpu_to_be32(from->forw); + hdr3->info.hdr.back = cpu_to_be32(from->back); + hdr3->info.hdr.magic = cpu_to_be16(from->magic); + hdr3->__count = cpu_to_be16(from->count); + hdr3->__level = cpu_to_be16(from->level); +} + + +/* + * Directory free space block operations + */ +static int +xfs_dir2_free_max_bests(struct xfs_da_geometry *geo) +{ + return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) / + sizeof(xfs_dir2_data_off_t); +} + +static __be16 * +xfs_dir2_free_bests_p(struct xfs_dir2_free *free) +{ + return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr)); +} + +/* + * Convert data space db to the corresponding free db. + */ +static xfs_dir2_db_t +xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ + return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + + (db / xfs_dir2_free_max_bests(geo)); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static int +xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ + return db % xfs_dir2_free_max_bests(geo); +} + +static int +xfs_dir3_free_max_bests(struct xfs_da_geometry *geo) +{ + return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) / + sizeof(xfs_dir2_data_off_t); +} + +static __be16 * +xfs_dir3_free_bests_p(struct xfs_dir2_free *free) +{ + return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr)); +} + +/* + * Convert data space db to the corresponding free db. + */ +static xfs_dir2_db_t +xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ + return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + + (db / xfs_dir3_free_max_bests(geo)); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static int +xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ + return db % xfs_dir3_free_max_bests(geo); +} + +static void +xfs_dir2_free_hdr_from_disk( + struct xfs_dir3_icfree_hdr *to, + struct xfs_dir2_free *from) +{ + to->magic = be32_to_cpu(from->hdr.magic); + to->firstdb = be32_to_cpu(from->hdr.firstdb); + to->nvalid = be32_to_cpu(from->hdr.nvalid); + to->nused = be32_to_cpu(from->hdr.nused); + ASSERT(to->magic == XFS_DIR2_FREE_MAGIC); +} + +static void +xfs_dir2_free_hdr_to_disk( + struct xfs_dir2_free *to, + struct xfs_dir3_icfree_hdr *from) +{ + ASSERT(from->magic == XFS_DIR2_FREE_MAGIC); + + to->hdr.magic = cpu_to_be32(from->magic); + to->hdr.firstdb = cpu_to_be32(from->firstdb); + to->hdr.nvalid = cpu_to_be32(from->nvalid); + to->hdr.nused = cpu_to_be32(from->nused); +} + +static void +xfs_dir3_free_hdr_from_disk( + struct xfs_dir3_icfree_hdr *to, + struct xfs_dir2_free *from) +{ + struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from; + + to->magic = be32_to_cpu(hdr3->hdr.magic); + to->firstdb = be32_to_cpu(hdr3->firstdb); + to->nvalid = be32_to_cpu(hdr3->nvalid); + to->nused = be32_to_cpu(hdr3->nused); + + ASSERT(to->magic == XFS_DIR3_FREE_MAGIC); +} + +static void +xfs_dir3_free_hdr_to_disk( + struct xfs_dir2_free *to, + struct xfs_dir3_icfree_hdr *from) +{ + struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to; + + ASSERT(from->magic == XFS_DIR3_FREE_MAGIC); + + hdr3->hdr.magic = cpu_to_be32(from->magic); + hdr3->firstdb = cpu_to_be32(from->firstdb); + hdr3->nvalid = cpu_to_be32(from->nvalid); + hdr3->nused = cpu_to_be32(from->nused); +} + +static const struct xfs_dir_ops xfs_dir2_ops = { + .sf_entsize = xfs_dir2_sf_entsize, + .sf_nextentry = xfs_dir2_sf_nextentry, + .sf_get_ftype = xfs_dir2_sfe_get_ftype, + .sf_put_ftype = xfs_dir2_sfe_put_ftype, + .sf_get_ino = xfs_dir2_sfe_get_ino, + .sf_put_ino = xfs_dir2_sfe_put_ino, + .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, + .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, + + .data_entsize = xfs_dir2_data_entsize, + .data_get_ftype = xfs_dir2_data_get_ftype, + .data_put_ftype = xfs_dir2_data_put_ftype, + .data_entry_tag_p = xfs_dir2_data_entry_tag_p, + .data_bestfree_p = xfs_dir2_data_bestfree_p, + + .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), + .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR2_DATA_ENTSIZE(1), + .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR2_DATA_ENTSIZE(1) + + XFS_DIR2_DATA_ENTSIZE(2), + .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), + + .data_dot_entry_p = xfs_dir2_data_dot_entry_p, + .data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p, + .data_first_entry_p = xfs_dir2_data_first_entry_p, + .data_entry_p = xfs_dir2_data_entry_p, + .data_unused_p = xfs_dir2_data_unused_p, + + .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), + .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, + .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, + .leaf_max_ents = xfs_dir2_max_leaf_ents, + .leaf_ents_p = xfs_dir2_leaf_ents_p, + + .node_hdr_size = sizeof(struct xfs_da_node_hdr), + .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, + .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, + .node_tree_p = xfs_da2_node_tree_p, + + .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), + .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, + .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, + .free_max_bests = xfs_dir2_free_max_bests, + .free_bests_p = xfs_dir2_free_bests_p, + .db_to_fdb = xfs_dir2_db_to_fdb, + .db_to_fdindex = xfs_dir2_db_to_fdindex, +}; + +static const struct xfs_dir_ops xfs_dir2_ftype_ops = { + .sf_entsize = xfs_dir3_sf_entsize, + .sf_nextentry = xfs_dir3_sf_nextentry, + .sf_get_ftype = xfs_dir3_sfe_get_ftype, + .sf_put_ftype = xfs_dir3_sfe_put_ftype, + .sf_get_ino = xfs_dir3_sfe_get_ino, + .sf_put_ino = xfs_dir3_sfe_put_ino, + .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, + .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, + + .data_entsize = xfs_dir3_data_entsize, + .data_get_ftype = xfs_dir3_data_get_ftype, + .data_put_ftype = xfs_dir3_data_put_ftype, + .data_entry_tag_p = xfs_dir3_data_entry_tag_p, + .data_bestfree_p = xfs_dir2_data_bestfree_p, + + .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), + .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1), + .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1) + + XFS_DIR3_DATA_ENTSIZE(2), + .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), + + .data_dot_entry_p = xfs_dir2_data_dot_entry_p, + .data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p, + .data_first_entry_p = xfs_dir2_ftype_data_first_entry_p, + .data_entry_p = xfs_dir2_data_entry_p, + .data_unused_p = xfs_dir2_data_unused_p, + + .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), + .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, + .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, + .leaf_max_ents = xfs_dir2_max_leaf_ents, + .leaf_ents_p = xfs_dir2_leaf_ents_p, + + .node_hdr_size = sizeof(struct xfs_da_node_hdr), + .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, + .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, + .node_tree_p = xfs_da2_node_tree_p, + + .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), + .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, + .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, + .free_max_bests = xfs_dir2_free_max_bests, + .free_bests_p = xfs_dir2_free_bests_p, + .db_to_fdb = xfs_dir2_db_to_fdb, + .db_to_fdindex = xfs_dir2_db_to_fdindex, +}; + +static const struct xfs_dir_ops xfs_dir3_ops = { + .sf_entsize = xfs_dir3_sf_entsize, + .sf_nextentry = xfs_dir3_sf_nextentry, + .sf_get_ftype = xfs_dir3_sfe_get_ftype, + .sf_put_ftype = xfs_dir3_sfe_put_ftype, + .sf_get_ino = xfs_dir3_sfe_get_ino, + .sf_put_ino = xfs_dir3_sfe_put_ino, + .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, + .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, + + .data_entsize = xfs_dir3_data_entsize, + .data_get_ftype = xfs_dir3_data_get_ftype, + .data_put_ftype = xfs_dir3_data_put_ftype, + .data_entry_tag_p = xfs_dir3_data_entry_tag_p, + .data_bestfree_p = xfs_dir3_data_bestfree_p, + + .data_dot_offset = sizeof(struct xfs_dir3_data_hdr), + .data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1), + .data_first_offset = sizeof(struct xfs_dir3_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1) + + XFS_DIR3_DATA_ENTSIZE(2), + .data_entry_offset = sizeof(struct xfs_dir3_data_hdr), + + .data_dot_entry_p = xfs_dir3_data_dot_entry_p, + .data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p, + .data_first_entry_p = xfs_dir3_data_first_entry_p, + .data_entry_p = xfs_dir3_data_entry_p, + .data_unused_p = xfs_dir3_data_unused_p, + + .leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr), + .leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk, + .leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk, + .leaf_max_ents = xfs_dir3_max_leaf_ents, + .leaf_ents_p = xfs_dir3_leaf_ents_p, + + .node_hdr_size = sizeof(struct xfs_da3_node_hdr), + .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, + .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, + .node_tree_p = xfs_da3_node_tree_p, + + .free_hdr_size = sizeof(struct xfs_dir3_free_hdr), + .free_hdr_to_disk = xfs_dir3_free_hdr_to_disk, + .free_hdr_from_disk = xfs_dir3_free_hdr_from_disk, + .free_max_bests = xfs_dir3_free_max_bests, + .free_bests_p = xfs_dir3_free_bests_p, + .db_to_fdb = xfs_dir3_db_to_fdb, + .db_to_fdindex = xfs_dir3_db_to_fdindex, +}; + +static const struct xfs_dir_ops xfs_dir2_nondir_ops = { + .node_hdr_size = sizeof(struct xfs_da_node_hdr), + .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, + .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, + .node_tree_p = xfs_da2_node_tree_p, +}; + +static const struct xfs_dir_ops xfs_dir3_nondir_ops = { + .node_hdr_size = sizeof(struct xfs_da3_node_hdr), + .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, + .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, + .node_tree_p = xfs_da3_node_tree_p, +}; + +/* + * Return the ops structure according to the current config. If we are passed + * an inode, then that overrides the default config we use which is based on + * feature bits. + */ +const struct xfs_dir_ops * +xfs_dir_get_ops( + struct xfs_mount *mp, + struct xfs_inode *dp) +{ + if (dp) + return dp->d_ops; + if (mp->m_dir_inode_ops) + return mp->m_dir_inode_ops; + if (xfs_sb_version_hascrc(&mp->m_sb)) + return &xfs_dir3_ops; + if (xfs_sb_version_hasftype(&mp->m_sb)) + return &xfs_dir2_ftype_ops; + return &xfs_dir2_ops; +} + +const struct xfs_dir_ops * +xfs_nondir_get_ops( + struct xfs_mount *mp, + struct xfs_inode *dp) +{ + if (dp) + return dp->d_ops; + if (mp->m_nondir_inode_ops) + return mp->m_nondir_inode_ops; + if (xfs_sb_version_hascrc(&mp->m_sb)) + return &xfs_dir3_nondir_ops; + return &xfs_dir2_nondir_ops; +} diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c new file mode 100644 index 000000000000..a0aca734199b --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -0,0 +1,762 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_inum.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_bmap.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_dinode.h" + +struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; + + +/* + * ASCII case-insensitive (ie. A-Z) support for directories that was + * used in IRIX. + */ +STATIC xfs_dahash_t +xfs_ascii_ci_hashname( + struct xfs_name *name) +{ + xfs_dahash_t hash; + int i; + + for (i = 0, hash = 0; i < name->len; i++) + hash = tolower(name->name[i]) ^ rol32(hash, 7); + + return hash; +} + +STATIC enum xfs_dacmp +xfs_ascii_ci_compname( + struct xfs_da_args *args, + const unsigned char *name, + int len) +{ + enum xfs_dacmp result; + int i; + + if (args->namelen != len) + return XFS_CMP_DIFFERENT; + + result = XFS_CMP_EXACT; + for (i = 0; i < len; i++) { + if (args->name[i] == name[i]) + continue; + if (tolower(args->name[i]) != tolower(name[i])) + return XFS_CMP_DIFFERENT; + result = XFS_CMP_CASE; + } + + return result; +} + +static struct xfs_nameops xfs_ascii_ci_nameops = { + .hashname = xfs_ascii_ci_hashname, + .compname = xfs_ascii_ci_compname, +}; + +int +xfs_da_mount( + struct xfs_mount *mp) +{ + struct xfs_da_geometry *dageo; + int nodehdr_size; + + + ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); + ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= + XFS_MAX_BLOCKSIZE); + + mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); + mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); + + nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; + mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), + KM_SLEEP | KM_MAYFAIL); + mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), + KM_SLEEP | KM_MAYFAIL); + if (!mp->m_dir_geo || !mp->m_attr_geo) { + kmem_free(mp->m_dir_geo); + kmem_free(mp->m_attr_geo); + return ENOMEM; + } + + /* set up directory geometry */ + dageo = mp->m_dir_geo; + dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; + dageo->fsblog = mp->m_sb.sb_blocklog; + dageo->blksize = 1 << dageo->blklog; + dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; + + /* + * Now we've set up the block conversion variables, we can calculate the + * segment block constants using the geometry structure. + */ + dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET); + dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET); + dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET); + dageo->node_ents = (dageo->blksize - nodehdr_size) / + (uint)sizeof(xfs_da_node_entry_t); + dageo->magicpct = (dageo->blksize * 37) / 100; + + /* set up attribute geometry - single fsb only */ + dageo = mp->m_attr_geo; + dageo->blklog = mp->m_sb.sb_blocklog; + dageo->fsblog = mp->m_sb.sb_blocklog; + dageo->blksize = 1 << dageo->blklog; + dageo->fsbcount = 1; + dageo->node_ents = (dageo->blksize - nodehdr_size) / + (uint)sizeof(xfs_da_node_entry_t); + dageo->magicpct = (dageo->blksize * 37) / 100; + + if (xfs_sb_version_hasasciici(&mp->m_sb)) + mp->m_dirnameops = &xfs_ascii_ci_nameops; + else + mp->m_dirnameops = &xfs_default_nameops; + + return 0; +} + +void +xfs_da_unmount( + struct xfs_mount *mp) +{ + kmem_free(mp->m_dir_geo); + kmem_free(mp->m_attr_geo); +} + +/* + * Return 1 if directory contains only "." and "..". + */ +int +xfs_dir_isempty( + xfs_inode_t *dp) +{ + xfs_dir2_sf_hdr_t *sfp; + + ASSERT(S_ISDIR(dp->i_d.di_mode)); + if (dp->i_d.di_size == 0) /* might happen during shutdown. */ + return 1; + if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) + return 0; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + return !sfp->count; +} + +/* + * Validate a given inode number. + */ +int +xfs_dir_ino_validate( + xfs_mount_t *mp, + xfs_ino_t ino) +{ + xfs_agblock_t agblkno; + xfs_agino_t agino; + xfs_agnumber_t agno; + int ino_ok; + int ioff; + + agno = XFS_INO_TO_AGNO(mp, ino); + agblkno = XFS_INO_TO_AGBNO(mp, ino); + ioff = XFS_INO_TO_OFFSET(mp, ino); + agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff); + ino_ok = + agno < mp->m_sb.sb_agcount && + agblkno < mp->m_sb.sb_agblocks && + agblkno != 0 && + ioff < (1 << mp->m_sb.sb_inopblog) && + XFS_AGINO_TO_INO(mp, agno, agino) == ino; + if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, + XFS_RANDOM_DIR_INO_VALIDATE))) { + xfs_warn(mp, "Invalid inode number 0x%Lx", + (unsigned long long) ino); + XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); + return EFSCORRUPTED; + } + return 0; +} + +/* + * Initialize a directory with its "." and ".." entries. + */ +int +xfs_dir_init( + xfs_trans_t *tp, + xfs_inode_t *dp, + xfs_inode_t *pdp) +{ + struct xfs_da_args *args; + int error; + + ASSERT(S_ISDIR(dp->i_d.di_mode)); + error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino); + if (error) + return error; + + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return ENOMEM; + + args->geo = dp->i_mount->m_dir_geo; + args->dp = dp; + args->trans = tp; + error = xfs_dir2_sf_create(args, pdp->i_ino); + kmem_free(args); + return error; +} + +/* + Enter a name in a directory. + */ +int +xfs_dir_createname( + xfs_trans_t *tp, + xfs_inode_t *dp, + struct xfs_name *name, + xfs_ino_t inum, /* new entry inode number */ + xfs_fsblock_t *first, /* bmap's firstblock */ + xfs_bmap_free_t *flist, /* bmap's freeblock list */ + xfs_extlen_t total) /* bmap's total block count */ +{ + struct xfs_da_args *args; + int rval; + int v; /* type-checking value */ + + ASSERT(S_ISDIR(dp->i_d.di_mode)); + rval = xfs_dir_ino_validate(tp->t_mountp, inum); + if (rval) + return rval; + XFS_STATS_INC(xs_dir_create); + + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return ENOMEM; + + args->geo = dp->i_mount->m_dir_geo; + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->inumber = inum; + args->dp = dp; + args->firstblock = first; + args->flist = flist; + args->total = total; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; + + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_addname(args); + goto out_free; + } + + rval = xfs_dir2_isblock(args, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_addname(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(args, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_addname(args); + else + rval = xfs_dir2_node_addname(args); + +out_free: + kmem_free(args); + return rval; +} + +/* + * If doing a CI lookup and case-insensitive match, dup actual name into + * args.value. Return EEXIST for success (ie. name found) or an error. + */ +int +xfs_dir_cilookup_result( + struct xfs_da_args *args, + const unsigned char *name, + int len) +{ + if (args->cmpresult == XFS_CMP_DIFFERENT) + return ENOENT; + if (args->cmpresult != XFS_CMP_CASE || + !(args->op_flags & XFS_DA_OP_CILOOKUP)) + return EEXIST; + + args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL); + if (!args->value) + return ENOMEM; + + memcpy(args->value, name, len); + args->valuelen = len; + return EEXIST; +} + +/* + * Lookup a name in a directory, give back the inode number. + * If ci_name is not NULL, returns the actual name in ci_name if it differs + * to name, or ci_name->name is set to NULL for an exact match. + */ + +int +xfs_dir_lookup( + xfs_trans_t *tp, + xfs_inode_t *dp, + struct xfs_name *name, + xfs_ino_t *inum, /* out: inode number */ + struct xfs_name *ci_name) /* out: actual name if CI match */ +{ + struct xfs_da_args *args; + int rval; + int v; /* type-checking value */ + + ASSERT(S_ISDIR(dp->i_d.di_mode)); + XFS_STATS_INC(xs_dir_lookup); + + /* + * We need to use KM_NOFS here so that lockdep will not throw false + * positive deadlock warnings on a non-transactional lookup path. It is + * safe to recurse into inode recalim in that case, but lockdep can't + * easily be taught about it. Hence KM_NOFS avoids having to add more + * lockdep Doing this avoids having to add a bunch of lockdep class + * annotations into the reclaim path for the ilock. + */ + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + args->geo = dp->i_mount->m_dir_geo; + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->dp = dp; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + args->op_flags = XFS_DA_OP_OKNOENT; + if (ci_name) + args->op_flags |= XFS_DA_OP_CILOOKUP; + + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_lookup(args); + goto out_check_rval; + } + + rval = xfs_dir2_isblock(args, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_lookup(args); + goto out_check_rval; + } + + rval = xfs_dir2_isleaf(args, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_lookup(args); + else + rval = xfs_dir2_node_lookup(args); + +out_check_rval: + if (rval == EEXIST) + rval = 0; + if (!rval) { + *inum = args->inumber; + if (ci_name) { + ci_name->name = args->value; + ci_name->len = args->valuelen; + } + } +out_free: + kmem_free(args); + return rval; +} + +/* + * Remove an entry from a directory. + */ +int +xfs_dir_removename( + xfs_trans_t *tp, + xfs_inode_t *dp, + struct xfs_name *name, + xfs_ino_t ino, + xfs_fsblock_t *first, /* bmap's firstblock */ + xfs_bmap_free_t *flist, /* bmap's freeblock list */ + xfs_extlen_t total) /* bmap's total block count */ +{ + struct xfs_da_args *args; + int rval; + int v; /* type-checking value */ + + ASSERT(S_ISDIR(dp->i_d.di_mode)); + XFS_STATS_INC(xs_dir_remove); + + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return ENOMEM; + + args->geo = dp->i_mount->m_dir_geo; + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->inumber = ino; + args->dp = dp; + args->firstblock = first; + args->flist = flist; + args->total = total; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_removename(args); + goto out_free; + } + + rval = xfs_dir2_isblock(args, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_removename(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(args, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_removename(args); + else + rval = xfs_dir2_node_removename(args); +out_free: + kmem_free(args); + return rval; +} + +/* + * Replace the inode number of a directory entry. + */ +int +xfs_dir_replace( + xfs_trans_t *tp, + xfs_inode_t *dp, + struct xfs_name *name, /* name of entry to replace */ + xfs_ino_t inum, /* new inode number */ + xfs_fsblock_t *first, /* bmap's firstblock */ + xfs_bmap_free_t *flist, /* bmap's freeblock list */ + xfs_extlen_t total) /* bmap's total block count */ +{ + struct xfs_da_args *args; + int rval; + int v; /* type-checking value */ + + ASSERT(S_ISDIR(dp->i_d.di_mode)); + + rval = xfs_dir_ino_validate(tp->t_mountp, inum); + if (rval) + return rval; + + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return ENOMEM; + + args->geo = dp->i_mount->m_dir_geo; + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->inumber = inum; + args->dp = dp; + args->firstblock = first; + args->flist = flist; + args->total = total; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_replace(args); + goto out_free; + } + + rval = xfs_dir2_isblock(args, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_replace(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(args, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_replace(args); + else + rval = xfs_dir2_node_replace(args); +out_free: + kmem_free(args); + return rval; +} + +/* + * See if this entry can be added to the directory without allocating space. + * First checks that the caller couldn't reserve enough space (resblks = 0). + */ +int +xfs_dir_canenter( + xfs_trans_t *tp, + xfs_inode_t *dp, + struct xfs_name *name, /* name of entry to add */ + uint resblks) +{ + struct xfs_da_args *args; + int rval; + int v; /* type-checking value */ + + if (resblks) + return 0; + + ASSERT(S_ISDIR(dp->i_d.di_mode)); + + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return ENOMEM; + + args->geo = dp->i_mount->m_dir_geo; + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->dp = dp; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | + XFS_DA_OP_OKNOENT; + + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_addname(args); + goto out_free; + } + + rval = xfs_dir2_isblock(args, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_addname(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(args, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_addname(args); + else + rval = xfs_dir2_node_addname(args); +out_free: + kmem_free(args); + return rval; +} + +/* + * Utility routines. + */ + +/* + * Add a block to the directory. + * + * This routine is for data and free blocks, not leaf/node blocks which are + * handled by xfs_da_grow_inode. + */ +int +xfs_dir2_grow_inode( + struct xfs_da_args *args, + int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ + xfs_dir2_db_t *dbp) /* out: block number added */ +{ + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + xfs_fileoff_t bno; /* directory offset of new block */ + int count; /* count of filesystem blocks */ + int error; + + trace_xfs_dir2_grow_inode(args, space); + + /* + * Set lowest possible block in the space requested. + */ + bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); + count = args->geo->fsbcount; + + error = xfs_da_grow_inode_int(args, &bno, count); + if (error) + return error; + + *dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno); + + /* + * Update file's size if this is the data space and it grew. + */ + if (space == XFS_DIR2_DATA_SPACE) { + xfs_fsize_t size; /* directory file (data) size */ + + size = XFS_FSB_TO_B(mp, bno + count); + if (size > dp->i_d.di_size) { + dp->i_d.di_size = size; + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); + } + } + return 0; +} + +/* + * See if the directory is a single-block form directory. + */ +int +xfs_dir2_isblock( + struct xfs_da_args *args, + int *vp) /* out: 1 is block, 0 is not block */ +{ + xfs_fileoff_t last; /* last file offset */ + int rval; + + if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) + return rval; + rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; + ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize); + *vp = rval; + return 0; +} + +/* + * See if the directory is a single-leaf form directory. + */ +int +xfs_dir2_isleaf( + struct xfs_da_args *args, + int *vp) /* out: 1 is block, 0 is not block */ +{ + xfs_fileoff_t last; /* last file offset */ + int rval; + + if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) + return rval; + *vp = last == args->geo->leafblk + args->geo->fsbcount; + return 0; +} + +/* + * Remove the given block from the directory. + * This routine is used for data and free blocks, leaf/node are done + * by xfs_da_shrink_inode. + */ +int +xfs_dir2_shrink_inode( + xfs_da_args_t *args, + xfs_dir2_db_t db, + struct xfs_buf *bp) +{ + xfs_fileoff_t bno; /* directory file offset */ + xfs_dablk_t da; /* directory file offset */ + int done; /* bunmap is finished */ + xfs_inode_t *dp; + int error; + xfs_mount_t *mp; + xfs_trans_t *tp; + + trace_xfs_dir2_shrink_inode(args, db); + + dp = args->dp; + mp = dp->i_mount; + tp = args->trans; + da = xfs_dir2_db_to_da(args->geo, db); + /* + * Unmap the fsblock(s). + */ + if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, + XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, + &done))) { + /* + * ENOSPC actually can happen if we're in a removename with + * no space reservation, and the resulting block removal + * would cause a bmap btree split or conversion from extents + * to btree. This can only happen for un-fragmented + * directory blocks, since you need to be punching out + * the middle of an extent. + * In this case we need to leave the block in the file, + * and not binval it. + * So the block has to be in a consistent empty state + * and appropriately logged. + * We don't free up the buffer, the caller can tell it + * hasn't happened since it got an error back. + */ + return error; + } + ASSERT(done); + /* + * Invalidate the buffer from the transaction. + */ + xfs_trans_binval(tp, bp); + /* + * If it's not a data block, we're done. + */ + if (db >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET)) + return 0; + /* + * If the block isn't the last one in the directory, we're done. + */ + if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(args->geo, db + 1, 0)) + return 0; + bno = da; + if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) { + /* + * This can't really happen unless there's kernel corruption. + */ + return error; + } + if (db == args->geo->datablk) + ASSERT(bno == 0); + else + ASSERT(bno > 0); + /* + * Set the size to the new last block. + */ + dp->i_d.di_size = XFS_FSB_TO_B(mp, bno); + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c new file mode 100644 index 000000000000..ab0bffccf5c3 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -0,0 +1,1265 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_bmap.h" +#include "xfs_buf_item.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_cksum.h" +#include "xfs_dinode.h" + +/* + * Local function prototypes. + */ +static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, struct xfs_buf *bp, + int first, int last); +static void xfs_dir2_block_log_tail(xfs_trans_t *tp, struct xfs_buf *bp); +static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, struct xfs_buf **bpp, + int *entno); +static int xfs_dir2_block_sort(const void *a, const void *b); + +static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot; + +/* + * One-time startup routine called from xfs_init(). + */ +void +xfs_dir_startup(void) +{ + xfs_dir_hash_dot = xfs_da_hashname((unsigned char *)".", 1); + xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); +} + +static bool +xfs_dir3_block_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) + return false; + if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) + return false; + if (be64_to_cpu(hdr3->blkno) != bp->b_bn) + return false; + } else { + if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) + return false; + } + if (__xfs_dir3_data_check(NULL, bp)) + return false; + return true; +} + +static void +xfs_dir3_block_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_dir3_block_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +static void +xfs_dir3_block_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + if (!xfs_dir3_block_verify(bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (bip) + hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); + + xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); +} + +const struct xfs_buf_ops xfs_dir3_block_buf_ops = { + .verify_read = xfs_dir3_block_read_verify, + .verify_write = xfs_dir3_block_write_verify, +}; + +int +xfs_dir3_block_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = dp->i_mount; + int err; + + err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp, + XFS_DATA_FORK, &xfs_dir3_block_buf_ops); + if (!err && tp) + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); + return err; +} + +static void +xfs_dir3_block_init( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *bp, + struct xfs_inode *dp) +{ + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + bp->b_ops = &xfs_dir3_block_buf_ops; + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + memset(hdr3, 0, sizeof(*hdr3)); + hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); + hdr3->blkno = cpu_to_be64(bp->b_bn); + hdr3->owner = cpu_to_be64(dp->i_ino); + uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); + return; + + } + hdr3->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); +} + +static void +xfs_dir2_block_need_space( + struct xfs_inode *dp, + struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_block_tail *btp, + struct xfs_dir2_leaf_entry *blp, + __be16 **tagpp, + struct xfs_dir2_data_unused **dupp, + struct xfs_dir2_data_unused **enddupp, + int *compact, + int len) +{ + struct xfs_dir2_data_free *bf; + __be16 *tagp = NULL; + struct xfs_dir2_data_unused *dup = NULL; + struct xfs_dir2_data_unused *enddup = NULL; + + *compact = 0; + bf = dp->d_ops->data_bestfree_p(hdr); + + /* + * If there are stale entries we'll use one for the leaf. + */ + if (btp->stale) { + if (be16_to_cpu(bf[0].length) >= len) { + /* + * The biggest entry enough to avoid compaction. + */ + dup = (xfs_dir2_data_unused_t *) + ((char *)hdr + be16_to_cpu(bf[0].offset)); + goto out; + } + + /* + * Will need to compact to make this work. + * Tag just before the first leaf entry. + */ + *compact = 1; + tagp = (__be16 *)blp - 1; + + /* Data object just before the first leaf entry. */ + dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + + /* + * If it's not free then the data will go where the + * leaf data starts now, if it works at all. + */ + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) * + (uint)sizeof(*blp) < len) + dup = NULL; + } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len) + dup = NULL; + else + dup = (xfs_dir2_data_unused_t *)blp; + goto out; + } + + /* + * no stale entries, so just use free space. + * Tag just before the first leaf entry. + */ + tagp = (__be16 *)blp - 1; + + /* Data object just before the first leaf entry. */ + enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + + /* + * If it's not free then can't do this add without cleaning up: + * the space before the first leaf entry needs to be free so it + * can be expanded to hold the pointer to the new entry. + */ + if (be16_to_cpu(enddup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + /* + * Check out the biggest freespace and see if it's the same one. + */ + dup = (xfs_dir2_data_unused_t *) + ((char *)hdr + be16_to_cpu(bf[0].offset)); + if (dup != enddup) { + /* + * Not the same free entry, just check its length. + */ + if (be16_to_cpu(dup->length) < len) + dup = NULL; + goto out; + } + + /* + * It is the biggest freespace, can it hold the leaf too? + */ + if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) { + /* + * Yes, use the second-largest entry instead if it works. + */ + if (be16_to_cpu(bf[1].length) >= len) + dup = (xfs_dir2_data_unused_t *) + ((char *)hdr + be16_to_cpu(bf[1].offset)); + else + dup = NULL; + } + } +out: + *tagpp = tagp; + *dupp = dup; + *enddupp = enddup; +} + +/* + * compact the leaf entries. + * Leave the highest-numbered stale entry stale. + * XXX should be the one closest to mid but mid is not yet computed. + */ +static void +xfs_dir2_block_compact( + struct xfs_da_args *args, + struct xfs_buf *bp, + struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_block_tail *btp, + struct xfs_dir2_leaf_entry *blp, + int *needlog, + int *lfloghigh, + int *lfloglow) +{ + int fromidx; /* source leaf index */ + int toidx; /* target leaf index */ + int needscan = 0; + int highstale; /* high stale index */ + + fromidx = toidx = be32_to_cpu(btp->count) - 1; + highstale = *lfloghigh = -1; + for (; fromidx >= 0; fromidx--) { + if (blp[fromidx].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { + if (highstale == -1) + highstale = toidx; + else { + if (*lfloghigh == -1) + *lfloghigh = toidx; + continue; + } + } + if (fromidx < toidx) + blp[toidx] = blp[fromidx]; + toidx--; + } + *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); + *lfloghigh -= be32_to_cpu(btp->stale) - 1; + be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); + xfs_dir2_data_make_free(args, bp, + (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), + (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), + needlog, &needscan); + btp->stale = cpu_to_be32(1); + /* + * If we now need to rebuild the bestfree map, do so. + * This needs to happen before the next call to use_free. + */ + if (needscan) + xfs_dir2_data_freescan(args->dp, hdr, needlog); +} + +/* + * Add an entry to a block directory. + */ +int /* error */ +xfs_dir2_block_addname( + xfs_da_args_t *args) /* directory op arguments */ +{ + xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + struct xfs_buf *bp; /* buffer for block */ + xfs_dir2_block_tail_t *btp; /* block tail */ + int compact; /* need to compact leaf ents */ + xfs_dir2_data_entry_t *dep; /* block data entry */ + xfs_inode_t *dp; /* directory inode */ + xfs_dir2_data_unused_t *dup; /* block unused entry */ + int error; /* error return value */ + xfs_dir2_data_unused_t *enddup=NULL; /* unused at end of data */ + xfs_dahash_t hash; /* hash value of found entry */ + int high; /* high index for binary srch */ + int highstale; /* high stale index */ + int lfloghigh=0; /* last final leaf to log */ + int lfloglow=0; /* first final leaf to log */ + int len; /* length of the new entry */ + int low; /* low index for binary srch */ + int lowstale; /* low stale index */ + int mid=0; /* midpoint for binary srch */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log header */ + int needscan; /* need to rescan freespace */ + __be16 *tagp; /* pointer to tag value */ + xfs_trans_t *tp; /* transaction structure */ + + trace_xfs_dir2_block_addname(args); + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + + /* Read the (one and only) directory block into bp. */ + error = xfs_dir3_block_read(tp, dp, &bp); + if (error) + return error; + + len = dp->d_ops->data_entsize(args->namelen); + + /* + * Set up pointers to parts of the block. + */ + hdr = bp->b_addr; + btp = xfs_dir2_block_tail_p(args->geo, hdr); + blp = xfs_dir2_block_leaf_p(btp); + + /* + * Find out if we can reuse stale entries or whether we need extra + * space for entry and new leaf. + */ + xfs_dir2_block_need_space(dp, hdr, btp, blp, &tagp, &dup, + &enddup, &compact, len); + + /* + * Done everything we need for a space check now. + */ + if (args->op_flags & XFS_DA_OP_JUSTCHECK) { + xfs_trans_brelse(tp, bp); + if (!dup) + return ENOSPC; + return 0; + } + + /* + * If we don't have space for the new entry & leaf ... + */ + if (!dup) { + /* Don't have a space reservation: return no-space. */ + if (args->total == 0) + return ENOSPC; + /* + * Convert to the next larger format. + * Then add the new entry in that format. + */ + error = xfs_dir2_block_to_leaf(args, bp); + if (error) + return error; + return xfs_dir2_leaf_addname(args); + } + + needlog = needscan = 0; + + /* + * If need to compact the leaf entries, do it now. + */ + if (compact) { + xfs_dir2_block_compact(args, bp, hdr, btp, blp, &needlog, + &lfloghigh, &lfloglow); + /* recalculate blp post-compaction */ + blp = xfs_dir2_block_leaf_p(btp); + } else if (btp->stale) { + /* + * Set leaf logging boundaries to impossible state. + * For the no-stale case they're set explicitly. + */ + lfloglow = be32_to_cpu(btp->count); + lfloghigh = -1; + } + + /* + * Find the slot that's first lower than our hash value, -1 if none. + */ + for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) { + mid = (low + high) >> 1; + if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval) + break; + if (hash < args->hashval) + low = mid + 1; + else + high = mid - 1; + } + while (mid >= 0 && be32_to_cpu(blp[mid].hashval) >= args->hashval) { + mid--; + } + /* + * No stale entries, will use enddup space to hold new leaf. + */ + if (!btp->stale) { + /* + * Mark the space needed for the new leaf entry, now in use. + */ + xfs_dir2_data_use_free(args, bp, enddup, + (xfs_dir2_data_aoff_t) + ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - + sizeof(*blp)), + (xfs_dir2_data_aoff_t)sizeof(*blp), + &needlog, &needscan); + /* + * Update the tail (entry count). + */ + be32_add_cpu(&btp->count, 1); + /* + * If we now need to rebuild the bestfree map, do so. + * This needs to happen before the next call to use_free. + */ + if (needscan) { + xfs_dir2_data_freescan(dp, hdr, &needlog); + needscan = 0; + } + /* + * Adjust pointer to the first leaf entry, we're about to move + * the table up one to open up space for the new leaf entry. + * Then adjust our index to match. + */ + blp--; + mid++; + if (mid) + memmove(blp, &blp[1], mid * sizeof(*blp)); + lfloglow = 0; + lfloghigh = mid; + } + /* + * Use a stale leaf for our new entry. + */ + else { + for (lowstale = mid; + lowstale >= 0 && + blp[lowstale].address != + cpu_to_be32(XFS_DIR2_NULL_DATAPTR); + lowstale--) + continue; + for (highstale = mid + 1; + highstale < be32_to_cpu(btp->count) && + blp[highstale].address != + cpu_to_be32(XFS_DIR2_NULL_DATAPTR) && + (lowstale < 0 || mid - lowstale > highstale - mid); + highstale++) + continue; + /* + * Move entries toward the low-numbered stale entry. + */ + if (lowstale >= 0 && + (highstale == be32_to_cpu(btp->count) || + mid - lowstale <= highstale - mid)) { + if (mid - lowstale) + memmove(&blp[lowstale], &blp[lowstale + 1], + (mid - lowstale) * sizeof(*blp)); + lfloglow = MIN(lowstale, lfloglow); + lfloghigh = MAX(mid, lfloghigh); + } + /* + * Move entries toward the high-numbered stale entry. + */ + else { + ASSERT(highstale < be32_to_cpu(btp->count)); + mid++; + if (highstale - mid) + memmove(&blp[mid + 1], &blp[mid], + (highstale - mid) * sizeof(*blp)); + lfloglow = MIN(mid, lfloglow); + lfloghigh = MAX(highstale, lfloghigh); + } + be32_add_cpu(&btp->stale, -1); + } + /* + * Point to the new data entry. + */ + dep = (xfs_dir2_data_entry_t *)dup; + /* + * Fill in the leaf entry. + */ + blp[mid].hashval = cpu_to_be32(args->hashval); + blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( + (char *)dep - (char *)hdr)); + xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); + /* + * Mark space for the data entry used. + */ + xfs_dir2_data_use_free(args, bp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + (xfs_dir2_data_aoff_t)len, &needlog, &needscan); + /* + * Create the new data entry. + */ + dep->inumber = cpu_to_be64(args->inumber); + dep->namelen = args->namelen; + memcpy(dep->name, args->name, args->namelen); + dp->d_ops->data_put_ftype(dep, args->filetype); + tagp = dp->d_ops->data_entry_tag_p(dep); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); + /* + * Clean up the bestfree array and log the header, tail, and entry. + */ + if (needscan) + xfs_dir2_data_freescan(dp, hdr, &needlog); + if (needlog) + xfs_dir2_data_log_header(args, bp); + xfs_dir2_block_log_tail(tp, bp); + xfs_dir2_data_log_entry(args, bp, dep); + xfs_dir3_data_check(dp, bp); + return 0; +} + +/* + * Log leaf entries from the block. + */ +static void +xfs_dir2_block_log_leaf( + xfs_trans_t *tp, /* transaction structure */ + struct xfs_buf *bp, /* block buffer */ + int first, /* index of first logged leaf */ + int last) /* index of last logged leaf */ +{ + xfs_dir2_data_hdr_t *hdr = bp->b_addr; + xfs_dir2_leaf_entry_t *blp; + xfs_dir2_block_tail_t *btp; + + btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr); + blp = xfs_dir2_block_leaf_p(btp); + xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr), + (uint)((char *)&blp[last + 1] - (char *)hdr - 1)); +} + +/* + * Log the block tail. + */ +static void +xfs_dir2_block_log_tail( + xfs_trans_t *tp, /* transaction structure */ + struct xfs_buf *bp) /* block buffer */ +{ + xfs_dir2_data_hdr_t *hdr = bp->b_addr; + xfs_dir2_block_tail_t *btp; + + btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr); + xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr), + (uint)((char *)(btp + 1) - (char *)hdr - 1)); +} + +/* + * Look up an entry in the block. This is the external routine, + * xfs_dir2_block_lookup_int does the real work. + */ +int /* error */ +xfs_dir2_block_lookup( + xfs_da_args_t *args) /* dir lookup arguments */ +{ + xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + struct xfs_buf *bp; /* block buffer */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_dir2_data_entry_t *dep; /* block data entry */ + xfs_inode_t *dp; /* incore inode */ + int ent; /* entry index */ + int error; /* error return value */ + xfs_mount_t *mp; /* filesystem mount point */ + + trace_xfs_dir2_block_lookup(args); + + /* + * Get the buffer, look up the entry. + * If not found (ENOENT) then return, have no buffer. + */ + if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) + return error; + dp = args->dp; + mp = dp->i_mount; + hdr = bp->b_addr; + xfs_dir3_data_check(dp, bp); + btp = xfs_dir2_block_tail_p(args->geo, hdr); + blp = xfs_dir2_block_leaf_p(btp); + /* + * Get the offset from the leaf entry, to point to the data. + */ + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(blp[ent].address))); + /* + * Fill in inode number, CI name if appropriate, release the block. + */ + args->inumber = be64_to_cpu(dep->inumber); + args->filetype = dp->d_ops->data_get_ftype(dep); + error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); + xfs_trans_brelse(args->trans, bp); + return error; +} + +/* + * Internal block lookup routine. + */ +static int /* error */ +xfs_dir2_block_lookup_int( + xfs_da_args_t *args, /* dir lookup arguments */ + struct xfs_buf **bpp, /* returned block buffer */ + int *entno) /* returned entry number */ +{ + xfs_dir2_dataptr_t addr; /* data entry address */ + xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + struct xfs_buf *bp; /* block buffer */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_dir2_data_entry_t *dep; /* block data entry */ + xfs_inode_t *dp; /* incore inode */ + int error; /* error return value */ + xfs_dahash_t hash; /* found hash value */ + int high; /* binary search high index */ + int low; /* binary search low index */ + int mid; /* binary search current idx */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_trans_t *tp; /* transaction pointer */ + enum xfs_dacmp cmp; /* comparison result */ + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + + error = xfs_dir3_block_read(tp, dp, &bp); + if (error) + return error; + + hdr = bp->b_addr; + xfs_dir3_data_check(dp, bp); + btp = xfs_dir2_block_tail_p(args->geo, hdr); + blp = xfs_dir2_block_leaf_p(btp); + /* + * Loop doing a binary search for our hash value. + * Find our entry, ENOENT if it's not there. + */ + for (low = 0, high = be32_to_cpu(btp->count) - 1; ; ) { + ASSERT(low <= high); + mid = (low + high) >> 1; + if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval) + break; + if (hash < args->hashval) + low = mid + 1; + else + high = mid - 1; + if (low > high) { + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); + xfs_trans_brelse(tp, bp); + return ENOENT; + } + } + /* + * Back up to the first one with the right hash value. + */ + while (mid > 0 && be32_to_cpu(blp[mid - 1].hashval) == args->hashval) { + mid--; + } + /* + * Now loop forward through all the entries with the + * right hash value looking for our name. + */ + do { + if ((addr = be32_to_cpu(blp[mid].address)) == XFS_DIR2_NULL_DATAPTR) + continue; + /* + * Get pointer to the entry from the leaf. + */ + dep = (xfs_dir2_data_entry_t *) + ((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, addr)); + /* + * Compare name and if it's an exact match, return the index + * and buffer. If it's the first case-insensitive match, store + * the index and buffer and continue looking for an exact match. + */ + cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); + if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { + args->cmpresult = cmp; + *bpp = bp; + *entno = mid; + if (cmp == XFS_CMP_EXACT) + return 0; + } + } while (++mid < be32_to_cpu(btp->count) && + be32_to_cpu(blp[mid].hashval) == hash); + + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); + /* + * Here, we can only be doing a lookup (not a rename or replace). + * If a case-insensitive match was found earlier, return success. + */ + if (args->cmpresult == XFS_CMP_CASE) + return 0; + /* + * No match, release the buffer and return ENOENT. + */ + xfs_trans_brelse(tp, bp); + return ENOENT; +} + +/* + * Remove an entry from a block format directory. + * If that makes the block small enough to fit in shortform, transform it. + */ +int /* error */ +xfs_dir2_block_removename( + xfs_da_args_t *args) /* directory operation args */ +{ + xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */ + struct xfs_buf *bp; /* block buffer */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_dir2_data_entry_t *dep; /* block data entry */ + xfs_inode_t *dp; /* incore inode */ + int ent; /* block leaf entry index */ + int error; /* error return value */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log block header */ + int needscan; /* need to fixup bestfree */ + xfs_dir2_sf_hdr_t sfh; /* shortform header */ + int size; /* shortform size */ + xfs_trans_t *tp; /* transaction pointer */ + + trace_xfs_dir2_block_removename(args); + + /* + * Look up the entry in the block. Gets the buffer and entry index. + * It will always be there, the vnodeops level does a lookup first. + */ + if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) { + return error; + } + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + hdr = bp->b_addr; + btp = xfs_dir2_block_tail_p(args->geo, hdr); + blp = xfs_dir2_block_leaf_p(btp); + /* + * Point to the data entry using the leaf entry. + */ + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(blp[ent].address))); + /* + * Mark the data entry's space free. + */ + needlog = needscan = 0; + xfs_dir2_data_make_free(args, bp, + (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), + dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); + /* + * Fix up the block tail. + */ + be32_add_cpu(&btp->stale, 1); + xfs_dir2_block_log_tail(tp, bp); + /* + * Remove the leaf entry by marking it stale. + */ + blp[ent].address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); + xfs_dir2_block_log_leaf(tp, bp, ent, ent); + /* + * Fix up bestfree, log the header if necessary. + */ + if (needscan) + xfs_dir2_data_freescan(dp, hdr, &needlog); + if (needlog) + xfs_dir2_data_log_header(args, bp); + xfs_dir3_data_check(dp, bp); + /* + * See if the size as a shortform is good enough. + */ + size = xfs_dir2_block_sfsize(dp, hdr, &sfh); + if (size > XFS_IFORK_DSIZE(dp)) + return 0; + + /* + * If it works, do the conversion. + */ + return xfs_dir2_block_to_sf(args, bp, size, &sfh); +} + +/* + * Replace an entry in a V2 block directory. + * Change the inode number to the new value. + */ +int /* error */ +xfs_dir2_block_replace( + xfs_da_args_t *args) /* directory operation args */ +{ + xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + struct xfs_buf *bp; /* block buffer */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_dir2_data_entry_t *dep; /* block data entry */ + xfs_inode_t *dp; /* incore inode */ + int ent; /* leaf entry index */ + int error; /* error return value */ + xfs_mount_t *mp; /* filesystem mount point */ + + trace_xfs_dir2_block_replace(args); + + /* + * Lookup the entry in the directory. Get buffer and entry index. + * This will always succeed since the caller has already done a lookup. + */ + if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) { + return error; + } + dp = args->dp; + mp = dp->i_mount; + hdr = bp->b_addr; + btp = xfs_dir2_block_tail_p(args->geo, hdr); + blp = xfs_dir2_block_leaf_p(btp); + /* + * Point to the data entry we need to change. + */ + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(blp[ent].address))); + ASSERT(be64_to_cpu(dep->inumber) != args->inumber); + /* + * Change the inode number to the new value. + */ + dep->inumber = cpu_to_be64(args->inumber); + dp->d_ops->data_put_ftype(dep, args->filetype); + xfs_dir2_data_log_entry(args, bp, dep); + xfs_dir3_data_check(dp, bp); + return 0; +} + +/* + * Qsort comparison routine for the block leaf entries. + */ +static int /* sort order */ +xfs_dir2_block_sort( + const void *a, /* first leaf entry */ + const void *b) /* second leaf entry */ +{ + const xfs_dir2_leaf_entry_t *la; /* first leaf entry */ + const xfs_dir2_leaf_entry_t *lb; /* second leaf entry */ + + la = a; + lb = b; + return be32_to_cpu(la->hashval) < be32_to_cpu(lb->hashval) ? -1 : + (be32_to_cpu(la->hashval) > be32_to_cpu(lb->hashval) ? 1 : 0); +} + +/* + * Convert a V2 leaf directory to a V2 block directory if possible. + */ +int /* error */ +xfs_dir2_leaf_to_block( + xfs_da_args_t *args, /* operation arguments */ + struct xfs_buf *lbp, /* leaf buffer */ + struct xfs_buf *dbp) /* data buffer */ +{ + __be16 *bestsp; /* leaf bests table */ + xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_data_unused_t *dup; /* unused data entry */ + int error; /* error return value */ + int from; /* leaf from index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ + xfs_mount_t *mp; /* file system mount point */ + int needlog; /* need to log data header */ + int needscan; /* need to scan for bestfree */ + xfs_dir2_sf_hdr_t sfh; /* shortform header */ + int size; /* bytes used */ + __be16 *tagp; /* end of entry (tag) */ + int to; /* block/leaf to index */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; + + trace_xfs_dir2_leaf_to_block(args); + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + leaf = lbp->b_addr; + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + + ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC || + leafhdr.magic == XFS_DIR3_LEAF1_MAGIC); + /* + * If there are data blocks other than the first one, take this + * opportunity to remove trailing empty data blocks that may have + * been left behind during no-space-reservation operations. + * These will show up in the leaf bests table. + */ + while (dp->i_d.di_size > args->geo->blksize) { + int hdrsz; + + hdrsz = dp->d_ops->data_entry_offset; + bestsp = xfs_dir2_leaf_bests_p(ltp); + if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == + args->geo->blksize - hdrsz) { + if ((error = + xfs_dir2_leaf_trim_data(args, lbp, + (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) + return error; + } else + return 0; + } + /* + * Read the data block if we don't already have it, give up if it fails. + */ + if (!dbp) { + error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp); + if (error) + return error; + } + hdr = dbp->b_addr; + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); + + /* + * Size of the "leaf" area in the block. + */ + size = (uint)sizeof(xfs_dir2_block_tail_t) + + (uint)sizeof(*lep) * (leafhdr.count - leafhdr.stale); + /* + * Look at the last data entry. + */ + tagp = (__be16 *)((char *)hdr + args->geo->blksize) - 1; + dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + /* + * If it's not free or is too short we can't do it. + */ + if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG || + be16_to_cpu(dup->length) < size) + return 0; + + /* + * Start converting it to block form. + */ + xfs_dir3_block_init(mp, tp, dbp, dp); + + needlog = 1; + needscan = 0; + /* + * Use up the space at the end of the block (blp/btp). + */ + xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size, + &needlog, &needscan); + /* + * Initialize the block tail. + */ + btp = xfs_dir2_block_tail_p(args->geo, hdr); + btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale); + btp->stale = 0; + xfs_dir2_block_log_tail(tp, dbp); + /* + * Initialize the block leaf area. We compact out stale entries. + */ + lep = xfs_dir2_block_leaf_p(btp); + for (from = to = 0; from < leafhdr.count; from++) { + if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + continue; + lep[to++] = ents[from]; + } + ASSERT(to == be32_to_cpu(btp->count)); + xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1); + /* + * Scan the bestfree if we need it and log the data block header. + */ + if (needscan) + xfs_dir2_data_freescan(dp, hdr, &needlog); + if (needlog) + xfs_dir2_data_log_header(args, dbp); + /* + * Pitch the old leaf block. + */ + error = xfs_da_shrink_inode(args, args->geo->leafblk, lbp); + if (error) + return error; + + /* + * Now see if the resulting block can be shrunken to shortform. + */ + size = xfs_dir2_block_sfsize(dp, hdr, &sfh); + if (size > XFS_IFORK_DSIZE(dp)) + return 0; + + return xfs_dir2_block_to_sf(args, dbp, size, &sfh); +} + +/* + * Convert the shortform directory to block form. + */ +int /* error */ +xfs_dir2_sf_to_block( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_dir2_db_t blkno; /* dir-relative block # (0) */ + xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + struct xfs_buf *bp; /* block buffer */ + xfs_dir2_block_tail_t *btp; /* block tail pointer */ + xfs_dir2_data_entry_t *dep; /* data entry pointer */ + xfs_inode_t *dp; /* incore directory inode */ + int dummy; /* trash */ + xfs_dir2_data_unused_t *dup; /* unused entry pointer */ + int endoffset; /* end of data objects */ + int error; /* error return value */ + int i; /* index */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log block header */ + int needscan; /* need to scan block freespc */ + int newoffset; /* offset from current entry */ + int offset; /* target block offset */ + xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ + xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */ + xfs_dir2_sf_hdr_t *sfp; /* shortform header */ + __be16 *tagp; /* end of data entry */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_name name; + struct xfs_ifork *ifp; + + trace_xfs_dir2_sf_to_block(args); + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); + ASSERT(ifp->if_flags & XFS_IFINLINE); + /* + * Bomb out if the shortform directory is way too short. + */ + if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { + ASSERT(XFS_FORCED_SHUTDOWN(mp)); + return EIO; + } + + oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data; + + ASSERT(ifp->if_bytes == dp->i_d.di_size); + ASSERT(ifp->if_u1.if_data != NULL); + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); + ASSERT(dp->i_d.di_nextents == 0); + + /* + * Copy the directory into a temporary buffer. + * Then pitch the incore inode data so we can make extents. + */ + sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP); + memcpy(sfp, oldsfp, ifp->if_bytes); + + xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); + xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK); + dp->i_d.di_size = 0; + + /* + * Add block 0 to the inode. + */ + error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); + if (error) { + kmem_free(sfp); + return error; + } + /* + * Initialize the data block, then convert it to block format. + */ + error = xfs_dir3_data_init(args, blkno, &bp); + if (error) { + kmem_free(sfp); + return error; + } + xfs_dir3_block_init(mp, tp, bp, dp); + hdr = bp->b_addr; + + /* + * Compute size of block "tail" area. + */ + i = (uint)sizeof(*btp) + + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); + /* + * The whole thing is initialized to free by the init routine. + * Say we're using the leaf and tail area. + */ + dup = dp->d_ops->data_unused_p(hdr); + needlog = needscan = 0; + xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i, + i, &needlog, &needscan); + ASSERT(needscan == 0); + /* + * Fill in the tail. + */ + btp = xfs_dir2_block_tail_p(args->geo, hdr); + btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */ + btp->stale = 0; + blp = xfs_dir2_block_leaf_p(btp); + endoffset = (uint)((char *)blp - (char *)hdr); + /* + * Remove the freespace, we'll manage it. + */ + xfs_dir2_data_use_free(args, bp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + be16_to_cpu(dup->length), &needlog, &needscan); + /* + * Create entry for . + */ + dep = dp->d_ops->data_dot_entry_p(hdr); + dep->inumber = cpu_to_be64(dp->i_ino); + dep->namelen = 1; + dep->name[0] = '.'; + dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); + tagp = dp->d_ops->data_entry_tag_p(dep); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); + xfs_dir2_data_log_entry(args, bp, dep); + blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); + blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( + (char *)dep - (char *)hdr)); + /* + * Create entry for .. + */ + dep = dp->d_ops->data_dotdot_entry_p(hdr); + dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp)); + dep->namelen = 2; + dep->name[0] = dep->name[1] = '.'; + dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); + tagp = dp->d_ops->data_entry_tag_p(dep); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); + xfs_dir2_data_log_entry(args, bp, dep); + blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); + blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( + (char *)dep - (char *)hdr)); + offset = dp->d_ops->data_first_offset; + /* + * Loop over existing entries, stuff them in. + */ + i = 0; + if (!sfp->count) + sfep = NULL; + else + sfep = xfs_dir2_sf_firstentry(sfp); + /* + * Need to preserve the existing offset values in the sf directory. + * Insert holes (unused entries) where necessary. + */ + while (offset < endoffset) { + /* + * sfep is null when we reach the end of the list. + */ + if (sfep == NULL) + newoffset = endoffset; + else + newoffset = xfs_dir2_sf_get_offset(sfep); + /* + * There should be a hole here, make one. + */ + if (offset < newoffset) { + dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); + dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); + dup->length = cpu_to_be16(newoffset - offset); + *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( + ((char *)dup - (char *)hdr)); + xfs_dir2_data_log_unused(args, bp, dup); + xfs_dir2_data_freeinsert(hdr, + dp->d_ops->data_bestfree_p(hdr), + dup, &dummy); + offset += be16_to_cpu(dup->length); + continue; + } + /* + * Copy a real entry. + */ + dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset); + dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep)); + dep->namelen = sfep->namelen; + dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep)); + memcpy(dep->name, sfep->name, dep->namelen); + tagp = dp->d_ops->data_entry_tag_p(dep); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); + xfs_dir2_data_log_entry(args, bp, dep); + name.name = sfep->name; + name.len = sfep->namelen; + blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> + hashname(&name)); + blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( + (char *)dep - (char *)hdr)); + offset = (int)((char *)(tagp + 1) - (char *)hdr); + if (++i == sfp->count) + sfep = NULL; + else + sfep = dp->d_ops->sf_nextentry(sfp, sfep); + } + /* Done with the temporary buffer */ + kmem_free(sfp); + /* + * Sort the leaf entries by hash value. + */ + xfs_sort(blp, be32_to_cpu(btp->count), sizeof(*blp), xfs_dir2_block_sort); + /* + * Log the leaf entry area and tail. + * Already logged the header in data_init, ignore needlog. + */ + ASSERT(needscan == 0); + xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1); + xfs_dir2_block_log_tail(tp, bp); + xfs_dir3_data_check(dp, bp); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c new file mode 100644 index 000000000000..8c2f6422648e --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -0,0 +1,1050 @@ +/* + * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_error.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" +#include "xfs_cksum.h" + +/* + * Check the consistency of the data block. + * The input can also be a block-format directory. + * Return 0 is the buffer is good, otherwise an error. + */ +int +__xfs_dir3_data_check( + struct xfs_inode *dp, /* incore inode pointer */ + struct xfs_buf *bp) /* data block's buffer */ +{ + xfs_dir2_dataptr_t addr; /* addr for leaf lookup */ + xfs_dir2_data_free_t *bf; /* bestfree table */ + xfs_dir2_block_tail_t *btp=NULL; /* block tail */ + int count; /* count of entries found */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_entry_t *dep; /* data entry */ + xfs_dir2_data_free_t *dfp; /* bestfree entry */ + xfs_dir2_data_unused_t *dup; /* unused entry */ + char *endp; /* end of useful data */ + int freeseen; /* mask of bestfrees seen */ + xfs_dahash_t hash; /* hash of current name */ + int i; /* leaf index */ + int lastfree; /* last entry was unused */ + xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */ + xfs_mount_t *mp; /* filesystem mount point */ + char *p; /* current data position */ + int stale; /* count of stale leaves */ + struct xfs_name name; + const struct xfs_dir_ops *ops; + struct xfs_da_geometry *geo; + + mp = bp->b_target->bt_mount; + geo = mp->m_dir_geo; + + /* + * We can be passed a null dp here from a verifier, so we need to go the + * hard way to get them. + */ + ops = xfs_dir_get_ops(mp, dp); + + hdr = bp->b_addr; + p = (char *)ops->data_entry_p(hdr); + + switch (hdr->magic) { + case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): + case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): + btp = xfs_dir2_block_tail_p(geo, hdr); + lep = xfs_dir2_block_leaf_p(btp); + endp = (char *)lep; + + /* + * The number of leaf entries is limited by the size of the + * block and the amount of space used by the data entries. + * We don't know how much space is used by the data entries yet, + * so just ensure that the count falls somewhere inside the + * block right now. + */ + XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) < + ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); + break; + case cpu_to_be32(XFS_DIR3_DATA_MAGIC): + case cpu_to_be32(XFS_DIR2_DATA_MAGIC): + endp = (char *)hdr + geo->blksize; + break; + default: + XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); + return EFSCORRUPTED; + } + + /* + * Account for zero bestfree entries. + */ + bf = ops->data_bestfree_p(hdr); + count = lastfree = freeseen = 0; + if (!bf[0].length) { + XFS_WANT_CORRUPTED_RETURN(!bf[0].offset); + freeseen |= 1 << 0; + } + if (!bf[1].length) { + XFS_WANT_CORRUPTED_RETURN(!bf[1].offset); + freeseen |= 1 << 1; + } + if (!bf[2].length) { + XFS_WANT_CORRUPTED_RETURN(!bf[2].offset); + freeseen |= 1 << 2; + } + + XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >= + be16_to_cpu(bf[1].length)); + XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >= + be16_to_cpu(bf[2].length)); + /* + * Loop over the data/unused entries. + */ + while (p < endp) { + dup = (xfs_dir2_data_unused_t *)p; + /* + * If it's unused, look for the space in the bestfree table. + * If we find it, account for that, else make sure it + * doesn't need to be there. + */ + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + XFS_WANT_CORRUPTED_RETURN(lastfree == 0); + XFS_WANT_CORRUPTED_RETURN( + be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == + (char *)dup - (char *)hdr); + dfp = xfs_dir2_data_freefind(hdr, bf, dup); + if (dfp) { + i = (int)(dfp - bf); + XFS_WANT_CORRUPTED_RETURN( + (freeseen & (1 << i)) == 0); + freeseen |= 1 << i; + } else { + XFS_WANT_CORRUPTED_RETURN( + be16_to_cpu(dup->length) <= + be16_to_cpu(bf[2].length)); + } + p += be16_to_cpu(dup->length); + lastfree = 1; + continue; + } + /* + * It's a real entry. Validate the fields. + * If this is a block directory then make sure it's + * in the leaf section of the block. + * The linear search is crude but this is DEBUG code. + */ + dep = (xfs_dir2_data_entry_t *)p; + XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0); + XFS_WANT_CORRUPTED_RETURN( + !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); + XFS_WANT_CORRUPTED_RETURN( + be16_to_cpu(*ops->data_entry_tag_p(dep)) == + (char *)dep - (char *)hdr); + XFS_WANT_CORRUPTED_RETURN( + ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); + count++; + lastfree = 0; + if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { + addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, + (xfs_dir2_data_aoff_t) + ((char *)dep - (char *)hdr)); + name.name = dep->name; + name.len = dep->namelen; + hash = mp->m_dirnameops->hashname(&name); + for (i = 0; i < be32_to_cpu(btp->count); i++) { + if (be32_to_cpu(lep[i].address) == addr && + be32_to_cpu(lep[i].hashval) == hash) + break; + } + XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count)); + } + p += ops->data_entsize(dep->namelen); + } + /* + * Need to have seen all the entries and all the bestfree slots. + */ + XFS_WANT_CORRUPTED_RETURN(freeseen == 7); + if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { + for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { + if (lep[i].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + stale++; + if (i > 0) + XFS_WANT_CORRUPTED_RETURN( + be32_to_cpu(lep[i].hashval) >= + be32_to_cpu(lep[i - 1].hashval)); + } + XFS_WANT_CORRUPTED_RETURN(count == + be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); + XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale)); + } + return 0; +} + +static bool +xfs_dir3_data_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) + return false; + if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) + return false; + if (be64_to_cpu(hdr3->blkno) != bp->b_bn) + return false; + } else { + if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) + return false; + } + if (__xfs_dir3_data_check(NULL, bp)) + return false; + return true; +} + +/* + * Readahead of the first block of the directory when it is opened is completely + * oblivious to the format of the directory. Hence we can either get a block + * format buffer or a data format buffer on readahead. + */ +static void +xfs_dir3_data_reada_verify( + struct xfs_buf *bp) +{ + struct xfs_dir2_data_hdr *hdr = bp->b_addr; + + switch (hdr->magic) { + case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): + case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): + bp->b_ops = &xfs_dir3_block_buf_ops; + bp->b_ops->verify_read(bp); + return; + case cpu_to_be32(XFS_DIR2_DATA_MAGIC): + case cpu_to_be32(XFS_DIR3_DATA_MAGIC): + xfs_dir3_data_verify(bp); + return; + default: + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + break; + } +} + +static void +xfs_dir3_data_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_dir3_data_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +static void +xfs_dir3_data_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + if (!xfs_dir3_data_verify(bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (bip) + hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); + + xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); +} + +const struct xfs_buf_ops xfs_dir3_data_buf_ops = { + .verify_read = xfs_dir3_data_read_verify, + .verify_write = xfs_dir3_data_write_verify, +}; + +static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { + .verify_read = xfs_dir3_data_reada_verify, + .verify_write = xfs_dir3_data_write_verify, +}; + + +int +xfs_dir3_data_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mapped_bno, + struct xfs_buf **bpp) +{ + int err; + + err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, + XFS_DATA_FORK, &xfs_dir3_data_buf_ops); + if (!err && tp) + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); + return err; +} + +int +xfs_dir3_data_readahead( + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mapped_bno) +{ + return xfs_da_reada_buf(dp, bno, mapped_bno, + XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops); +} + +/* + * Given a data block and an unused entry from that block, + * return the bestfree entry if any that corresponds to it. + */ +xfs_dir2_data_free_t * +xfs_dir2_data_freefind( + struct xfs_dir2_data_hdr *hdr, /* data block header */ + struct xfs_dir2_data_free *bf, /* bestfree table pointer */ + struct xfs_dir2_data_unused *dup) /* unused space */ +{ + xfs_dir2_data_free_t *dfp; /* bestfree entry */ + xfs_dir2_data_aoff_t off; /* offset value needed */ +#ifdef DEBUG + int matched; /* matched the value */ + int seenzero; /* saw a 0 bestfree entry */ +#endif + + off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); + +#ifdef DEBUG + /* + * Validate some consistency in the bestfree table. + * Check order, non-overlapping entries, and if we find the + * one we're looking for it has to be exact. + */ + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); + for (dfp = &bf[0], seenzero = matched = 0; + dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; + dfp++) { + if (!dfp->offset) { + ASSERT(!dfp->length); + seenzero = 1; + continue; + } + ASSERT(seenzero == 0); + if (be16_to_cpu(dfp->offset) == off) { + matched = 1; + ASSERT(dfp->length == dup->length); + } else if (off < be16_to_cpu(dfp->offset)) + ASSERT(off + be16_to_cpu(dup->length) <= be16_to_cpu(dfp->offset)); + else + ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); + ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); + if (dfp > &bf[0]) + ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); + } +#endif + /* + * If this is smaller than the smallest bestfree entry, + * it can't be there since they're sorted. + */ + if (be16_to_cpu(dup->length) < + be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length)) + return NULL; + /* + * Look at the three bestfree entries for our guy. + */ + for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) { + if (!dfp->offset) + return NULL; + if (be16_to_cpu(dfp->offset) == off) + return dfp; + } + /* + * Didn't find it. This only happens if there are duplicate lengths. + */ + return NULL; +} + +/* + * Insert an unused-space entry into the bestfree table. + */ +xfs_dir2_data_free_t * /* entry inserted */ +xfs_dir2_data_freeinsert( + struct xfs_dir2_data_hdr *hdr, /* data block pointer */ + struct xfs_dir2_data_free *dfp, /* bestfree table pointer */ + struct xfs_dir2_data_unused *dup, /* unused space */ + int *loghead) /* log the data header (out) */ +{ + xfs_dir2_data_free_t new; /* new bestfree entry */ + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); + + new.length = dup->length; + new.offset = cpu_to_be16((char *)dup - (char *)hdr); + + /* + * Insert at position 0, 1, or 2; or not at all. + */ + if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) { + dfp[2] = dfp[1]; + dfp[1] = dfp[0]; + dfp[0] = new; + *loghead = 1; + return &dfp[0]; + } + if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) { + dfp[2] = dfp[1]; + dfp[1] = new; + *loghead = 1; + return &dfp[1]; + } + if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) { + dfp[2] = new; + *loghead = 1; + return &dfp[2]; + } + return NULL; +} + +/* + * Remove a bestfree entry from the table. + */ +STATIC void +xfs_dir2_data_freeremove( + struct xfs_dir2_data_hdr *hdr, /* data block header */ + struct xfs_dir2_data_free *bf, /* bestfree table pointer */ + struct xfs_dir2_data_free *dfp, /* bestfree entry pointer */ + int *loghead) /* out: log data header */ +{ + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); + + /* + * It's the first entry, slide the next 2 up. + */ + if (dfp == &bf[0]) { + bf[0] = bf[1]; + bf[1] = bf[2]; + } + /* + * It's the second entry, slide the 3rd entry up. + */ + else if (dfp == &bf[1]) + bf[1] = bf[2]; + /* + * Must be the last entry. + */ + else + ASSERT(dfp == &bf[2]); + /* + * Clear the 3rd entry, must be zero now. + */ + bf[2].length = 0; + bf[2].offset = 0; + *loghead = 1; +} + +/* + * Given a data block, reconstruct its bestfree map. + */ +void +xfs_dir2_data_freescan( + struct xfs_inode *dp, + struct xfs_dir2_data_hdr *hdr, + int *loghead) +{ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_dir2_data_entry_t *dep; /* active data entry */ + xfs_dir2_data_unused_t *dup; /* unused data entry */ + struct xfs_dir2_data_free *bf; + char *endp; /* end of block's data */ + char *p; /* current entry pointer */ + struct xfs_da_geometry *geo = dp->i_mount->m_dir_geo; + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); + + /* + * Start by clearing the table. + */ + bf = dp->d_ops->data_bestfree_p(hdr); + memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT); + *loghead = 1; + /* + * Set up pointers. + */ + p = (char *)dp->d_ops->data_entry_p(hdr); + if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { + btp = xfs_dir2_block_tail_p(geo, hdr); + endp = (char *)xfs_dir2_block_leaf_p(btp); + } else + endp = (char *)hdr + geo->blksize; + /* + * Loop over the block's entries. + */ + while (p < endp) { + dup = (xfs_dir2_data_unused_t *)p; + /* + * If it's a free entry, insert it. + */ + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + ASSERT((char *)dup - (char *)hdr == + be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); + xfs_dir2_data_freeinsert(hdr, bf, dup, loghead); + p += be16_to_cpu(dup->length); + } + /* + * For active entries, check their tags and skip them. + */ + else { + dep = (xfs_dir2_data_entry_t *)p; + ASSERT((char *)dep - (char *)hdr == + be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep))); + p += dp->d_ops->data_entsize(dep->namelen); + } + } +} + +/* + * Initialize a data block at the given block number in the directory. + * Give back the buffer for the created block. + */ +int /* error */ +xfs_dir3_data_init( + xfs_da_args_t *args, /* directory operation args */ + xfs_dir2_db_t blkno, /* logical dir block number */ + struct xfs_buf **bpp) /* output block buffer */ +{ + struct xfs_buf *bp; /* block buffer */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_data_unused_t *dup; /* unused entry pointer */ + struct xfs_dir2_data_free *bf; + int error; /* error return value */ + int i; /* bestfree index */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_trans_t *tp; /* transaction pointer */ + int t; /* temp */ + + dp = args->dp; + mp = dp->i_mount; + tp = args->trans; + /* + * Get the buffer set up for the block. + */ + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno), + -1, &bp, XFS_DATA_FORK); + if (error) + return error; + bp->b_ops = &xfs_dir3_data_buf_ops; + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF); + + /* + * Initialize the header. + */ + hdr = bp->b_addr; + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + memset(hdr3, 0, sizeof(*hdr3)); + hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); + hdr3->blkno = cpu_to_be64(bp->b_bn); + hdr3->owner = cpu_to_be64(dp->i_ino); + uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); + + } else + hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); + + bf = dp->d_ops->data_bestfree_p(hdr); + bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset); + for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { + bf[i].length = 0; + bf[i].offset = 0; + } + + /* + * Set up an unused entry for the block's body. + */ + dup = dp->d_ops->data_unused_p(hdr); + dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); + + t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset; + bf[0].length = cpu_to_be16(t); + dup->length = cpu_to_be16(t); + *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); + /* + * Log it and return it. + */ + xfs_dir2_data_log_header(args, bp); + xfs_dir2_data_log_unused(args, bp, dup); + *bpp = bp; + return 0; +} + +/* + * Log an active data entry from the block. + */ +void +xfs_dir2_data_log_entry( + struct xfs_da_args *args, + struct xfs_buf *bp, + xfs_dir2_data_entry_t *dep) /* data entry pointer */ +{ + struct xfs_dir2_data_hdr *hdr = bp->b_addr; + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); + + xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr), + (uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) - + (char *)hdr - 1)); +} + +/* + * Log a data block header. + */ +void +xfs_dir2_data_log_header( + struct xfs_da_args *args, + struct xfs_buf *bp) +{ +#ifdef DEBUG + struct xfs_dir2_data_hdr *hdr = bp->b_addr; + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); +#endif + + xfs_trans_log_buf(args->trans, bp, 0, + args->dp->d_ops->data_entry_offset - 1); +} + +/* + * Log a data unused entry. + */ +void +xfs_dir2_data_log_unused( + struct xfs_da_args *args, + struct xfs_buf *bp, + xfs_dir2_data_unused_t *dup) /* data unused pointer */ +{ + xfs_dir2_data_hdr_t *hdr = bp->b_addr; + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); + + /* + * Log the first part of the unused entry. + */ + xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr), + (uint)((char *)&dup->length + sizeof(dup->length) - + 1 - (char *)hdr)); + /* + * Log the end (tag) of the unused entry. + */ + xfs_trans_log_buf(args->trans, bp, + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + + sizeof(xfs_dir2_data_off_t) - 1)); +} + +/* + * Make a byte range in the data block unused. + * Its current contents are unimportant. + */ +void +xfs_dir2_data_make_free( + struct xfs_da_args *args, + struct xfs_buf *bp, + xfs_dir2_data_aoff_t offset, /* starting byte offset */ + xfs_dir2_data_aoff_t len, /* length in bytes */ + int *needlogp, /* out: log header */ + int *needscanp) /* out: regen bestfree */ +{ + xfs_dir2_data_hdr_t *hdr; /* data block pointer */ + xfs_dir2_data_free_t *dfp; /* bestfree pointer */ + char *endptr; /* end of data area */ + int needscan; /* need to regen bestfree */ + xfs_dir2_data_unused_t *newdup; /* new unused entry */ + xfs_dir2_data_unused_t *postdup; /* unused entry after us */ + xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ + struct xfs_dir2_data_free *bf; + + hdr = bp->b_addr; + + /* + * Figure out where the end of the data area is. + */ + if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) + endptr = (char *)hdr + args->geo->blksize; + else { + xfs_dir2_block_tail_t *btp; /* block tail */ + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); + btp = xfs_dir2_block_tail_p(args->geo, hdr); + endptr = (char *)xfs_dir2_block_leaf_p(btp); + } + /* + * If this isn't the start of the block, then back up to + * the previous entry and see if it's free. + */ + if (offset > args->dp->d_ops->data_entry_offset) { + __be16 *tagp; /* tag just before us */ + + tagp = (__be16 *)((char *)hdr + offset) - 1; + prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) + prevdup = NULL; + } else + prevdup = NULL; + /* + * If this isn't the end of the block, see if the entry after + * us is free. + */ + if ((char *)hdr + offset + len < endptr) { + postdup = + (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); + if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) + postdup = NULL; + } else + postdup = NULL; + ASSERT(*needscanp == 0); + needscan = 0; + /* + * Previous and following entries are both free, + * merge everything into a single free entry. + */ + bf = args->dp->d_ops->data_bestfree_p(hdr); + if (prevdup && postdup) { + xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ + + /* + * See if prevdup and/or postdup are in bestfree table. + */ + dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); + dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup); + /* + * We need a rescan unless there are exactly 2 free entries + * namely our two. Then we know what's happening, otherwise + * since the third bestfree is there, there might be more + * entries. + */ + needscan = (bf[2].length != 0); + /* + * Fix up the new big freespace. + */ + be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); + *xfs_dir2_data_unused_tag_p(prevdup) = + cpu_to_be16((char *)prevdup - (char *)hdr); + xfs_dir2_data_log_unused(args, bp, prevdup); + if (!needscan) { + /* + * Has to be the case that entries 0 and 1 are + * dfp and dfp2 (don't know which is which), and + * entry 2 is empty. + * Remove entry 1 first then entry 0. + */ + ASSERT(dfp && dfp2); + if (dfp == &bf[1]) { + dfp = &bf[0]; + ASSERT(dfp2 == dfp); + dfp2 = &bf[1]; + } + xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp); + xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); + /* + * Now insert the new entry. + */ + dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup, + needlogp); + ASSERT(dfp == &bf[0]); + ASSERT(dfp->length == prevdup->length); + ASSERT(!dfp[1].length); + ASSERT(!dfp[2].length); + } + } + /* + * The entry before us is free, merge with it. + */ + else if (prevdup) { + dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); + be16_add_cpu(&prevdup->length, len); + *xfs_dir2_data_unused_tag_p(prevdup) = + cpu_to_be16((char *)prevdup - (char *)hdr); + xfs_dir2_data_log_unused(args, bp, prevdup); + /* + * If the previous entry was in the table, the new entry + * is longer, so it will be in the table too. Remove + * the old one and add the new one. + */ + if (dfp) { + xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp); + } + /* + * Otherwise we need a scan if the new entry is big enough. + */ + else { + needscan = be16_to_cpu(prevdup->length) > + be16_to_cpu(bf[2].length); + } + } + /* + * The following entry is free, merge with it. + */ + else if (postdup) { + dfp = xfs_dir2_data_freefind(hdr, bf, postdup); + newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); + newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); + newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); + *xfs_dir2_data_unused_tag_p(newdup) = + cpu_to_be16((char *)newdup - (char *)hdr); + xfs_dir2_data_log_unused(args, bp, newdup); + /* + * If the following entry was in the table, the new entry + * is longer, so it will be in the table too. Remove + * the old one and add the new one. + */ + if (dfp) { + xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); + } + /* + * Otherwise we need a scan if the new entry is big enough. + */ + else { + needscan = be16_to_cpu(newdup->length) > + be16_to_cpu(bf[2].length); + } + } + /* + * Neither neighbor is free. Make a new entry. + */ + else { + newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); + newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); + newdup->length = cpu_to_be16(len); + *xfs_dir2_data_unused_tag_p(newdup) = + cpu_to_be16((char *)newdup - (char *)hdr); + xfs_dir2_data_log_unused(args, bp, newdup); + xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); + } + *needscanp = needscan; +} + +/* + * Take a byte range out of an existing unused space and make it un-free. + */ +void +xfs_dir2_data_use_free( + struct xfs_da_args *args, + struct xfs_buf *bp, + xfs_dir2_data_unused_t *dup, /* unused entry */ + xfs_dir2_data_aoff_t offset, /* starting offset to use */ + xfs_dir2_data_aoff_t len, /* length to use */ + int *needlogp, /* out: need to log header */ + int *needscanp) /* out: need regen bestfree */ +{ + xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_free_t *dfp; /* bestfree pointer */ + int matchback; /* matches end of freespace */ + int matchfront; /* matches start of freespace */ + int needscan; /* need to regen bestfree */ + xfs_dir2_data_unused_t *newdup; /* new unused entry */ + xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ + int oldlen; /* old unused entry's length */ + struct xfs_dir2_data_free *bf; + + hdr = bp->b_addr; + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); + ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); + ASSERT(offset >= (char *)dup - (char *)hdr); + ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr); + ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); + /* + * Look up the entry in the bestfree table. + */ + oldlen = be16_to_cpu(dup->length); + bf = args->dp->d_ops->data_bestfree_p(hdr); + dfp = xfs_dir2_data_freefind(hdr, bf, dup); + ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length)); + /* + * Check for alignment with front and back of the entry. + */ + matchfront = (char *)dup - (char *)hdr == offset; + matchback = (char *)dup + oldlen - (char *)hdr == offset + len; + ASSERT(*needscanp == 0); + needscan = 0; + /* + * If we matched it exactly we just need to get rid of it from + * the bestfree table. + */ + if (matchfront && matchback) { + if (dfp) { + needscan = (bf[2].offset != 0); + if (!needscan) + xfs_dir2_data_freeremove(hdr, bf, dfp, + needlogp); + } + } + /* + * We match the first part of the entry. + * Make a new entry with the remaining freespace. + */ + else if (matchfront) { + newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); + newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); + newdup->length = cpu_to_be16(oldlen - len); + *xfs_dir2_data_unused_tag_p(newdup) = + cpu_to_be16((char *)newdup - (char *)hdr); + xfs_dir2_data_log_unused(args, bp, newdup); + /* + * If it was in the table, remove it and add the new one. + */ + if (dfp) { + xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, + needlogp); + ASSERT(dfp != NULL); + ASSERT(dfp->length == newdup->length); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); + /* + * If we got inserted at the last slot, + * that means we don't know if there was a better + * choice for the last slot, or not. Rescan. + */ + needscan = dfp == &bf[2]; + } + } + /* + * We match the last part of the entry. + * Trim the allocated space off the tail of the entry. + */ + else if (matchback) { + newdup = dup; + newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); + *xfs_dir2_data_unused_tag_p(newdup) = + cpu_to_be16((char *)newdup - (char *)hdr); + xfs_dir2_data_log_unused(args, bp, newdup); + /* + * If it was in the table, remove it and add the new one. + */ + if (dfp) { + xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, + needlogp); + ASSERT(dfp != NULL); + ASSERT(dfp->length == newdup->length); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); + /* + * If we got inserted at the last slot, + * that means we don't know if there was a better + * choice for the last slot, or not. Rescan. + */ + needscan = dfp == &bf[2]; + } + } + /* + * Poking out the middle of an entry. + * Make two new entries. + */ + else { + newdup = dup; + newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); + *xfs_dir2_data_unused_tag_p(newdup) = + cpu_to_be16((char *)newdup - (char *)hdr); + xfs_dir2_data_log_unused(args, bp, newdup); + newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); + newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); + newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); + *xfs_dir2_data_unused_tag_p(newdup2) = + cpu_to_be16((char *)newdup2 - (char *)hdr); + xfs_dir2_data_log_unused(args, bp, newdup2); + /* + * If the old entry was in the table, we need to scan + * if the 3rd entry was valid, since these entries + * are smaller than the old one. + * If we don't need to scan that means there were 1 or 2 + * entries in the table, and removing the old and adding + * the 2 new will work. + */ + if (dfp) { + needscan = (bf[2].length != 0); + if (!needscan) { + xfs_dir2_data_freeremove(hdr, bf, dfp, + needlogp); + xfs_dir2_data_freeinsert(hdr, bf, newdup, + needlogp); + xfs_dir2_data_freeinsert(hdr, bf, newdup2, + needlogp); + } + } + } + *needscanp = needscan; +} diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c new file mode 100644 index 000000000000..78b411bfc543 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -0,0 +1,1831 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" +#include "xfs_cksum.h" + +/* + * Local function declarations. + */ +static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp, + int *indexp, struct xfs_buf **dbpp); +static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args, + struct xfs_buf *bp, int first, int last); +static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args, + struct xfs_buf *bp); + +/* + * Check the internal consistency of a leaf1 block. + * Pop an assert if something is wrong. + */ +#ifdef DEBUG +#define xfs_dir3_leaf_check(dp, bp) \ +do { \ + if (!xfs_dir3_leaf1_check((dp), (bp))) \ + ASSERT(0); \ +} while (0); + +STATIC bool +xfs_dir3_leaf1_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + struct xfs_dir2_leaf *leaf = bp->b_addr; + struct xfs_dir3_icleaf_hdr leafhdr; + + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + + if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { + struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; + if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) + return false; + } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) + return false; + + return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); +} +#else +#define xfs_dir3_leaf_check(dp, bp) +#endif + +bool +xfs_dir3_leaf_check_int( + struct xfs_mount *mp, + struct xfs_inode *dp, + struct xfs_dir3_icleaf_hdr *hdr, + struct xfs_dir2_leaf *leaf) +{ + struct xfs_dir2_leaf_entry *ents; + xfs_dir2_leaf_tail_t *ltp; + int stale; + int i; + const struct xfs_dir_ops *ops; + struct xfs_dir3_icleaf_hdr leafhdr; + struct xfs_da_geometry *geo = mp->m_dir_geo; + + /* + * we can be passed a null dp here from a verifier, so we need to go the + * hard way to get them. + */ + ops = xfs_dir_get_ops(mp, dp); + + if (!hdr) { + ops->leaf_hdr_from_disk(&leafhdr, leaf); + hdr = &leafhdr; + } + + ents = ops->leaf_ents_p(leaf); + ltp = xfs_dir2_leaf_tail_p(geo, leaf); + + /* + * XXX (dgc): This value is not restrictive enough. + * Should factor in the size of the bests table as well. + * We can deduce a value for that from di_size. + */ + if (hdr->count > ops->leaf_max_ents(geo)) + return false; + + /* Leaves and bests don't overlap in leaf format. */ + if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || + hdr->magic == XFS_DIR3_LEAF1_MAGIC) && + (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) + return false; + + /* Check hash value order, count stale entries. */ + for (i = stale = 0; i < hdr->count; i++) { + if (i + 1 < hdr->count) { + if (be32_to_cpu(ents[i].hashval) > + be32_to_cpu(ents[i + 1].hashval)) + return false; + } + if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + stale++; + } + if (hdr->stale != stale) + return false; + return true; +} + +/* + * We verify the magic numbers before decoding the leaf header so that on debug + * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due + * to incorrect magic numbers. + */ +static bool +xfs_dir3_leaf_verify( + struct xfs_buf *bp, + __uint16_t magic) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dir2_leaf *leaf = bp->b_addr; + + ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; + __uint16_t magic3; + + magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC + : XFS_DIR3_LEAFN_MAGIC; + + if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) + return false; + if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid)) + return false; + if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) + return false; + } else { + if (leaf->hdr.info.magic != cpu_to_be16(magic)) + return false; + } + + return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); +} + +static void +__read_verify( + struct xfs_buf *bp, + __uint16_t magic) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_dir3_leaf_verify(bp, magic)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +static void +__write_verify( + struct xfs_buf *bp, + __uint16_t magic) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; + + if (!xfs_dir3_leaf_verify(bp, magic)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (bip) + hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); + + xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); +} + +static void +xfs_dir3_leaf1_read_verify( + struct xfs_buf *bp) +{ + __read_verify(bp, XFS_DIR2_LEAF1_MAGIC); +} + +static void +xfs_dir3_leaf1_write_verify( + struct xfs_buf *bp) +{ + __write_verify(bp, XFS_DIR2_LEAF1_MAGIC); +} + +static void +xfs_dir3_leafn_read_verify( + struct xfs_buf *bp) +{ + __read_verify(bp, XFS_DIR2_LEAFN_MAGIC); +} + +static void +xfs_dir3_leafn_write_verify( + struct xfs_buf *bp) +{ + __write_verify(bp, XFS_DIR2_LEAFN_MAGIC); +} + +const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = { + .verify_read = xfs_dir3_leaf1_read_verify, + .verify_write = xfs_dir3_leaf1_write_verify, +}; + +const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { + .verify_read = xfs_dir3_leafn_read_verify, + .verify_write = xfs_dir3_leafn_write_verify, +}; + +static int +xfs_dir3_leaf_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t fbno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp) +{ + int err; + + err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, + XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops); + if (!err && tp) + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF); + return err; +} + +int +xfs_dir3_leafn_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t fbno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp) +{ + int err; + + err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, + XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops); + if (!err && tp) + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF); + return err; +} + +/* + * Initialize a new leaf block, leaf1 or leafn magic accepted. + */ +static void +xfs_dir3_leaf_init( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *bp, + xfs_ino_t owner, + __uint16_t type) +{ + struct xfs_dir2_leaf *leaf = bp->b_addr; + + ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; + + memset(leaf3, 0, sizeof(*leaf3)); + + leaf3->info.hdr.magic = (type == XFS_DIR2_LEAF1_MAGIC) + ? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) + : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); + leaf3->info.blkno = cpu_to_be64(bp->b_bn); + leaf3->info.owner = cpu_to_be64(owner); + uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_uuid); + } else { + memset(leaf, 0, sizeof(*leaf)); + leaf->hdr.info.magic = cpu_to_be16(type); + } + + /* + * If it's a leaf-format directory initialize the tail. + * Caller is responsible for initialising the bests table. + */ + if (type == XFS_DIR2_LEAF1_MAGIC) { + struct xfs_dir2_leaf_tail *ltp; + + ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf); + ltp->bestcount = 0; + bp->b_ops = &xfs_dir3_leaf1_buf_ops; + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF); + } else { + bp->b_ops = &xfs_dir3_leafn_buf_ops; + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF); + } +} + +int +xfs_dir3_leaf_get_buf( + xfs_da_args_t *args, + xfs_dir2_db_t bno, + struct xfs_buf **bpp, + __uint16_t magic) +{ + struct xfs_inode *dp = args->dp; + struct xfs_trans *tp = args->trans; + struct xfs_mount *mp = dp->i_mount; + struct xfs_buf *bp; + int error; + + ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); + ASSERT(bno >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) && + bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); + + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno), + -1, &bp, XFS_DATA_FORK); + if (error) + return error; + + xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic); + xfs_dir3_leaf_log_header(args, bp); + if (magic == XFS_DIR2_LEAF1_MAGIC) + xfs_dir3_leaf_log_tail(args, bp); + *bpp = bp; + return 0; +} + +/* + * Convert a block form directory to a leaf form directory. + */ +int /* error */ +xfs_dir2_block_to_leaf( + xfs_da_args_t *args, /* operation arguments */ + struct xfs_buf *dbp) /* input block's buffer */ +{ + __be16 *bestsp; /* leaf's bestsp entries */ + xfs_dablk_t blkno; /* leaf block's bno */ + xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */ + xfs_dir2_block_tail_t *btp; /* block's tail */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + struct xfs_buf *lbp; /* leaf block's buffer */ + xfs_dir2_db_t ldb; /* leaf block's bno */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_tail_t *ltp; /* leaf's tail */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log block header */ + int needscan; /* need to rescan bestfree */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_dir2_data_free *bf; + struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; + + trace_xfs_dir2_block_to_leaf(args); + + dp = args->dp; + mp = dp->i_mount; + tp = args->trans; + /* + * Add the leaf block to the inode. + * This interface will only put blocks in the leaf/node range. + * Since that's empty now, we'll get the root (block 0 in range). + */ + if ((error = xfs_da_grow_inode(args, &blkno))) { + return error; + } + ldb = xfs_dir2_da_to_db(args->geo, blkno); + ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET)); + /* + * Initialize the leaf block, get a buffer for it. + */ + error = xfs_dir3_leaf_get_buf(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC); + if (error) + return error; + + leaf = lbp->b_addr; + hdr = dbp->b_addr; + xfs_dir3_data_check(dp, dbp); + btp = xfs_dir2_block_tail_p(args->geo, hdr); + blp = xfs_dir2_block_leaf_p(btp); + bf = dp->d_ops->data_bestfree_p(hdr); + ents = dp->d_ops->leaf_ents_p(leaf); + + /* + * Set the counts in the leaf header. + */ + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + leafhdr.count = be32_to_cpu(btp->count); + leafhdr.stale = be32_to_cpu(btp->stale); + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(args, lbp); + + /* + * Could compact these but I think we always do the conversion + * after squeezing out stale entries. + */ + memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); + xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1); + needscan = 0; + needlog = 1; + /* + * Make the space formerly occupied by the leaf entries and block + * tail be free. + */ + xfs_dir2_data_make_free(args, dbp, + (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize - + (char *)blp), + &needlog, &needscan); + /* + * Fix up the block header, make it a data block. + */ + dbp->b_ops = &xfs_dir3_data_buf_ops; + xfs_trans_buf_set_type(tp, dbp, XFS_BLFT_DIR_DATA_BUF); + if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) + hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); + else + hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); + + if (needscan) + xfs_dir2_data_freescan(dp, hdr, &needlog); + /* + * Set up leaf tail and bests table. + */ + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + ltp->bestcount = cpu_to_be32(1); + bestsp = xfs_dir2_leaf_bests_p(ltp); + bestsp[0] = bf[0].length; + /* + * Log the data header and leaf bests table. + */ + if (needlog) + xfs_dir2_data_log_header(args, dbp); + xfs_dir3_leaf_check(dp, lbp); + xfs_dir3_data_check(dp, dbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, 0); + return 0; +} + +STATIC void +xfs_dir3_leaf_find_stale( + struct xfs_dir3_icleaf_hdr *leafhdr, + struct xfs_dir2_leaf_entry *ents, + int index, + int *lowstale, + int *highstale) +{ + /* + * Find the first stale entry before our index, if any. + */ + for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) { + if (ents[*lowstale].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + break; + } + + /* + * Find the first stale entry at or after our index, if any. + * Stop if the result would require moving more entries than using + * lowstale. + */ + for (*highstale = index; *highstale < leafhdr->count; ++*highstale) { + if (ents[*highstale].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + break; + if (*lowstale >= 0 && index - *lowstale <= *highstale - index) + break; + } +} + +struct xfs_dir2_leaf_entry * +xfs_dir3_leaf_find_entry( + struct xfs_dir3_icleaf_hdr *leafhdr, + struct xfs_dir2_leaf_entry *ents, + int index, /* leaf table position */ + int compact, /* need to compact leaves */ + int lowstale, /* index of prev stale leaf */ + int highstale, /* index of next stale leaf */ + int *lfloglow, /* low leaf logging index */ + int *lfloghigh) /* high leaf logging index */ +{ + if (!leafhdr->stale) { + xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ + + /* + * Now we need to make room to insert the leaf entry. + * + * If there are no stale entries, just insert a hole at index. + */ + lep = &ents[index]; + if (index < leafhdr->count) + memmove(lep + 1, lep, + (leafhdr->count - index) * sizeof(*lep)); + + /* + * Record low and high logging indices for the leaf. + */ + *lfloglow = index; + *lfloghigh = leafhdr->count++; + return lep; + } + + /* + * There are stale entries. + * + * We will use one of them for the new entry. It's probably not at + * the right location, so we'll have to shift some up or down first. + * + * If we didn't compact before, we need to find the nearest stale + * entries before and after our insertion point. + */ + if (compact == 0) + xfs_dir3_leaf_find_stale(leafhdr, ents, index, + &lowstale, &highstale); + + /* + * If the low one is better, use it. + */ + if (lowstale >= 0 && + (highstale == leafhdr->count || + index - lowstale - 1 < highstale - index)) { + ASSERT(index - lowstale - 1 >= 0); + ASSERT(ents[lowstale].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); + + /* + * Copy entries up to cover the stale entry and make room + * for the new entry. + */ + if (index - lowstale - 1 > 0) { + memmove(&ents[lowstale], &ents[lowstale + 1], + (index - lowstale - 1) * + sizeof(xfs_dir2_leaf_entry_t)); + } + *lfloglow = MIN(lowstale, *lfloglow); + *lfloghigh = MAX(index - 1, *lfloghigh); + leafhdr->stale--; + return &ents[index - 1]; + } + + /* + * The high one is better, so use that one. + */ + ASSERT(highstale - index >= 0); + ASSERT(ents[highstale].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); + + /* + * Copy entries down to cover the stale entry and make room for the + * new entry. + */ + if (highstale - index > 0) { + memmove(&ents[index + 1], &ents[index], + (highstale - index) * sizeof(xfs_dir2_leaf_entry_t)); + } + *lfloglow = MIN(index, *lfloglow); + *lfloghigh = MAX(highstale, *lfloghigh); + leafhdr->stale--; + return &ents[index]; +} + +/* + * Add an entry to a leaf form directory. + */ +int /* error */ +xfs_dir2_leaf_addname( + xfs_da_args_t *args) /* operation arguments */ +{ + __be16 *bestsp; /* freespace table in leaf */ + int compact; /* need to compact leaves */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ + struct xfs_buf *dbp; /* data block buffer */ + xfs_dir2_data_entry_t *dep; /* data block entry */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_data_unused_t *dup; /* data unused entry */ + int error; /* error return value */ + int grown; /* allocated new data block */ + int highstale; /* index of next stale leaf */ + int i; /* temporary, index */ + int index; /* leaf table position */ + struct xfs_buf *lbp; /* leaf's buffer */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + int length; /* length of new entry */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ + int lfloglow; /* low leaf logging index */ + int lfloghigh; /* high leaf logging index */ + int lowstale; /* index of prev stale leaf */ + xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */ + xfs_mount_t *mp; /* filesystem mount point */ + int needbytes; /* leaf block bytes needed */ + int needlog; /* need to log data header */ + int needscan; /* need to rescan data free */ + __be16 *tagp; /* end of data entry */ + xfs_trans_t *tp; /* transaction pointer */ + xfs_dir2_db_t use_block; /* data block number */ + struct xfs_dir2_data_free *bf; /* bestfree table */ + struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; + + trace_xfs_dir2_leaf_addname(args); + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + + error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); + if (error) + return error; + + /* + * Look up the entry by hash value and name. + * We know it's not there, our caller has already done a lookup. + * So the index is of the entry to insert in front of. + * But if there are dup hash values the index is of the first of those. + */ + index = xfs_dir2_leaf_search_hash(args, lbp); + leaf = lbp->b_addr; + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + bestsp = xfs_dir2_leaf_bests_p(ltp); + length = dp->d_ops->data_entsize(args->namelen); + + /* + * See if there are any entries with the same hash value + * and space in their block for the new entry. + * This is good because it puts multiple same-hash value entries + * in a data block, improving the lookup of those entries. + */ + for (use_block = -1, lep = &ents[index]; + index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; + index++, lep++) { + if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) + continue; + i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); + ASSERT(i < be32_to_cpu(ltp->bestcount)); + ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF)); + if (be16_to_cpu(bestsp[i]) >= length) { + use_block = i; + break; + } + } + /* + * Didn't find a block yet, linear search all the data blocks. + */ + if (use_block == -1) { + for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) { + /* + * Remember a block we see that's missing. + */ + if (bestsp[i] == cpu_to_be16(NULLDATAOFF) && + use_block == -1) + use_block = i; + else if (be16_to_cpu(bestsp[i]) >= length) { + use_block = i; + break; + } + } + } + /* + * How many bytes do we need in the leaf block? + */ + needbytes = 0; + if (!leafhdr.stale) + needbytes += sizeof(xfs_dir2_leaf_entry_t); + if (use_block == -1) + needbytes += sizeof(xfs_dir2_data_off_t); + + /* + * Now kill use_block if it refers to a missing block, so we + * can use it as an indication of allocation needed. + */ + if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF)) + use_block = -1; + /* + * If we don't have enough free bytes but we can make enough + * by compacting out stale entries, we'll do that. + */ + if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes && + leafhdr.stale > 1) + compact = 1; + + /* + * Otherwise if we don't have enough free bytes we need to + * convert to node form. + */ + else if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes) { + /* + * Just checking or no space reservation, give up. + */ + if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || + args->total == 0) { + xfs_trans_brelse(tp, lbp); + return ENOSPC; + } + /* + * Convert to node form. + */ + error = xfs_dir2_leaf_to_node(args, lbp); + if (error) + return error; + /* + * Then add the new entry. + */ + return xfs_dir2_node_addname(args); + } + /* + * Otherwise it will fit without compaction. + */ + else + compact = 0; + /* + * If just checking, then it will fit unless we needed to allocate + * a new data block. + */ + if (args->op_flags & XFS_DA_OP_JUSTCHECK) { + xfs_trans_brelse(tp, lbp); + return use_block == -1 ? ENOSPC : 0; + } + /* + * If no allocations are allowed, return now before we've + * changed anything. + */ + if (args->total == 0 && use_block == -1) { + xfs_trans_brelse(tp, lbp); + return ENOSPC; + } + /* + * Need to compact the leaf entries, removing stale ones. + * Leave one stale entry behind - the one closest to our + * insertion index - and we'll shift that one to our insertion + * point later. + */ + if (compact) { + xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale, + &highstale, &lfloglow, &lfloghigh); + } + /* + * There are stale entries, so we'll need log-low and log-high + * impossibly bad values later. + */ + else if (leafhdr.stale) { + lfloglow = leafhdr.count; + lfloghigh = -1; + } + /* + * If there was no data block space found, we need to allocate + * a new one. + */ + if (use_block == -1) { + /* + * Add the new data block. + */ + if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, + &use_block))) { + xfs_trans_brelse(tp, lbp); + return error; + } + /* + * Initialize the block. + */ + if ((error = xfs_dir3_data_init(args, use_block, &dbp))) { + xfs_trans_brelse(tp, lbp); + return error; + } + /* + * If we're adding a new data block on the end we need to + * extend the bests table. Copy it up one entry. + */ + if (use_block >= be32_to_cpu(ltp->bestcount)) { + bestsp--; + memmove(&bestsp[0], &bestsp[1], + be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0])); + be32_add_cpu(<p->bestcount, 1); + xfs_dir3_leaf_log_tail(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, + be32_to_cpu(ltp->bestcount) - 1); + } + /* + * If we're filling in a previously empty block just log it. + */ + else + xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block); + hdr = dbp->b_addr; + bf = dp->d_ops->data_bestfree_p(hdr); + bestsp[use_block] = bf[0].length; + grown = 1; + } else { + /* + * Already had space in some data block. + * Just read that one in. + */ + error = xfs_dir3_data_read(tp, dp, + xfs_dir2_db_to_da(args->geo, use_block), + -1, &dbp); + if (error) { + xfs_trans_brelse(tp, lbp); + return error; + } + hdr = dbp->b_addr; + bf = dp->d_ops->data_bestfree_p(hdr); + grown = 0; + } + /* + * Point to the biggest freespace in our data block. + */ + dup = (xfs_dir2_data_unused_t *) + ((char *)hdr + be16_to_cpu(bf[0].offset)); + ASSERT(be16_to_cpu(dup->length) >= length); + needscan = needlog = 0; + /* + * Mark the initial part of our freespace in use for the new entry. + */ + xfs_dir2_data_use_free(args, dbp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, + &needlog, &needscan); + /* + * Initialize our new entry (at last). + */ + dep = (xfs_dir2_data_entry_t *)dup; + dep->inumber = cpu_to_be64(args->inumber); + dep->namelen = args->namelen; + memcpy(dep->name, args->name, dep->namelen); + dp->d_ops->data_put_ftype(dep, args->filetype); + tagp = dp->d_ops->data_entry_tag_p(dep); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); + /* + * Need to scan fix up the bestfree table. + */ + if (needscan) + xfs_dir2_data_freescan(dp, hdr, &needlog); + /* + * Need to log the data block's header. + */ + if (needlog) + xfs_dir2_data_log_header(args, dbp); + xfs_dir2_data_log_entry(args, dbp, dep); + /* + * If the bests table needs to be changed, do it. + * Log the change unless we've already done that. + */ + if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) { + bestsp[use_block] = bf[0].length; + if (!grown) + xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block); + } + + lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale, + highstale, &lfloglow, &lfloghigh); + + /* + * Fill in the new leaf entry. + */ + lep->hashval = cpu_to_be32(args->hashval); + lep->address = cpu_to_be32( + xfs_dir2_db_off_to_dataptr(args->geo, use_block, + be16_to_cpu(*tagp))); + /* + * Log the leaf fields and give up the buffers. + */ + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(args, lbp); + xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh); + xfs_dir3_leaf_check(dp, lbp); + xfs_dir3_data_check(dp, dbp); + return 0; +} + +/* + * Compact out any stale entries in the leaf. + * Log the header and changed leaf entries, if any. + */ +void +xfs_dir3_leaf_compact( + xfs_da_args_t *args, /* operation arguments */ + struct xfs_dir3_icleaf_hdr *leafhdr, + struct xfs_buf *bp) /* leaf buffer */ +{ + int from; /* source leaf index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + int loglow; /* first leaf entry to log */ + int to; /* target leaf index */ + struct xfs_dir2_leaf_entry *ents; + struct xfs_inode *dp = args->dp; + + leaf = bp->b_addr; + if (!leafhdr->stale) + return; + + /* + * Compress out the stale entries in place. + */ + ents = dp->d_ops->leaf_ents_p(leaf); + for (from = to = 0, loglow = -1; from < leafhdr->count; from++) { + if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + continue; + /* + * Only actually copy the entries that are different. + */ + if (from > to) { + if (loglow == -1) + loglow = to; + ents[to] = ents[from]; + } + to++; + } + /* + * Update and log the header, log the leaf entries. + */ + ASSERT(leafhdr->stale == from - to); + leafhdr->count -= leafhdr->stale; + leafhdr->stale = 0; + + dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr); + xfs_dir3_leaf_log_header(args, bp); + if (loglow != -1) + xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1); +} + +/* + * Compact the leaf entries, removing stale ones. + * Leave one stale entry behind - the one closest to our + * insertion index - and the caller will shift that one to our insertion + * point later. + * Return new insertion index, where the remaining stale entry is, + * and leaf logging indices. + */ +void +xfs_dir3_leaf_compact_x1( + struct xfs_dir3_icleaf_hdr *leafhdr, + struct xfs_dir2_leaf_entry *ents, + int *indexp, /* insertion index */ + int *lowstalep, /* out: stale entry before us */ + int *highstalep, /* out: stale entry after us */ + int *lowlogp, /* out: low log index */ + int *highlogp) /* out: high log index */ +{ + int from; /* source copy index */ + int highstale; /* stale entry at/after index */ + int index; /* insertion index */ + int keepstale; /* source index of kept stale */ + int lowstale; /* stale entry before index */ + int newindex=0; /* new insertion index */ + int to; /* destination copy index */ + + ASSERT(leafhdr->stale > 1); + index = *indexp; + + xfs_dir3_leaf_find_stale(leafhdr, ents, index, &lowstale, &highstale); + + /* + * Pick the better of lowstale and highstale. + */ + if (lowstale >= 0 && + (highstale == leafhdr->count || + index - lowstale <= highstale - index)) + keepstale = lowstale; + else + keepstale = highstale; + /* + * Copy the entries in place, removing all the stale entries + * except keepstale. + */ + for (from = to = 0; from < leafhdr->count; from++) { + /* + * Notice the new value of index. + */ + if (index == from) + newindex = to; + if (from != keepstale && + ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { + if (from == to) + *lowlogp = to; + continue; + } + /* + * Record the new keepstale value for the insertion. + */ + if (from == keepstale) + lowstale = highstale = to; + /* + * Copy only the entries that have moved. + */ + if (from > to) + ents[to] = ents[from]; + to++; + } + ASSERT(from > to); + /* + * If the insertion point was past the last entry, + * set the new insertion point accordingly. + */ + if (index == from) + newindex = to; + *indexp = newindex; + /* + * Adjust the leaf header values. + */ + leafhdr->count -= from - to; + leafhdr->stale = 1; + /* + * Remember the low/high stale value only in the "right" + * direction. + */ + if (lowstale >= newindex) + lowstale = -1; + else + highstale = leafhdr->count; + *highlogp = leafhdr->count - 1; + *lowstalep = lowstale; + *highstalep = highstale; +} + +/* + * Log the bests entries indicated from a leaf1 block. + */ +static void +xfs_dir3_leaf_log_bests( + struct xfs_da_args *args, + struct xfs_buf *bp, /* leaf buffer */ + int first, /* first entry to log */ + int last) /* last entry to log */ +{ + __be16 *firstb; /* pointer to first entry */ + __be16 *lastb; /* pointer to last entry */ + struct xfs_dir2_leaf *leaf = bp->b_addr; + xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ + + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)); + + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + firstb = xfs_dir2_leaf_bests_p(ltp) + first; + lastb = xfs_dir2_leaf_bests_p(ltp) + last; + xfs_trans_log_buf(args->trans, bp, + (uint)((char *)firstb - (char *)leaf), + (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1)); +} + +/* + * Log the leaf entries indicated from a leaf1 or leafn block. + */ +void +xfs_dir3_leaf_log_ents( + struct xfs_da_args *args, + struct xfs_buf *bp, + int first, + int last) +{ + xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */ + xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */ + struct xfs_dir2_leaf *leaf = bp->b_addr; + struct xfs_dir2_leaf_entry *ents; + + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); + + ents = args->dp->d_ops->leaf_ents_p(leaf); + firstlep = &ents[first]; + lastlep = &ents[last]; + xfs_trans_log_buf(args->trans, bp, + (uint)((char *)firstlep - (char *)leaf), + (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1)); +} + +/* + * Log the header of the leaf1 or leafn block. + */ +void +xfs_dir3_leaf_log_header( + struct xfs_da_args *args, + struct xfs_buf *bp) +{ + struct xfs_dir2_leaf *leaf = bp->b_addr; + + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); + + xfs_trans_log_buf(args->trans, bp, + (uint)((char *)&leaf->hdr - (char *)leaf), + args->dp->d_ops->leaf_hdr_size - 1); +} + +/* + * Log the tail of the leaf1 block. + */ +STATIC void +xfs_dir3_leaf_log_tail( + struct xfs_da_args *args, + struct xfs_buf *bp) +{ + struct xfs_dir2_leaf *leaf = bp->b_addr; + xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ + + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); + + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf), + (uint)(args->geo->blksize - 1)); +} + +/* + * Look up the entry referred to by args in the leaf format directory. + * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which + * is also used by the node-format code. + */ +int +xfs_dir2_leaf_lookup( + xfs_da_args_t *args) /* operation arguments */ +{ + struct xfs_buf *dbp; /* data block buffer */ + xfs_dir2_data_entry_t *dep; /* data block entry */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + int index; /* found entry index */ + struct xfs_buf *lbp; /* leaf buffer */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_dir2_leaf_entry *ents; + + trace_xfs_dir2_leaf_lookup(args); + + /* + * Look up name in the leaf block, returning both buffers and index. + */ + if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { + return error; + } + tp = args->trans; + dp = args->dp; + xfs_dir3_leaf_check(dp, lbp); + leaf = lbp->b_addr; + ents = dp->d_ops->leaf_ents_p(leaf); + /* + * Get to the leaf entry and contained data entry address. + */ + lep = &ents[index]; + + /* + * Point to the data entry. + */ + dep = (xfs_dir2_data_entry_t *) + ((char *)dbp->b_addr + + xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); + /* + * Return the found inode number & CI name if appropriate + */ + args->inumber = be64_to_cpu(dep->inumber); + args->filetype = dp->d_ops->data_get_ftype(dep); + error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); + xfs_trans_brelse(tp, dbp); + xfs_trans_brelse(tp, lbp); + return error; +} + +/* + * Look up name/hash in the leaf block. + * Fill in indexp with the found index, and dbpp with the data buffer. + * If not found dbpp will be NULL, and ENOENT comes back. + * lbpp will always be filled in with the leaf buffer unless there's an error. + */ +static int /* error */ +xfs_dir2_leaf_lookup_int( + xfs_da_args_t *args, /* operation arguments */ + struct xfs_buf **lbpp, /* out: leaf buffer */ + int *indexp, /* out: index in leaf block */ + struct xfs_buf **dbpp) /* out: data buffer */ +{ + xfs_dir2_db_t curdb = -1; /* current data block number */ + struct xfs_buf *dbp = NULL; /* data buffer */ + xfs_dir2_data_entry_t *dep; /* data entry */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + int index; /* index in leaf block */ + struct xfs_buf *lbp; /* leaf buffer */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_dir2_db_t newdb; /* new data block number */ + xfs_trans_t *tp; /* transaction pointer */ + xfs_dir2_db_t cidb = -1; /* case match data block no. */ + enum xfs_dacmp cmp; /* name compare result */ + struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + + error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); + if (error) + return error; + + *lbpp = lbp; + leaf = lbp->b_addr; + xfs_dir3_leaf_check(dp, lbp); + ents = dp->d_ops->leaf_ents_p(leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + + /* + * Look for the first leaf entry with our hash value. + */ + index = xfs_dir2_leaf_search_hash(args, lbp); + /* + * Loop over all the entries with the right hash value + * looking to match the name. + */ + for (lep = &ents[index]; + index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; + lep++, index++) { + /* + * Skip over stale leaf entries. + */ + if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) + continue; + /* + * Get the new data block number. + */ + newdb = xfs_dir2_dataptr_to_db(args->geo, + be32_to_cpu(lep->address)); + /* + * If it's not the same as the old data block number, + * need to pitch the old one and read the new one. + */ + if (newdb != curdb) { + if (dbp) + xfs_trans_brelse(tp, dbp); + error = xfs_dir3_data_read(tp, dp, + xfs_dir2_db_to_da(args->geo, newdb), + -1, &dbp); + if (error) { + xfs_trans_brelse(tp, lbp); + return error; + } + curdb = newdb; + } + /* + * Point to the data entry. + */ + dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr + + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(lep->address))); + /* + * Compare name and if it's an exact match, return the index + * and buffer. If it's the first case-insensitive match, store + * the index and buffer and continue looking for an exact match. + */ + cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); + if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { + args->cmpresult = cmp; + *indexp = index; + /* case exact match: return the current buffer. */ + if (cmp == XFS_CMP_EXACT) { + *dbpp = dbp; + return 0; + } + cidb = curdb; + } + } + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); + /* + * Here, we can only be doing a lookup (not a rename or remove). + * If a case-insensitive match was found earlier, re-read the + * appropriate data block if required and return it. + */ + if (args->cmpresult == XFS_CMP_CASE) { + ASSERT(cidb != -1); + if (cidb != curdb) { + xfs_trans_brelse(tp, dbp); + error = xfs_dir3_data_read(tp, dp, + xfs_dir2_db_to_da(args->geo, cidb), + -1, &dbp); + if (error) { + xfs_trans_brelse(tp, lbp); + return error; + } + } + *dbpp = dbp; + return 0; + } + /* + * No match found, return ENOENT. + */ + ASSERT(cidb == -1); + if (dbp) + xfs_trans_brelse(tp, dbp); + xfs_trans_brelse(tp, lbp); + return ENOENT; +} + +/* + * Remove an entry from a leaf format directory. + */ +int /* error */ +xfs_dir2_leaf_removename( + xfs_da_args_t *args) /* operation arguments */ +{ + __be16 *bestsp; /* leaf block best freespace */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_db_t db; /* data block number */ + struct xfs_buf *dbp; /* data block buffer */ + xfs_dir2_data_entry_t *dep; /* data entry structure */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + xfs_dir2_db_t i; /* temporary data block # */ + int index; /* index into leaf entries */ + struct xfs_buf *lbp; /* leaf buffer */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log data header */ + int needscan; /* need to rescan data frees */ + xfs_dir2_data_off_t oldbest; /* old value of best free */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_dir2_data_free *bf; /* bestfree table */ + struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; + + trace_xfs_dir2_leaf_removename(args); + + /* + * Lookup the leaf entry, get the leaf and data blocks read in. + */ + if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { + return error; + } + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + leaf = lbp->b_addr; + hdr = dbp->b_addr; + xfs_dir3_data_check(dp, dbp); + bf = dp->d_ops->data_bestfree_p(hdr); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + /* + * Point to the leaf entry, use that to point to the data entry. + */ + lep = &ents[index]; + db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); + needscan = needlog = 0; + oldbest = be16_to_cpu(bf[0].length); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + bestsp = xfs_dir2_leaf_bests_p(ltp); + ASSERT(be16_to_cpu(bestsp[db]) == oldbest); + /* + * Mark the former data entry unused. + */ + xfs_dir2_data_make_free(args, dbp, + (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), + dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); + /* + * We just mark the leaf entry stale by putting a null in it. + */ + leafhdr.stale++; + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(args, lbp); + + lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); + xfs_dir3_leaf_log_ents(args, lbp, index, index); + + /* + * Scan the freespace in the data block again if necessary, + * log the data block header if necessary. + */ + if (needscan) + xfs_dir2_data_freescan(dp, hdr, &needlog); + if (needlog) + xfs_dir2_data_log_header(args, dbp); + /* + * If the longest freespace in the data block has changed, + * put the new value in the bests table and log that. + */ + if (be16_to_cpu(bf[0].length) != oldbest) { + bestsp[db] = bf[0].length; + xfs_dir3_leaf_log_bests(args, lbp, db, db); + } + xfs_dir3_data_check(dp, dbp); + /* + * If the data block is now empty then get rid of the data block. + */ + if (be16_to_cpu(bf[0].length) == + args->geo->blksize - dp->d_ops->data_entry_offset) { + ASSERT(db != args->geo->datablk); + if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { + /* + * Nope, can't get rid of it because it caused + * allocation of a bmap btree block to do so. + * Just go on, returning success, leaving the + * empty block in place. + */ + if (error == ENOSPC && args->total == 0) + error = 0; + xfs_dir3_leaf_check(dp, lbp); + return error; + } + dbp = NULL; + /* + * If this is the last data block then compact the + * bests table by getting rid of entries. + */ + if (db == be32_to_cpu(ltp->bestcount) - 1) { + /* + * Look for the last active entry (i). + */ + for (i = db - 1; i > 0; i--) { + if (bestsp[i] != cpu_to_be16(NULLDATAOFF)) + break; + } + /* + * Copy the table down so inactive entries at the + * end are removed. + */ + memmove(&bestsp[db - i], bestsp, + (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp)); + be32_add_cpu(<p->bestcount, -(db - i)); + xfs_dir3_leaf_log_tail(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, + be32_to_cpu(ltp->bestcount) - 1); + } else + bestsp[db] = cpu_to_be16(NULLDATAOFF); + } + /* + * If the data block was not the first one, drop it. + */ + else if (db != args->geo->datablk) + dbp = NULL; + + xfs_dir3_leaf_check(dp, lbp); + /* + * See if we can convert to block form. + */ + return xfs_dir2_leaf_to_block(args, lbp, dbp); +} + +/* + * Replace the inode number in a leaf format directory entry. + */ +int /* error */ +xfs_dir2_leaf_replace( + xfs_da_args_t *args) /* operation arguments */ +{ + struct xfs_buf *dbp; /* data block buffer */ + xfs_dir2_data_entry_t *dep; /* data block entry */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + int index; /* index of leaf entry */ + struct xfs_buf *lbp; /* leaf buffer */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_dir2_leaf_entry *ents; + + trace_xfs_dir2_leaf_replace(args); + + /* + * Look up the entry. + */ + if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { + return error; + } + dp = args->dp; + leaf = lbp->b_addr; + ents = dp->d_ops->leaf_ents_p(leaf); + /* + * Point to the leaf entry, get data address from it. + */ + lep = &ents[index]; + /* + * Point to the data entry. + */ + dep = (xfs_dir2_data_entry_t *) + ((char *)dbp->b_addr + + xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); + ASSERT(args->inumber != be64_to_cpu(dep->inumber)); + /* + * Put the new inode number in, log it. + */ + dep->inumber = cpu_to_be64(args->inumber); + dp->d_ops->data_put_ftype(dep, args->filetype); + tp = args->trans; + xfs_dir2_data_log_entry(args, dbp, dep); + xfs_dir3_leaf_check(dp, lbp); + xfs_trans_brelse(tp, lbp); + return 0; +} + +/* + * Return index in the leaf block (lbp) which is either the first + * one with this hash value, or if there are none, the insert point + * for that hash value. + */ +int /* index value */ +xfs_dir2_leaf_search_hash( + xfs_da_args_t *args, /* operation arguments */ + struct xfs_buf *lbp) /* leaf buffer */ +{ + xfs_dahash_t hash=0; /* hash from this entry */ + xfs_dahash_t hashwant; /* hash value looking for */ + int high; /* high leaf index */ + int low; /* low leaf index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + int mid=0; /* current leaf index */ + struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; + + leaf = lbp->b_addr; + ents = args->dp->d_ops->leaf_ents_p(leaf); + args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + + /* + * Note, the table cannot be empty, so we have to go through the loop. + * Binary search the leaf entries looking for our hash value. + */ + for (lep = ents, low = 0, high = leafhdr.count - 1, + hashwant = args->hashval; + low <= high; ) { + mid = (low + high) >> 1; + if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant) + break; + if (hash < hashwant) + low = mid + 1; + else + high = mid - 1; + } + /* + * Found one, back up through all the equal hash values. + */ + if (hash == hashwant) { + while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) { + mid--; + } + } + /* + * Need to point to an entry higher than ours. + */ + else if (hash < hashwant) + mid++; + return mid; +} + +/* + * Trim off a trailing data block. We know it's empty since the leaf + * freespace table says so. + */ +int /* error */ +xfs_dir2_leaf_trim_data( + xfs_da_args_t *args, /* operation arguments */ + struct xfs_buf *lbp, /* leaf buffer */ + xfs_dir2_db_t db) /* data block number */ +{ + __be16 *bestsp; /* leaf bests table */ + struct xfs_buf *dbp; /* data block buffer */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return value */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_trans_t *tp; /* transaction pointer */ + + dp = args->dp; + mp = dp->i_mount; + tp = args->trans; + /* + * Read the offending data block. We need its buffer. + */ + error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db), + -1, &dbp); + if (error) + return error; + + leaf = lbp->b_addr; + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + +#ifdef DEBUG +{ + struct xfs_dir2_data_hdr *hdr = dbp->b_addr; + struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr); + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); + ASSERT(be16_to_cpu(bf[0].length) == + args->geo->blksize - dp->d_ops->data_entry_offset); + ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); +} +#endif + + /* + * Get rid of the data block. + */ + if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { + ASSERT(error != ENOSPC); + xfs_trans_brelse(tp, dbp); + return error; + } + /* + * Eliminate the last bests entry from the table. + */ + bestsp = xfs_dir2_leaf_bests_p(ltp); + be32_add_cpu(<p->bestcount, -1); + memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); + xfs_dir3_leaf_log_tail(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); + return 0; +} + +static inline size_t +xfs_dir3_leaf_size( + struct xfs_dir3_icleaf_hdr *hdr, + int counts) +{ + int entries; + int hdrsize; + + entries = hdr->count - hdr->stale; + if (hdr->magic == XFS_DIR2_LEAF1_MAGIC || + hdr->magic == XFS_DIR2_LEAFN_MAGIC) + hdrsize = sizeof(struct xfs_dir2_leaf_hdr); + else + hdrsize = sizeof(struct xfs_dir3_leaf_hdr); + + return hdrsize + entries * sizeof(xfs_dir2_leaf_entry_t) + + counts * sizeof(xfs_dir2_data_off_t) + + sizeof(xfs_dir2_leaf_tail_t); +} + +/* + * Convert node form directory to leaf form directory. + * The root of the node form dir needs to already be a LEAFN block. + * Just return if we can't do anything. + */ +int /* error */ +xfs_dir2_node_to_leaf( + xfs_da_state_t *state) /* directory operation state */ +{ + xfs_da_args_t *args; /* operation arguments */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + struct xfs_buf *fbp; /* buffer for freespace block */ + xfs_fileoff_t fo; /* freespace file offset */ + xfs_dir2_free_t *free; /* freespace structure */ + struct xfs_buf *lbp; /* buffer for leaf block */ + xfs_dir2_leaf_tail_t *ltp; /* tail of leaf structure */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_mount_t *mp; /* filesystem mount point */ + int rval; /* successful free trim? */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_dir3_icleaf_hdr leafhdr; + struct xfs_dir3_icfree_hdr freehdr; + + /* + * There's more than a leaf level in the btree, so there must + * be multiple leafn blocks. Give up. + */ + if (state->path.active > 1) + return 0; + args = state->args; + + trace_xfs_dir2_node_to_leaf(args); + + mp = state->mp; + dp = args->dp; + tp = args->trans; + /* + * Get the last offset in the file. + */ + if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) { + return error; + } + fo -= args->geo->fsbcount; + /* + * If there are freespace blocks other than the first one, + * take this opportunity to remove trailing empty freespace blocks + * that may have been left behind during no-space-reservation + * operations. + */ + while (fo > args->geo->freeblk) { + if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) { + return error; + } + if (rval) + fo -= args->geo->fsbcount; + else + return 0; + } + /* + * Now find the block just before the freespace block. + */ + if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) { + return error; + } + /* + * If it's not the single leaf block, give up. + */ + if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize) + return 0; + lbp = state->path.blk[0].bp; + leaf = lbp->b_addr; + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + + ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || + leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); + + /* + * Read the freespace block. + */ + error = xfs_dir2_free_read(tp, dp, args->geo->freeblk, &fbp); + if (error) + return error; + free = fbp->b_addr; + dp->d_ops->free_hdr_from_disk(&freehdr, free); + + ASSERT(!freehdr.firstdb); + + /* + * Now see if the leafn and free data will fit in a leaf1. + * If not, release the buffer and give up. + */ + if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) { + xfs_trans_brelse(tp, fbp); + return 0; + } + + /* + * If the leaf has any stale entries in it, compress them out. + */ + if (leafhdr.stale) + xfs_dir3_leaf_compact(args, &leafhdr, lbp); + + lbp->b_ops = &xfs_dir3_leaf1_buf_ops; + xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAF1_BUF); + leafhdr.magic = (leafhdr.magic == XFS_DIR2_LEAFN_MAGIC) + ? XFS_DIR2_LEAF1_MAGIC + : XFS_DIR3_LEAF1_MAGIC; + + /* + * Set up the leaf tail from the freespace block. + */ + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + ltp->bestcount = cpu_to_be32(freehdr.nvalid); + + /* + * Set up the leaf bests table. + */ + memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free), + freehdr.nvalid * sizeof(xfs_dir2_data_off_t)); + + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); + xfs_dir3_leaf_log_tail(args, lbp); + xfs_dir3_leaf_check(dp, lbp); + + /* + * Get rid of the freespace block. + */ + error = xfs_dir2_shrink_inode(args, + xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET), + fbp); + if (error) { + /* + * This can't fail here because it can only happen when + * punching out the middle of an extent, and this is an + * isolated block. + */ + ASSERT(error != ENOSPC); + return error; + } + fbp = NULL; + /* + * Now see if we can convert the single-leaf directory + * down to a block form directory. + * This routine always kills the dabuf for the leaf, so + * eliminate it from the path. + */ + error = xfs_dir2_leaf_to_block(args, lbp, NULL); + state->path.blk[0].bp = NULL; + return error; +} diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c new file mode 100644 index 000000000000..4cf8b99d09a4 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -0,0 +1,2284 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" +#include "xfs_cksum.h" + +/* + * Function declarations. + */ +static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args, + int index); +static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state, + xfs_da_state_blk_t *blk1, + xfs_da_state_blk_t *blk2); +static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp, + int index, xfs_da_state_blk_t *dblk, + int *rval); +static int xfs_dir2_node_addname_int(xfs_da_args_t *args, + xfs_da_state_blk_t *fblk); + +/* + * Check internal consistency of a leafn block. + */ +#ifdef DEBUG +#define xfs_dir3_leaf_check(dp, bp) \ +do { \ + if (!xfs_dir3_leafn_check((dp), (bp))) \ + ASSERT(0); \ +} while (0); + +static bool +xfs_dir3_leafn_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + struct xfs_dir2_leaf *leaf = bp->b_addr; + struct xfs_dir3_icleaf_hdr leafhdr; + + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + + if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { + struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; + if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) + return false; + } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) + return false; + + return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); +} +#else +#define xfs_dir3_leaf_check(dp, bp) +#endif + +static bool +xfs_dir3_free_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dir2_free_hdr *hdr = bp->b_addr; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC)) + return false; + if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) + return false; + if (be64_to_cpu(hdr3->blkno) != bp->b_bn) + return false; + } else { + if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) + return false; + } + + /* XXX: should bounds check the xfs_dir3_icfree_hdr here */ + + return true; +} + +static void +xfs_dir3_free_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_dir3_free_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +static void +xfs_dir3_free_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + if (!xfs_dir3_free_verify(bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (bip) + hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); + + xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF); +} + +const struct xfs_buf_ops xfs_dir3_free_buf_ops = { + .verify_read = xfs_dir3_free_read_verify, + .verify_write = xfs_dir3_free_write_verify, +}; + + +static int +__xfs_dir3_free_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t fbno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp) +{ + int err; + + err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, + XFS_DATA_FORK, &xfs_dir3_free_buf_ops); + + /* try read returns without an error or *bpp if it lands in a hole */ + if (!err && tp && *bpp) + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF); + return err; +} + +int +xfs_dir2_free_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t fbno, + struct xfs_buf **bpp) +{ + return __xfs_dir3_free_read(tp, dp, fbno, -1, bpp); +} + +static int +xfs_dir2_free_try_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t fbno, + struct xfs_buf **bpp) +{ + return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp); +} + +static int +xfs_dir3_free_get_buf( + xfs_da_args_t *args, + xfs_dir2_db_t fbno, + struct xfs_buf **bpp) +{ + struct xfs_trans *tp = args->trans; + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + struct xfs_buf *bp; + int error; + struct xfs_dir3_icfree_hdr hdr; + + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno), + -1, &bp, XFS_DATA_FORK); + if (error) + return error; + + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_FREE_BUF); + bp->b_ops = &xfs_dir3_free_buf_ops; + + /* + * Initialize the new block to be empty, and remember + * its first slot as our empty slot. + */ + memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr)); + memset(&hdr, 0, sizeof(hdr)); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; + + hdr.magic = XFS_DIR3_FREE_MAGIC; + + hdr3->hdr.blkno = cpu_to_be64(bp->b_bn); + hdr3->hdr.owner = cpu_to_be64(dp->i_ino); + uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid); + } else + hdr.magic = XFS_DIR2_FREE_MAGIC; + dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr); + *bpp = bp; + return 0; +} + +/* + * Log entries from a freespace block. + */ +STATIC void +xfs_dir2_free_log_bests( + struct xfs_da_args *args, + struct xfs_buf *bp, + int first, /* first entry to log */ + int last) /* last entry to log */ +{ + xfs_dir2_free_t *free; /* freespace structure */ + __be16 *bests; + + free = bp->b_addr; + bests = args->dp->d_ops->free_bests_p(free); + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || + free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); + xfs_trans_log_buf(args->trans, bp, + (uint)((char *)&bests[first] - (char *)free), + (uint)((char *)&bests[last] - (char *)free + + sizeof(bests[0]) - 1)); +} + +/* + * Log header from a freespace block. + */ +static void +xfs_dir2_free_log_header( + struct xfs_da_args *args, + struct xfs_buf *bp) +{ +#ifdef DEBUG + xfs_dir2_free_t *free; /* freespace structure */ + + free = bp->b_addr; + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || + free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); +#endif + xfs_trans_log_buf(args->trans, bp, 0, + args->dp->d_ops->free_hdr_size - 1); +} + +/* + * Convert a leaf-format directory to a node-format directory. + * We need to change the magic number of the leaf block, and copy + * the freespace table out of the leaf block into its own block. + */ +int /* error */ +xfs_dir2_leaf_to_node( + xfs_da_args_t *args, /* operation arguments */ + struct xfs_buf *lbp) /* leaf buffer */ +{ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return value */ + struct xfs_buf *fbp; /* freespace buffer */ + xfs_dir2_db_t fdb; /* freespace block number */ + xfs_dir2_free_t *free; /* freespace structure */ + __be16 *from; /* pointer to freespace entry */ + int i; /* leaf freespace index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ + xfs_mount_t *mp; /* filesystem mount point */ + int n; /* count of live freespc ents */ + xfs_dir2_data_off_t off; /* freespace entry value */ + __be16 *to; /* pointer to freespace entry */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_dir3_icfree_hdr freehdr; + + trace_xfs_dir2_leaf_to_node(args); + + dp = args->dp; + mp = dp->i_mount; + tp = args->trans; + /* + * Add a freespace block to the directory. + */ + if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb))) { + return error; + } + ASSERT(fdb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); + /* + * Get the buffer for the new freespace block. + */ + error = xfs_dir3_free_get_buf(args, fdb, &fbp); + if (error) + return error; + + free = fbp->b_addr; + dp->d_ops->free_hdr_from_disk(&freehdr, free); + leaf = lbp->b_addr; + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + ASSERT(be32_to_cpu(ltp->bestcount) <= + (uint)dp->i_d.di_size / args->geo->blksize); + + /* + * Copy freespace entries from the leaf block to the new block. + * Count active entries. + */ + from = xfs_dir2_leaf_bests_p(ltp); + to = dp->d_ops->free_bests_p(free); + for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { + if ((off = be16_to_cpu(*from)) != NULLDATAOFF) + n++; + *to = cpu_to_be16(off); + } + + /* + * Now initialize the freespace block header. + */ + freehdr.nused = n; + freehdr.nvalid = be32_to_cpu(ltp->bestcount); + + dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); + xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1); + xfs_dir2_free_log_header(args, fbp); + + /* + * Converting the leaf to a leafnode is just a matter of changing the + * magic number and the ops. Do the change directly to the buffer as + * it's less work (and less code) than decoding the header to host + * format and back again. + */ + if (leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)) + leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC); + else + leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); + lbp->b_ops = &xfs_dir3_leafn_buf_ops; + xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF); + xfs_dir3_leaf_log_header(args, lbp); + xfs_dir3_leaf_check(dp, lbp); + return 0; +} + +/* + * Add a leaf entry to a leaf block in a node-form directory. + * The other work necessary is done from the caller. + */ +static int /* error */ +xfs_dir2_leafn_add( + struct xfs_buf *bp, /* leaf buffer */ + xfs_da_args_t *args, /* operation arguments */ + int index) /* insertion pt for new entry */ +{ + int compact; /* compacting stale leaves */ + xfs_inode_t *dp; /* incore directory inode */ + int highstale; /* next stale entry */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + int lfloghigh; /* high leaf entry logging */ + int lfloglow; /* low leaf entry logging */ + int lowstale; /* previous stale entry */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_dir3_icleaf_hdr leafhdr; + struct xfs_dir2_leaf_entry *ents; + + trace_xfs_dir2_leafn_add(args, index); + + dp = args->dp; + mp = dp->i_mount; + tp = args->trans; + leaf = bp->b_addr; + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + + /* + * Quick check just to make sure we are not going to index + * into other peoples memory + */ + if (index < 0) + return EFSCORRUPTED; + + /* + * If there are already the maximum number of leaf entries in + * the block, if there are no stale entries it won't fit. + * Caller will do a split. If there are stale entries we'll do + * a compact. + */ + + if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) { + if (!leafhdr.stale) + return ENOSPC; + compact = leafhdr.stale > 1; + } else + compact = 0; + ASSERT(index == 0 || be32_to_cpu(ents[index - 1].hashval) <= args->hashval); + ASSERT(index == leafhdr.count || + be32_to_cpu(ents[index].hashval) >= args->hashval); + + if (args->op_flags & XFS_DA_OP_JUSTCHECK) + return 0; + + /* + * Compact out all but one stale leaf entry. Leaves behind + * the entry closest to index. + */ + if (compact) + xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale, + &highstale, &lfloglow, &lfloghigh); + else if (leafhdr.stale) { + /* + * Set impossible logging indices for this case. + */ + lfloglow = leafhdr.count; + lfloghigh = -1; + } + + /* + * Insert the new entry, log everything. + */ + lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale, + highstale, &lfloglow, &lfloghigh); + + lep->hashval = cpu_to_be32(args->hashval); + lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo, + args->blkno, args->index)); + + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(args, bp); + xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh); + xfs_dir3_leaf_check(dp, bp); + return 0; +} + +#ifdef DEBUG +static void +xfs_dir2_free_hdr_check( + struct xfs_inode *dp, + struct xfs_buf *bp, + xfs_dir2_db_t db) +{ + struct xfs_dir3_icfree_hdr hdr; + + dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr); + + ASSERT((hdr.firstdb % + dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0); + ASSERT(hdr.firstdb <= db); + ASSERT(db < hdr.firstdb + hdr.nvalid); +} +#else +#define xfs_dir2_free_hdr_check(dp, bp, db) +#endif /* DEBUG */ + +/* + * Return the last hash value in the leaf. + * Stale entries are ok. + */ +xfs_dahash_t /* hash value */ +xfs_dir2_leafn_lasthash( + struct xfs_inode *dp, + struct xfs_buf *bp, /* leaf buffer */ + int *count) /* count of entries in leaf */ +{ + struct xfs_dir2_leaf *leaf = bp->b_addr; + struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; + + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + + ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || + leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); + + if (count) + *count = leafhdr.count; + if (!leafhdr.count) + return 0; + + ents = dp->d_ops->leaf_ents_p(leaf); + return be32_to_cpu(ents[leafhdr.count - 1].hashval); +} + +/* + * Look up a leaf entry for space to add a name in a node-format leaf block. + * The extrablk in state is a freespace block. + */ +STATIC int +xfs_dir2_leafn_lookup_for_addname( + struct xfs_buf *bp, /* leaf buffer */ + xfs_da_args_t *args, /* operation arguments */ + int *indexp, /* out: leaf entry index */ + xfs_da_state_t *state) /* state to fill in */ +{ + struct xfs_buf *curbp = NULL; /* current data/free buffer */ + xfs_dir2_db_t curdb = -1; /* current data block number */ + xfs_dir2_db_t curfdb = -1; /* current free block number */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return value */ + int fi; /* free entry index */ + xfs_dir2_free_t *free = NULL; /* free block structure */ + int index; /* leaf entry index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + int length; /* length of new data entry */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_dir2_db_t newdb; /* new data block number */ + xfs_dir2_db_t newfdb; /* new free block number */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + leaf = bp->b_addr; + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + + xfs_dir3_leaf_check(dp, bp); + ASSERT(leafhdr.count > 0); + + /* + * Look up the hash value in the leaf entries. + */ + index = xfs_dir2_leaf_search_hash(args, bp); + /* + * Do we have a buffer coming in? + */ + if (state->extravalid) { + /* If so, it's a free block buffer, get the block number. */ + curbp = state->extrablk.bp; + curfdb = state->extrablk.blkno; + free = curbp->b_addr; + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || + free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); + } + length = dp->d_ops->data_entsize(args->namelen); + /* + * Loop over leaf entries with the right hash value. + */ + for (lep = &ents[index]; + index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; + lep++, index++) { + /* + * Skip stale leaf entries. + */ + if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) + continue; + /* + * Pull the data block number from the entry. + */ + newdb = xfs_dir2_dataptr_to_db(args->geo, + be32_to_cpu(lep->address)); + /* + * For addname, we're looking for a place to put the new entry. + * We want to use a data block with an entry of equal + * hash value to ours if there is one with room. + * + * If this block isn't the data block we already have + * in hand, take a look at it. + */ + if (newdb != curdb) { + __be16 *bests; + + curdb = newdb; + /* + * Convert the data block to the free block + * holding its freespace information. + */ + newfdb = dp->d_ops->db_to_fdb(args->geo, newdb); + /* + * If it's not the one we have in hand, read it in. + */ + if (newfdb != curfdb) { + /* + * If we had one before, drop it. + */ + if (curbp) + xfs_trans_brelse(tp, curbp); + + error = xfs_dir2_free_read(tp, dp, + xfs_dir2_db_to_da(args->geo, + newfdb), + &curbp); + if (error) + return error; + free = curbp->b_addr; + + xfs_dir2_free_hdr_check(dp, curbp, curdb); + } + /* + * Get the index for our entry. + */ + fi = dp->d_ops->db_to_fdindex(args->geo, curdb); + /* + * If it has room, return it. + */ + bests = dp->d_ops->free_bests_p(free); + if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) { + XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", + XFS_ERRLEVEL_LOW, mp); + if (curfdb != newfdb) + xfs_trans_brelse(tp, curbp); + return EFSCORRUPTED; + } + curfdb = newfdb; + if (be16_to_cpu(bests[fi]) >= length) + goto out; + } + } + /* Didn't find any space */ + fi = -1; +out: + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); + if (curbp) { + /* Giving back a free block. */ + state->extravalid = 1; + state->extrablk.bp = curbp; + state->extrablk.index = fi; + state->extrablk.blkno = curfdb; + + /* + * Important: this magic number is not in the buffer - it's for + * buffer type information and therefore only the free/data type + * matters here, not whether CRCs are enabled or not. + */ + state->extrablk.magic = XFS_DIR2_FREE_MAGIC; + } else { + state->extravalid = 0; + } + /* + * Return the index, that will be the insertion point. + */ + *indexp = index; + return ENOENT; +} + +/* + * Look up a leaf entry in a node-format leaf block. + * The extrablk in state a data block. + */ +STATIC int +xfs_dir2_leafn_lookup_for_entry( + struct xfs_buf *bp, /* leaf buffer */ + xfs_da_args_t *args, /* operation arguments */ + int *indexp, /* out: leaf entry index */ + xfs_da_state_t *state) /* state to fill in */ +{ + struct xfs_buf *curbp = NULL; /* current data/free buffer */ + xfs_dir2_db_t curdb = -1; /* current data block number */ + xfs_dir2_data_entry_t *dep; /* data block entry */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return value */ + int index; /* leaf entry index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_dir2_db_t newdb; /* new data block number */ + xfs_trans_t *tp; /* transaction pointer */ + enum xfs_dacmp cmp; /* comparison result */ + struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + leaf = bp->b_addr; + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + + xfs_dir3_leaf_check(dp, bp); + ASSERT(leafhdr.count > 0); + + /* + * Look up the hash value in the leaf entries. + */ + index = xfs_dir2_leaf_search_hash(args, bp); + /* + * Do we have a buffer coming in? + */ + if (state->extravalid) { + curbp = state->extrablk.bp; + curdb = state->extrablk.blkno; + } + /* + * Loop over leaf entries with the right hash value. + */ + for (lep = &ents[index]; + index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; + lep++, index++) { + /* + * Skip stale leaf entries. + */ + if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) + continue; + /* + * Pull the data block number from the entry. + */ + newdb = xfs_dir2_dataptr_to_db(args->geo, + be32_to_cpu(lep->address)); + /* + * Not adding a new entry, so we really want to find + * the name given to us. + * + * If it's a different data block, go get it. + */ + if (newdb != curdb) { + /* + * If we had a block before that we aren't saving + * for a CI name, drop it + */ + if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT || + curdb != state->extrablk.blkno)) + xfs_trans_brelse(tp, curbp); + /* + * If needing the block that is saved with a CI match, + * use it otherwise read in the new data block. + */ + if (args->cmpresult != XFS_CMP_DIFFERENT && + newdb == state->extrablk.blkno) { + ASSERT(state->extravalid); + curbp = state->extrablk.bp; + } else { + error = xfs_dir3_data_read(tp, dp, + xfs_dir2_db_to_da(args->geo, + newdb), + -1, &curbp); + if (error) + return error; + } + xfs_dir3_data_check(dp, curbp); + curdb = newdb; + } + /* + * Point to the data entry. + */ + dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr + + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(lep->address))); + /* + * Compare the entry and if it's an exact match, return + * EEXIST immediately. If it's the first case-insensitive + * match, store the block & inode number and continue looking. + */ + cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); + if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { + /* If there is a CI match block, drop it */ + if (args->cmpresult != XFS_CMP_DIFFERENT && + curdb != state->extrablk.blkno) + xfs_trans_brelse(tp, state->extrablk.bp); + args->cmpresult = cmp; + args->inumber = be64_to_cpu(dep->inumber); + args->filetype = dp->d_ops->data_get_ftype(dep); + *indexp = index; + state->extravalid = 1; + state->extrablk.bp = curbp; + state->extrablk.blkno = curdb; + state->extrablk.index = (int)((char *)dep - + (char *)curbp->b_addr); + state->extrablk.magic = XFS_DIR2_DATA_MAGIC; + curbp->b_ops = &xfs_dir3_data_buf_ops; + xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF); + if (cmp == XFS_CMP_EXACT) + return EEXIST; + } + } + ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT)); + if (curbp) { + if (args->cmpresult == XFS_CMP_DIFFERENT) { + /* Giving back last used data block. */ + state->extravalid = 1; + state->extrablk.bp = curbp; + state->extrablk.index = -1; + state->extrablk.blkno = curdb; + state->extrablk.magic = XFS_DIR2_DATA_MAGIC; + curbp->b_ops = &xfs_dir3_data_buf_ops; + xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF); + } else { + /* If the curbp is not the CI match block, drop it */ + if (state->extrablk.bp != curbp) + xfs_trans_brelse(tp, curbp); + } + } else { + state->extravalid = 0; + } + *indexp = index; + return ENOENT; +} + +/* + * Look up a leaf entry in a node-format leaf block. + * If this is an addname then the extrablk in state is a freespace block, + * otherwise it's a data block. + */ +int +xfs_dir2_leafn_lookup_int( + struct xfs_buf *bp, /* leaf buffer */ + xfs_da_args_t *args, /* operation arguments */ + int *indexp, /* out: leaf entry index */ + xfs_da_state_t *state) /* state to fill in */ +{ + if (args->op_flags & XFS_DA_OP_ADDNAME) + return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp, + state); + return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state); +} + +/* + * Move count leaf entries from source to destination leaf. + * Log entries and headers. Stale entries are preserved. + */ +static void +xfs_dir3_leafn_moveents( + xfs_da_args_t *args, /* operation arguments */ + struct xfs_buf *bp_s, /* source */ + struct xfs_dir3_icleaf_hdr *shdr, + struct xfs_dir2_leaf_entry *sents, + int start_s,/* source leaf index */ + struct xfs_buf *bp_d, /* destination */ + struct xfs_dir3_icleaf_hdr *dhdr, + struct xfs_dir2_leaf_entry *dents, + int start_d,/* destination leaf index */ + int count) /* count of leaves to copy */ +{ + int stale; /* count stale leaves copied */ + + trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count); + + /* + * Silently return if nothing to do. + */ + if (count == 0) + return; + + /* + * If the destination index is not the end of the current + * destination leaf entries, open up a hole in the destination + * to hold the new entries. + */ + if (start_d < dhdr->count) { + memmove(&dents[start_d + count], &dents[start_d], + (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t)); + xfs_dir3_leaf_log_ents(args, bp_d, start_d + count, + count + dhdr->count - 1); + } + /* + * If the source has stale leaves, count the ones in the copy range + * so we can update the header correctly. + */ + if (shdr->stale) { + int i; /* temp leaf index */ + + for (i = start_s, stale = 0; i < start_s + count; i++) { + if (sents[i].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + stale++; + } + } else + stale = 0; + /* + * Copy the leaf entries from source to destination. + */ + memcpy(&dents[start_d], &sents[start_s], + count * sizeof(xfs_dir2_leaf_entry_t)); + xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1); + + /* + * If there are source entries after the ones we copied, + * delete the ones we copied by sliding the next ones down. + */ + if (start_s + count < shdr->count) { + memmove(&sents[start_s], &sents[start_s + count], + count * sizeof(xfs_dir2_leaf_entry_t)); + xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1); + } + + /* + * Update the headers and log them. + */ + shdr->count -= count; + shdr->stale -= stale; + dhdr->count += count; + dhdr->stale += stale; +} + +/* + * Determine the sort order of two leaf blocks. + * Returns 1 if both are valid and leaf2 should be before leaf1, else 0. + */ +int /* sort order */ +xfs_dir2_leafn_order( + struct xfs_inode *dp, + struct xfs_buf *leaf1_bp, /* leaf1 buffer */ + struct xfs_buf *leaf2_bp) /* leaf2 buffer */ +{ + struct xfs_dir2_leaf *leaf1 = leaf1_bp->b_addr; + struct xfs_dir2_leaf *leaf2 = leaf2_bp->b_addr; + struct xfs_dir2_leaf_entry *ents1; + struct xfs_dir2_leaf_entry *ents2; + struct xfs_dir3_icleaf_hdr hdr1; + struct xfs_dir3_icleaf_hdr hdr2; + + dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); + dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); + ents1 = dp->d_ops->leaf_ents_p(leaf1); + ents2 = dp->d_ops->leaf_ents_p(leaf2); + + if (hdr1.count > 0 && hdr2.count > 0 && + (be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) || + be32_to_cpu(ents2[hdr2.count - 1].hashval) < + be32_to_cpu(ents1[hdr1.count - 1].hashval))) + return 1; + return 0; +} + +/* + * Rebalance leaf entries between two leaf blocks. + * This is actually only called when the second block is new, + * though the code deals with the general case. + * A new entry will be inserted in one of the blocks, and that + * entry is taken into account when balancing. + */ +static void +xfs_dir2_leafn_rebalance( + xfs_da_state_t *state, /* btree cursor */ + xfs_da_state_blk_t *blk1, /* first btree block */ + xfs_da_state_blk_t *blk2) /* second btree block */ +{ + xfs_da_args_t *args; /* operation arguments */ + int count; /* count (& direction) leaves */ + int isleft; /* new goes in left leaf */ + xfs_dir2_leaf_t *leaf1; /* first leaf structure */ + xfs_dir2_leaf_t *leaf2; /* second leaf structure */ + int mid; /* midpoint leaf index */ +#if defined(DEBUG) || defined(XFS_WARN) + int oldstale; /* old count of stale leaves */ +#endif + int oldsum; /* old total leaf count */ + int swap; /* swapped leaf blocks */ + struct xfs_dir2_leaf_entry *ents1; + struct xfs_dir2_leaf_entry *ents2; + struct xfs_dir3_icleaf_hdr hdr1; + struct xfs_dir3_icleaf_hdr hdr2; + struct xfs_inode *dp = state->args->dp; + + args = state->args; + /* + * If the block order is wrong, swap the arguments. + */ + if ((swap = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp))) { + xfs_da_state_blk_t *tmp; /* temp for block swap */ + + tmp = blk1; + blk1 = blk2; + blk2 = tmp; + } + leaf1 = blk1->bp->b_addr; + leaf2 = blk2->bp->b_addr; + dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); + dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); + ents1 = dp->d_ops->leaf_ents_p(leaf1); + ents2 = dp->d_ops->leaf_ents_p(leaf2); + + oldsum = hdr1.count + hdr2.count; +#if defined(DEBUG) || defined(XFS_WARN) + oldstale = hdr1.stale + hdr2.stale; +#endif + mid = oldsum >> 1; + + /* + * If the old leaf count was odd then the new one will be even, + * so we need to divide the new count evenly. + */ + if (oldsum & 1) { + xfs_dahash_t midhash; /* middle entry hash value */ + + if (mid >= hdr1.count) + midhash = be32_to_cpu(ents2[mid - hdr1.count].hashval); + else + midhash = be32_to_cpu(ents1[mid].hashval); + isleft = args->hashval <= midhash; + } + /* + * If the old count is even then the new count is odd, so there's + * no preferred side for the new entry. + * Pick the left one. + */ + else + isleft = 1; + /* + * Calculate moved entry count. Positive means left-to-right, + * negative means right-to-left. Then move the entries. + */ + count = hdr1.count - mid + (isleft == 0); + if (count > 0) + xfs_dir3_leafn_moveents(args, blk1->bp, &hdr1, ents1, + hdr1.count - count, blk2->bp, + &hdr2, ents2, 0, count); + else if (count < 0) + xfs_dir3_leafn_moveents(args, blk2->bp, &hdr2, ents2, 0, + blk1->bp, &hdr1, ents1, + hdr1.count, count); + + ASSERT(hdr1.count + hdr2.count == oldsum); + ASSERT(hdr1.stale + hdr2.stale == oldstale); + + /* log the changes made when moving the entries */ + dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1); + dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2); + xfs_dir3_leaf_log_header(args, blk1->bp); + xfs_dir3_leaf_log_header(args, blk2->bp); + + xfs_dir3_leaf_check(dp, blk1->bp); + xfs_dir3_leaf_check(dp, blk2->bp); + + /* + * Mark whether we're inserting into the old or new leaf. + */ + if (hdr1.count < hdr2.count) + state->inleaf = swap; + else if (hdr1.count > hdr2.count) + state->inleaf = !swap; + else + state->inleaf = swap ^ (blk1->index <= hdr1.count); + /* + * Adjust the expected index for insertion. + */ + if (!state->inleaf) + blk2->index = blk1->index - hdr1.count; + + /* + * Finally sanity check just to make sure we are not returning a + * negative index + */ + if (blk2->index < 0) { + state->inleaf = 1; + blk2->index = 0; + xfs_alert(dp->i_mount, + "%s: picked the wrong leaf? reverting original leaf: blk1->index %d", + __func__, blk1->index); + } +} + +static int +xfs_dir3_data_block_free( + xfs_da_args_t *args, + struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_free *free, + xfs_dir2_db_t fdb, + int findex, + struct xfs_buf *fbp, + int longest) +{ + int logfree = 0; + __be16 *bests; + struct xfs_dir3_icfree_hdr freehdr; + struct xfs_inode *dp = args->dp; + + dp->d_ops->free_hdr_from_disk(&freehdr, free); + bests = dp->d_ops->free_bests_p(free); + if (hdr) { + /* + * Data block is not empty, just set the free entry to the new + * value. + */ + bests[findex] = cpu_to_be16(longest); + xfs_dir2_free_log_bests(args, fbp, findex, findex); + return 0; + } + + /* One less used entry in the free table. */ + freehdr.nused--; + + /* + * If this was the last entry in the table, we can trim the table size + * back. There might be other entries at the end referring to + * non-existent data blocks, get those too. + */ + if (findex == freehdr.nvalid - 1) { + int i; /* free entry index */ + + for (i = findex - 1; i >= 0; i--) { + if (bests[i] != cpu_to_be16(NULLDATAOFF)) + break; + } + freehdr.nvalid = i + 1; + logfree = 0; + } else { + /* Not the last entry, just punch it out. */ + bests[findex] = cpu_to_be16(NULLDATAOFF); + logfree = 1; + } + + dp->d_ops->free_hdr_to_disk(free, &freehdr); + xfs_dir2_free_log_header(args, fbp); + + /* + * If there are no useful entries left in the block, get rid of the + * block if we can. + */ + if (!freehdr.nused) { + int error; + + error = xfs_dir2_shrink_inode(args, fdb, fbp); + if (error == 0) { + fbp = NULL; + logfree = 0; + } else if (error != ENOSPC || args->total != 0) + return error; + /* + * It's possible to get ENOSPC if there is no + * space reservation. In this case some one + * else will eventually get rid of this block. + */ + } + + /* Log the free entry that changed, unless we got rid of it. */ + if (logfree) + xfs_dir2_free_log_bests(args, fbp, findex, findex); + return 0; +} + +/* + * Remove an entry from a node directory. + * This removes the leaf entry and the data entry, + * and updates the free block if necessary. + */ +static int /* error */ +xfs_dir2_leafn_remove( + xfs_da_args_t *args, /* operation arguments */ + struct xfs_buf *bp, /* leaf buffer */ + int index, /* leaf entry index */ + xfs_da_state_blk_t *dblk, /* data block */ + int *rval) /* resulting block needs join */ +{ + xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_db_t db; /* data block number */ + struct xfs_buf *dbp; /* data block buffer */ + xfs_dir2_data_entry_t *dep; /* data block entry */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + int longest; /* longest data free entry */ + int off; /* data block entry offset */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log data header */ + int needscan; /* need to rescan data frees */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_dir2_data_free *bf; /* bestfree table */ + struct xfs_dir3_icleaf_hdr leafhdr; + struct xfs_dir2_leaf_entry *ents; + + trace_xfs_dir2_leafn_remove(args, index); + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + leaf = bp->b_addr; + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + + /* + * Point to the entry we're removing. + */ + lep = &ents[index]; + + /* + * Extract the data block and offset from the entry. + */ + db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); + ASSERT(dblk->blkno == db); + off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)); + ASSERT(dblk->index == off); + + /* + * Kill the leaf entry by marking it stale. + * Log the leaf block changes. + */ + leafhdr.stale++; + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(args, bp); + + lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); + xfs_dir3_leaf_log_ents(args, bp, index, index); + + /* + * Make the data entry free. Keep track of the longest freespace + * in the data block in case it changes. + */ + dbp = dblk->bp; + hdr = dbp->b_addr; + dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); + bf = dp->d_ops->data_bestfree_p(hdr); + longest = be16_to_cpu(bf[0].length); + needlog = needscan = 0; + xfs_dir2_data_make_free(args, dbp, off, + dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); + /* + * Rescan the data block freespaces for bestfree. + * Log the data block header if needed. + */ + if (needscan) + xfs_dir2_data_freescan(dp, hdr, &needlog); + if (needlog) + xfs_dir2_data_log_header(args, dbp); + xfs_dir3_data_check(dp, dbp); + /* + * If the longest data block freespace changes, need to update + * the corresponding freeblock entry. + */ + if (longest < be16_to_cpu(bf[0].length)) { + int error; /* error return value */ + struct xfs_buf *fbp; /* freeblock buffer */ + xfs_dir2_db_t fdb; /* freeblock block number */ + int findex; /* index in freeblock entries */ + xfs_dir2_free_t *free; /* freeblock structure */ + + /* + * Convert the data block number to a free block, + * read in the free block. + */ + fdb = dp->d_ops->db_to_fdb(args->geo, db); + error = xfs_dir2_free_read(tp, dp, + xfs_dir2_db_to_da(args->geo, fdb), + &fbp); + if (error) + return error; + free = fbp->b_addr; +#ifdef DEBUG + { + struct xfs_dir3_icfree_hdr freehdr; + dp->d_ops->free_hdr_from_disk(&freehdr, free); + ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) * + (fdb - xfs_dir2_byte_to_db(args->geo, + XFS_DIR2_FREE_OFFSET))); + } +#endif + /* + * Calculate which entry we need to fix. + */ + findex = dp->d_ops->db_to_fdindex(args->geo, db); + longest = be16_to_cpu(bf[0].length); + /* + * If the data block is now empty we can get rid of it + * (usually). + */ + if (longest == args->geo->blksize - + dp->d_ops->data_entry_offset) { + /* + * Try to punch out the data block. + */ + error = xfs_dir2_shrink_inode(args, db, dbp); + if (error == 0) { + dblk->bp = NULL; + hdr = NULL; + } + /* + * We can get ENOSPC if there's no space reservation. + * In this case just drop the buffer and some one else + * will eventually get rid of the empty block. + */ + else if (!(error == ENOSPC && args->total == 0)) + return error; + } + /* + * If we got rid of the data block, we can eliminate that entry + * in the free block. + */ + error = xfs_dir3_data_block_free(args, hdr, free, + fdb, findex, fbp, longest); + if (error) + return error; + } + + xfs_dir3_leaf_check(dp, bp); + /* + * Return indication of whether this leaf block is empty enough + * to justify trying to join it with a neighbor. + */ + *rval = (dp->d_ops->leaf_hdr_size + + (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) < + args->geo->magicpct; + return 0; +} + +/* + * Split the leaf entries in the old block into old and new blocks. + */ +int /* error */ +xfs_dir2_leafn_split( + xfs_da_state_t *state, /* btree cursor */ + xfs_da_state_blk_t *oldblk, /* original block */ + xfs_da_state_blk_t *newblk) /* newly created block */ +{ + xfs_da_args_t *args; /* operation arguments */ + xfs_dablk_t blkno; /* new leaf block number */ + int error; /* error return value */ + xfs_mount_t *mp; /* filesystem mount point */ + struct xfs_inode *dp; + + /* + * Allocate space for a new leaf node. + */ + args = state->args; + dp = args->dp; + mp = dp->i_mount; + ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC); + error = xfs_da_grow_inode(args, &blkno); + if (error) { + return error; + } + /* + * Initialize the new leaf block. + */ + error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(args->geo, blkno), + &newblk->bp, XFS_DIR2_LEAFN_MAGIC); + if (error) + return error; + + newblk->blkno = blkno; + newblk->magic = XFS_DIR2_LEAFN_MAGIC; + /* + * Rebalance the entries across the two leaves, link the new + * block into the leaves. + */ + xfs_dir2_leafn_rebalance(state, oldblk, newblk); + error = xfs_da3_blk_link(state, oldblk, newblk); + if (error) { + return error; + } + /* + * Insert the new entry in the correct block. + */ + if (state->inleaf) + error = xfs_dir2_leafn_add(oldblk->bp, args, oldblk->index); + else + error = xfs_dir2_leafn_add(newblk->bp, args, newblk->index); + /* + * Update last hashval in each block since we added the name. + */ + oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL); + newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL); + xfs_dir3_leaf_check(dp, oldblk->bp); + xfs_dir3_leaf_check(dp, newblk->bp); + return error; +} + +/* + * Check a leaf block and its neighbors to see if the block should be + * collapsed into one or the other neighbor. Always keep the block + * with the smaller block number. + * If the current block is over 50% full, don't try to join it, return 0. + * If the block is empty, fill in the state structure and return 2. + * If it can be collapsed, fill in the state structure and return 1. + * If nothing can be done, return 0. + */ +int /* error */ +xfs_dir2_leafn_toosmall( + xfs_da_state_t *state, /* btree cursor */ + int *action) /* resulting action to take */ +{ + xfs_da_state_blk_t *blk; /* leaf block */ + xfs_dablk_t blkno; /* leaf block number */ + struct xfs_buf *bp; /* leaf buffer */ + int bytes; /* bytes in use */ + int count; /* leaf live entry count */ + int error; /* error return value */ + int forward; /* sibling block direction */ + int i; /* sibling counter */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + int rval; /* result from path_shift */ + struct xfs_dir3_icleaf_hdr leafhdr; + struct xfs_dir2_leaf_entry *ents; + struct xfs_inode *dp = state->args->dp; + + /* + * Check for the degenerate case of the block being over 50% full. + * If so, it's not worth even looking to see if we might be able + * to coalesce with a sibling. + */ + blk = &state->path.blk[state->path.active - 1]; + leaf = blk->bp->b_addr; + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir3_leaf_check(dp, blk->bp); + + count = leafhdr.count - leafhdr.stale; + bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]); + if (bytes > (state->args->geo->blksize >> 1)) { + /* + * Blk over 50%, don't try to join. + */ + *action = 0; + return 0; + } + /* + * Check for the degenerate case of the block being empty. + * If the block is empty, we'll simply delete it, no need to + * coalesce it with a sibling block. We choose (arbitrarily) + * to merge with the forward block unless it is NULL. + */ + if (count == 0) { + /* + * Make altpath point to the block we want to keep and + * path point to the block we want to drop (this one). + */ + forward = (leafhdr.forw != 0); + memcpy(&state->altpath, &state->path, sizeof(state->path)); + error = xfs_da3_path_shift(state, &state->altpath, forward, 0, + &rval); + if (error) + return error; + *action = rval ? 2 : 0; + return 0; + } + /* + * Examine each sibling block to see if we can coalesce with + * at least 25% free space to spare. We need to figure out + * whether to merge with the forward or the backward block. + * We prefer coalescing with the lower numbered sibling so as + * to shrink a directory over time. + */ + forward = leafhdr.forw < leafhdr.back; + for (i = 0, bp = NULL; i < 2; forward = !forward, i++) { + struct xfs_dir3_icleaf_hdr hdr2; + + blkno = forward ? leafhdr.forw : leafhdr.back; + if (blkno == 0) + continue; + /* + * Read the sibling leaf block. + */ + error = xfs_dir3_leafn_read(state->args->trans, dp, + blkno, -1, &bp); + if (error) + return error; + + /* + * Count bytes in the two blocks combined. + */ + count = leafhdr.count - leafhdr.stale; + bytes = state->args->geo->blksize - + (state->args->geo->blksize >> 2); + + leaf = bp->b_addr; + dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + count += hdr2.count - hdr2.stale; + bytes -= count * sizeof(ents[0]); + + /* + * Fits with at least 25% to spare. + */ + if (bytes >= 0) + break; + xfs_trans_brelse(state->args->trans, bp); + } + /* + * Didn't like either block, give up. + */ + if (i >= 2) { + *action = 0; + return 0; + } + + /* + * Make altpath point to the block we want to keep (the lower + * numbered block) and path point to the block we want to drop. + */ + memcpy(&state->altpath, &state->path, sizeof(state->path)); + if (blkno < blk->blkno) + error = xfs_da3_path_shift(state, &state->altpath, forward, 0, + &rval); + else + error = xfs_da3_path_shift(state, &state->path, forward, 0, + &rval); + if (error) { + return error; + } + *action = rval ? 0 : 1; + return 0; +} + +/* + * Move all the leaf entries from drop_blk to save_blk. + * This is done as part of a join operation. + */ +void +xfs_dir2_leafn_unbalance( + xfs_da_state_t *state, /* cursor */ + xfs_da_state_blk_t *drop_blk, /* dead block */ + xfs_da_state_blk_t *save_blk) /* surviving block */ +{ + xfs_da_args_t *args; /* operation arguments */ + xfs_dir2_leaf_t *drop_leaf; /* dead leaf structure */ + xfs_dir2_leaf_t *save_leaf; /* surviving leaf structure */ + struct xfs_dir3_icleaf_hdr savehdr; + struct xfs_dir3_icleaf_hdr drophdr; + struct xfs_dir2_leaf_entry *sents; + struct xfs_dir2_leaf_entry *dents; + struct xfs_inode *dp = state->args->dp; + + args = state->args; + ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); + ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); + drop_leaf = drop_blk->bp->b_addr; + save_leaf = save_blk->bp->b_addr; + + dp->d_ops->leaf_hdr_from_disk(&savehdr, save_leaf); + dp->d_ops->leaf_hdr_from_disk(&drophdr, drop_leaf); + sents = dp->d_ops->leaf_ents_p(save_leaf); + dents = dp->d_ops->leaf_ents_p(drop_leaf); + + /* + * If there are any stale leaf entries, take this opportunity + * to purge them. + */ + if (drophdr.stale) + xfs_dir3_leaf_compact(args, &drophdr, drop_blk->bp); + if (savehdr.stale) + xfs_dir3_leaf_compact(args, &savehdr, save_blk->bp); + + /* + * Move the entries from drop to the appropriate end of save. + */ + drop_blk->hashval = be32_to_cpu(dents[drophdr.count - 1].hashval); + if (xfs_dir2_leafn_order(dp, save_blk->bp, drop_blk->bp)) + xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0, + save_blk->bp, &savehdr, sents, 0, + drophdr.count); + else + xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0, + save_blk->bp, &savehdr, sents, + savehdr.count, drophdr.count); + save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval); + + /* log the changes made when moving the entries */ + dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr); + dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr); + xfs_dir3_leaf_log_header(args, save_blk->bp); + xfs_dir3_leaf_log_header(args, drop_blk->bp); + + xfs_dir3_leaf_check(dp, save_blk->bp); + xfs_dir3_leaf_check(dp, drop_blk->bp); +} + +/* + * Top-level node form directory addname routine. + */ +int /* error */ +xfs_dir2_node_addname( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_da_state_blk_t *blk; /* leaf block for insert */ + int error; /* error return value */ + int rval; /* sub-return value */ + xfs_da_state_t *state; /* btree cursor */ + + trace_xfs_dir2_node_addname(args); + + /* + * Allocate and initialize the state (btree cursor). + */ + state = xfs_da_state_alloc(); + state->args = args; + state->mp = args->dp->i_mount; + /* + * Look up the name. We're not supposed to find it, but + * this gives us the insertion point. + */ + error = xfs_da3_node_lookup_int(state, &rval); + if (error) + rval = error; + if (rval != ENOENT) { + goto done; + } + /* + * Add the data entry to a data block. + * Extravalid is set to a freeblock found by lookup. + */ + rval = xfs_dir2_node_addname_int(args, + state->extravalid ? &state->extrablk : NULL); + if (rval) { + goto done; + } + blk = &state->path.blk[state->path.active - 1]; + ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); + /* + * Add the new leaf entry. + */ + rval = xfs_dir2_leafn_add(blk->bp, args, blk->index); + if (rval == 0) { + /* + * It worked, fix the hash values up the btree. + */ + if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) + xfs_da3_fixhashpath(state, &state->path); + } else { + /* + * It didn't work, we need to split the leaf block. + */ + if (args->total == 0) { + ASSERT(rval == ENOSPC); + goto done; + } + /* + * Split the leaf block and insert the new entry. + */ + rval = xfs_da3_split(state); + } +done: + xfs_da_state_free(state); + return rval; +} + +/* + * Add the data entry for a node-format directory name addition. + * The leaf entry is added in xfs_dir2_leafn_add. + * We may enter with a freespace block that the lookup found. + */ +static int /* error */ +xfs_dir2_node_addname_int( + xfs_da_args_t *args, /* operation arguments */ + xfs_da_state_blk_t *fblk) /* optional freespace block */ +{ + xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_db_t dbno; /* data block number */ + struct xfs_buf *dbp; /* data block buffer */ + xfs_dir2_data_entry_t *dep; /* data entry pointer */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_data_unused_t *dup; /* data unused entry pointer */ + int error; /* error return value */ + xfs_dir2_db_t fbno; /* freespace block number */ + struct xfs_buf *fbp; /* freespace buffer */ + int findex; /* freespace entry index */ + xfs_dir2_free_t *free=NULL; /* freespace block structure */ + xfs_dir2_db_t ifbno; /* initial freespace block no */ + xfs_dir2_db_t lastfbno=0; /* highest freespace block no */ + int length; /* length of the new entry */ + int logfree; /* need to log free entry */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log data header */ + int needscan; /* need to rescan data frees */ + __be16 *tagp; /* data entry tag pointer */ + xfs_trans_t *tp; /* transaction pointer */ + __be16 *bests; + struct xfs_dir3_icfree_hdr freehdr; + struct xfs_dir2_data_free *bf; + + dp = args->dp; + mp = dp->i_mount; + tp = args->trans; + length = dp->d_ops->data_entsize(args->namelen); + /* + * If we came in with a freespace block that means that lookup + * found an entry with our hash value. This is the freespace + * block for that data entry. + */ + if (fblk) { + fbp = fblk->bp; + /* + * Remember initial freespace block number. + */ + ifbno = fblk->blkno; + free = fbp->b_addr; + findex = fblk->index; + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); + + /* + * This means the free entry showed that the data block had + * space for our entry, so we remembered it. + * Use that data block. + */ + if (findex >= 0) { + ASSERT(findex < freehdr.nvalid); + ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF); + ASSERT(be16_to_cpu(bests[findex]) >= length); + dbno = freehdr.firstdb + findex; + } else { + /* + * The data block looked at didn't have enough room. + * We'll start at the beginning of the freespace entries. + */ + dbno = -1; + findex = 0; + } + } else { + /* + * Didn't come in with a freespace block, so no data block. + */ + ifbno = dbno = -1; + fbp = NULL; + findex = 0; + } + + /* + * If we don't have a data block yet, we're going to scan the + * freespace blocks looking for one. Figure out what the + * highest freespace block number is. + */ + if (dbno == -1) { + xfs_fileoff_t fo; /* freespace block number */ + + if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) + return error; + lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); + fbno = ifbno; + } + /* + * While we haven't identified a data block, search the freeblock + * data for a good data block. If we find a null freeblock entry, + * indicating a hole in the data blocks, remember that. + */ + while (dbno == -1) { + /* + * If we don't have a freeblock in hand, get the next one. + */ + if (fbp == NULL) { + /* + * Happens the first time through unless lookup gave + * us a freespace block to start with. + */ + if (++fbno == 0) + fbno = xfs_dir2_byte_to_db(args->geo, + XFS_DIR2_FREE_OFFSET); + /* + * If it's ifbno we already looked at it. + */ + if (fbno == ifbno) + fbno++; + /* + * If it's off the end we're done. + */ + if (fbno >= lastfbno) + break; + /* + * Read the block. There can be holes in the + * freespace blocks, so this might not succeed. + * This should be really rare, so there's no reason + * to avoid it. + */ + error = xfs_dir2_free_try_read(tp, dp, + xfs_dir2_db_to_da(args->geo, fbno), + &fbp); + if (error) + return error; + if (!fbp) + continue; + free = fbp->b_addr; + findex = 0; + } + /* + * Look at the current free entry. Is it good enough? + * + * The bests initialisation should be where the bufer is read in + * the above branch. But gcc is too stupid to realise that bests + * and the freehdr are actually initialised if they are placed + * there, so we have to do it here to avoid warnings. Blech. + */ + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); + if (be16_to_cpu(bests[findex]) != NULLDATAOFF && + be16_to_cpu(bests[findex]) >= length) + dbno = freehdr.firstdb + findex; + else { + /* + * Are we done with the freeblock? + */ + if (++findex == freehdr.nvalid) { + /* + * Drop the block. + */ + xfs_trans_brelse(tp, fbp); + fbp = NULL; + if (fblk && fblk->bp) + fblk->bp = NULL; + } + } + } + /* + * If we don't have a data block, we need to allocate one and make + * the freespace entries refer to it. + */ + if (unlikely(dbno == -1)) { + /* + * Not allowed to allocate, return failure. + */ + if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) + return ENOSPC; + + /* + * Allocate and initialize the new data block. + */ + if (unlikely((error = xfs_dir2_grow_inode(args, + XFS_DIR2_DATA_SPACE, + &dbno)) || + (error = xfs_dir3_data_init(args, dbno, &dbp)))) + return error; + + /* + * If (somehow) we have a freespace block, get rid of it. + */ + if (fbp) + xfs_trans_brelse(tp, fbp); + if (fblk && fblk->bp) + fblk->bp = NULL; + + /* + * Get the freespace block corresponding to the data block + * that was just allocated. + */ + fbno = dp->d_ops->db_to_fdb(args->geo, dbno); + error = xfs_dir2_free_try_read(tp, dp, + xfs_dir2_db_to_da(args->geo, fbno), + &fbp); + if (error) + return error; + + /* + * If there wasn't a freespace block, the read will + * return a NULL fbp. Allocate and initialize a new one. + */ + if (!fbp) { + error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, + &fbno); + if (error) + return error; + + if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) { + xfs_alert(mp, + "%s: dir ino %llu needed freesp block %lld for\n" + " data block %lld, got %lld ifbno %llu lastfbno %d", + __func__, (unsigned long long)dp->i_ino, + (long long)dp->d_ops->db_to_fdb( + args->geo, dbno), + (long long)dbno, (long long)fbno, + (unsigned long long)ifbno, lastfbno); + if (fblk) { + xfs_alert(mp, + " fblk 0x%p blkno %llu index %d magic 0x%x", + fblk, + (unsigned long long)fblk->blkno, + fblk->index, + fblk->magic); + } else { + xfs_alert(mp, " ... fblk is NULL"); + } + XFS_ERROR_REPORT("xfs_dir2_node_addname_int", + XFS_ERRLEVEL_LOW, mp); + return EFSCORRUPTED; + } + + /* + * Get a buffer for the new block. + */ + error = xfs_dir3_free_get_buf(args, fbno, &fbp); + if (error) + return error; + free = fbp->b_addr; + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); + + /* + * Remember the first slot as our empty slot. + */ + freehdr.firstdb = + (fbno - xfs_dir2_byte_to_db(args->geo, + XFS_DIR2_FREE_OFFSET)) * + dp->d_ops->free_max_bests(args->geo); + } else { + free = fbp->b_addr; + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); + } + + /* + * Set the freespace block index from the data block number. + */ + findex = dp->d_ops->db_to_fdindex(args->geo, dbno); + /* + * If it's after the end of the current entries in the + * freespace block, extend that table. + */ + if (findex >= freehdr.nvalid) { + ASSERT(findex < dp->d_ops->free_max_bests(args->geo)); + freehdr.nvalid = findex + 1; + /* + * Tag new entry so nused will go up. + */ + bests[findex] = cpu_to_be16(NULLDATAOFF); + } + /* + * If this entry was for an empty data block + * (this should always be true) then update the header. + */ + if (bests[findex] == cpu_to_be16(NULLDATAOFF)) { + freehdr.nused++; + dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); + xfs_dir2_free_log_header(args, fbp); + } + /* + * Update the real value in the table. + * We haven't allocated the data entry yet so this will + * change again. + */ + hdr = dbp->b_addr; + bf = dp->d_ops->data_bestfree_p(hdr); + bests[findex] = bf[0].length; + logfree = 1; + } + /* + * We had a data block so we don't have to make a new one. + */ + else { + /* + * If just checking, we succeeded. + */ + if (args->op_flags & XFS_DA_OP_JUSTCHECK) + return 0; + + /* + * Read the data block in. + */ + error = xfs_dir3_data_read(tp, dp, + xfs_dir2_db_to_da(args->geo, dbno), + -1, &dbp); + if (error) + return error; + hdr = dbp->b_addr; + bf = dp->d_ops->data_bestfree_p(hdr); + logfree = 0; + } + ASSERT(be16_to_cpu(bf[0].length) >= length); + /* + * Point to the existing unused space. + */ + dup = (xfs_dir2_data_unused_t *) + ((char *)hdr + be16_to_cpu(bf[0].offset)); + needscan = needlog = 0; + /* + * Mark the first part of the unused space, inuse for us. + */ + xfs_dir2_data_use_free(args, dbp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, + &needlog, &needscan); + /* + * Fill in the new entry and log it. + */ + dep = (xfs_dir2_data_entry_t *)dup; + dep->inumber = cpu_to_be64(args->inumber); + dep->namelen = args->namelen; + memcpy(dep->name, args->name, dep->namelen); + dp->d_ops->data_put_ftype(dep, args->filetype); + tagp = dp->d_ops->data_entry_tag_p(dep); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); + xfs_dir2_data_log_entry(args, dbp, dep); + /* + * Rescan the block for bestfree if needed. + */ + if (needscan) + xfs_dir2_data_freescan(dp, hdr, &needlog); + /* + * Log the data block header if needed. + */ + if (needlog) + xfs_dir2_data_log_header(args, dbp); + /* + * If the freespace entry is now wrong, update it. + */ + bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */ + if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) { + bests[findex] = bf[0].length; + logfree = 1; + } + /* + * Log the freespace entry if needed. + */ + if (logfree) + xfs_dir2_free_log_bests(args, fbp, findex, findex); + /* + * Return the data block and offset in args, then drop the data block. + */ + args->blkno = (xfs_dablk_t)dbno; + args->index = be16_to_cpu(*tagp); + return 0; +} + +/* + * Lookup an entry in a node-format directory. + * All the real work happens in xfs_da3_node_lookup_int. + * The only real output is the inode number of the entry. + */ +int /* error */ +xfs_dir2_node_lookup( + xfs_da_args_t *args) /* operation arguments */ +{ + int error; /* error return value */ + int i; /* btree level */ + int rval; /* operation return value */ + xfs_da_state_t *state; /* btree cursor */ + + trace_xfs_dir2_node_lookup(args); + + /* + * Allocate and initialize the btree cursor. + */ + state = xfs_da_state_alloc(); + state->args = args; + state->mp = args->dp->i_mount; + /* + * Fill in the path to the entry in the cursor. + */ + error = xfs_da3_node_lookup_int(state, &rval); + if (error) + rval = error; + else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) { + /* If a CI match, dup the actual name and return EEXIST */ + xfs_dir2_data_entry_t *dep; + + dep = (xfs_dir2_data_entry_t *) + ((char *)state->extrablk.bp->b_addr + + state->extrablk.index); + rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen); + } + /* + * Release the btree blocks and leaf block. + */ + for (i = 0; i < state->path.active; i++) { + xfs_trans_brelse(args->trans, state->path.blk[i].bp); + state->path.blk[i].bp = NULL; + } + /* + * Release the data block if we have it. + */ + if (state->extravalid && state->extrablk.bp) { + xfs_trans_brelse(args->trans, state->extrablk.bp); + state->extrablk.bp = NULL; + } + xfs_da_state_free(state); + return rval; +} + +/* + * Remove an entry from a node-format directory. + */ +int /* error */ +xfs_dir2_node_removename( + struct xfs_da_args *args) /* operation arguments */ +{ + struct xfs_da_state_blk *blk; /* leaf block */ + int error; /* error return value */ + int rval; /* operation return value */ + struct xfs_da_state *state; /* btree cursor */ + + trace_xfs_dir2_node_removename(args); + + /* + * Allocate and initialize the btree cursor. + */ + state = xfs_da_state_alloc(); + state->args = args; + state->mp = args->dp->i_mount; + + /* Look up the entry we're deleting, set up the cursor. */ + error = xfs_da3_node_lookup_int(state, &rval); + if (error) + goto out_free; + + /* Didn't find it, upper layer screwed up. */ + if (rval != EEXIST) { + error = rval; + goto out_free; + } + + blk = &state->path.blk[state->path.active - 1]; + ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); + ASSERT(state->extravalid); + /* + * Remove the leaf and data entries. + * Extrablk refers to the data block. + */ + error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, + &state->extrablk, &rval); + if (error) + goto out_free; + /* + * Fix the hash values up the btree. + */ + xfs_da3_fixhashpath(state, &state->path); + /* + * If we need to join leaf blocks, do it. + */ + if (rval && state->path.active > 1) + error = xfs_da3_join(state); + /* + * If no errors so far, try conversion to leaf format. + */ + if (!error) + error = xfs_dir2_node_to_leaf(state); +out_free: + xfs_da_state_free(state); + return error; +} + +/* + * Replace an entry's inode number in a node-format directory. + */ +int /* error */ +xfs_dir2_node_replace( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_da_state_blk_t *blk; /* leaf block */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ + xfs_dir2_data_entry_t *dep; /* data entry changed */ + int error; /* error return value */ + int i; /* btree level */ + xfs_ino_t inum; /* new inode number */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry being changed */ + int rval; /* internal return value */ + xfs_da_state_t *state; /* btree cursor */ + + trace_xfs_dir2_node_replace(args); + + /* + * Allocate and initialize the btree cursor. + */ + state = xfs_da_state_alloc(); + state->args = args; + state->mp = args->dp->i_mount; + inum = args->inumber; + /* + * Lookup the entry to change in the btree. + */ + error = xfs_da3_node_lookup_int(state, &rval); + if (error) { + rval = error; + } + /* + * It should be found, since the vnodeops layer has looked it up + * and locked it. But paranoia is good. + */ + if (rval == EEXIST) { + struct xfs_dir2_leaf_entry *ents; + /* + * Find the leaf entry. + */ + blk = &state->path.blk[state->path.active - 1]; + ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); + leaf = blk->bp->b_addr; + ents = args->dp->d_ops->leaf_ents_p(leaf); + lep = &ents[blk->index]; + ASSERT(state->extravalid); + /* + * Point to the data entry. + */ + hdr = state->extrablk.bp->b_addr; + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); + dep = (xfs_dir2_data_entry_t *) + ((char *)hdr + + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(lep->address))); + ASSERT(inum != be64_to_cpu(dep->inumber)); + /* + * Fill in the new inode number and log the entry. + */ + dep->inumber = cpu_to_be64(inum); + args->dp->d_ops->data_put_ftype(dep, args->filetype); + xfs_dir2_data_log_entry(args, state->extrablk.bp, dep); + rval = 0; + } + /* + * Didn't find it, and we're holding a data block. Drop it. + */ + else if (state->extravalid) { + xfs_trans_brelse(args->trans, state->extrablk.bp); + state->extrablk.bp = NULL; + } + /* + * Release all the buffers in the cursor. + */ + for (i = 0; i < state->path.active; i++) { + xfs_trans_brelse(args->trans, state->path.blk[i].bp); + state->path.blk[i].bp = NULL; + } + xfs_da_state_free(state); + return rval; +} + +/* + * Trim off a trailing empty freespace block. + * Return (in rvalp) 1 if we did it, 0 if not. + */ +int /* error */ +xfs_dir2_node_trim_free( + xfs_da_args_t *args, /* operation arguments */ + xfs_fileoff_t fo, /* free block number */ + int *rvalp) /* out: did something */ +{ + struct xfs_buf *bp; /* freespace buffer */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + xfs_dir2_free_t *free; /* freespace structure */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_trans_t *tp; /* transaction pointer */ + struct xfs_dir3_icfree_hdr freehdr; + + dp = args->dp; + mp = dp->i_mount; + tp = args->trans; + /* + * Read the freespace block. + */ + error = xfs_dir2_free_try_read(tp, dp, fo, &bp); + if (error) + return error; + /* + * There can be holes in freespace. If fo is a hole, there's + * nothing to do. + */ + if (!bp) + return 0; + free = bp->b_addr; + dp->d_ops->free_hdr_from_disk(&freehdr, free); + + /* + * If there are used entries, there's nothing to do. + */ + if (freehdr.nused > 0) { + xfs_trans_brelse(tp, bp); + *rvalp = 0; + return 0; + } + /* + * Blow the block away. + */ + error = xfs_dir2_shrink_inode(args, + xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo), bp); + if (error) { + /* + * Can't fail with ENOSPC since that only happens with no + * space reservation, when breaking up an extent into two + * pieces. This is the last block of an extent. + */ + ASSERT(error != ENOSPC); + xfs_trans_brelse(tp, bp); + return error; + } + /* + * Return that we succeeded. + */ + *rvalp = 1; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h new file mode 100644 index 000000000000..27ce0794d196 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_PRIV_H__ +#define __XFS_DIR2_PRIV_H__ + +struct dir_context; + +/* + * Directory offset/block conversion functions. + * + * DB blocks here are logical directory block numbers, not filesystem blocks. + */ + +/* + * Convert dataptr to byte in file space + */ +static inline xfs_dir2_off_t +xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp) +{ + return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; +} + +/* + * Convert byte in file space to dataptr. It had better be aligned. + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by) +{ + return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); +} + +/* + * Convert byte in space to (DB) block + */ +static inline xfs_dir2_db_t +xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return (xfs_dir2_db_t)(by >> geo->blklog); +} + +/* + * Convert dataptr to a block number + */ +static inline xfs_dir2_db_t +xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp)); +} + +/* + * Convert byte in space to offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1)); +} + +/* + * Convert dataptr to a byte offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp)); +} + +/* + * Convert block and offset to byte in space + */ +static inline xfs_dir2_off_t +xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return ((xfs_dir2_off_t)db << geo->blklog) + o; +} + +/* + * Convert block (DB) to block (dablk) + */ +static inline xfs_dablk_t +xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ + return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog)); +} + +/* + * Convert byte in space to (DA) block + */ +static inline xfs_dablk_t +xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by)); +} + +/* + * Convert block and offset to dataptr + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o)); +} + +/* + * Convert block (dablk) to block (DB) + */ +static inline xfs_dir2_db_t +xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da) +{ + return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog)); +} + +/* + * Convert block (dablk) to byte offset in space + */ +static inline xfs_dir2_off_t +xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da) +{ + return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0); +} + +/* + * Directory tail pointer accessor functions. Based on block geometry. + */ +static inline struct xfs_dir2_block_tail * +xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) +{ + return ((struct xfs_dir2_block_tail *) + ((char *)hdr + geo->blksize)) - 1; +} + +static inline struct xfs_dir2_leaf_tail * +xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) +{ + return (struct xfs_dir2_leaf_tail *) + ((char *)lp + geo->blksize - + sizeof(struct xfs_dir2_leaf_tail)); +} + +/* xfs_dir2.c */ +extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); +extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, + xfs_dir2_db_t *dbp); +extern int xfs_dir_cilookup_result(struct xfs_da_args *args, + const unsigned char *name, int len); + +#define S_SHIFT 12 +extern const unsigned char xfs_mode_to_ftype[]; + +extern unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, + __uint8_t filetype); + + +/* xfs_dir2_block.c */ +extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_buf **bpp); +extern int xfs_dir2_block_addname(struct xfs_da_args *args); +extern int xfs_dir2_block_lookup(struct xfs_da_args *args); +extern int xfs_dir2_block_removename(struct xfs_da_args *args); +extern int xfs_dir2_block_replace(struct xfs_da_args *args); +extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, + struct xfs_buf *lbp, struct xfs_buf *dbp); + +/* xfs_dir2_data.c */ +#ifdef DEBUG +#define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp); +#else +#define xfs_dir3_data_check(dp,bp) +#endif + +extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); +extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); +extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, + xfs_daddr_t mapped_bno); + +extern struct xfs_dir2_data_free * +xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup, + int *loghead); +extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, + struct xfs_buf **bpp); + +/* xfs_dir2_leaf.c */ +extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); +extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, + struct xfs_buf *dbp); +extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); +extern void xfs_dir3_leaf_compact(struct xfs_da_args *args, + struct xfs_dir3_icleaf_hdr *leafhdr, struct xfs_buf *bp); +extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, + struct xfs_dir2_leaf_entry *ents, int *indexp, + int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); +extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, + struct xfs_buf **bpp, __uint16_t magic); +extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args, + struct xfs_buf *bp, int first, int last); +extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args, + struct xfs_buf *bp); +extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); +extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); +extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); +extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, + struct xfs_buf *lbp); +extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, + struct xfs_buf *lbp, xfs_dir2_db_t db); +extern struct xfs_dir2_leaf_entry * +xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr, + struct xfs_dir2_leaf_entry *ents, int index, int compact, + int lowstale, int highstale, int *lfloglow, int *lfloghigh); +extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); + +extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, + struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); + +/* xfs_dir2_node.c */ +extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, + struct xfs_buf *lbp); +extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp, + struct xfs_buf *bp, int *count); +extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp, + struct xfs_da_args *args, int *indexp, + struct xfs_da_state *state); +extern int xfs_dir2_leafn_order(struct xfs_inode *dp, struct xfs_buf *leaf1_bp, + struct xfs_buf *leaf2_bp); +extern int xfs_dir2_leafn_split(struct xfs_da_state *state, + struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); +extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); +extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk, + struct xfs_da_state_blk *save_blk); +extern int xfs_dir2_node_addname(struct xfs_da_args *args); +extern int xfs_dir2_node_lookup(struct xfs_da_args *args); +extern int xfs_dir2_node_removename(struct xfs_da_args *args); +extern int xfs_dir2_node_replace(struct xfs_da_args *args); +extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, + int *rvalp); +extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t fbno, struct xfs_buf **bpp); + +/* xfs_dir2_sf.c */ +extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, + struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp); +extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp, + int size, xfs_dir2_sf_hdr_t *sfhp); +extern int xfs_dir2_sf_addname(struct xfs_da_args *args); +extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); +extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); +extern int xfs_dir2_sf_removename(struct xfs_da_args *args); +extern int xfs_dir2_sf_replace(struct xfs_da_args *args); + +/* xfs_dir2_readdir.c */ +extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, + size_t bufsize); + +#endif /* __XFS_DIR2_PRIV_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c new file mode 100644 index 000000000000..ab3563b87995 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -0,0 +1,1184 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_error.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_trace.h" +#include "xfs_dinode.h" + +/* + * Prototypes for internal functions. + */ +static void xfs_dir2_sf_addname_easy(xfs_da_args_t *args, + xfs_dir2_sf_entry_t *sfep, + xfs_dir2_data_aoff_t offset, + int new_isize); +static void xfs_dir2_sf_addname_hard(xfs_da_args_t *args, int objchange, + int new_isize); +static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange, + xfs_dir2_sf_entry_t **sfepp, + xfs_dir2_data_aoff_t *offsetp); +#ifdef DEBUG +static void xfs_dir2_sf_check(xfs_da_args_t *args); +#else +#define xfs_dir2_sf_check(args) +#endif /* DEBUG */ +#if XFS_BIG_INUMS +static void xfs_dir2_sf_toino4(xfs_da_args_t *args); +static void xfs_dir2_sf_toino8(xfs_da_args_t *args); +#endif /* XFS_BIG_INUMS */ + +/* + * Given a block directory (dp/block), calculate its size as a shortform (sf) + * directory and a header for the sf directory, if it will fit it the + * space currently present in the inode. If it won't fit, the output + * size is too big (but not accurate). + */ +int /* size for sf form */ +xfs_dir2_block_sfsize( + xfs_inode_t *dp, /* incore inode pointer */ + xfs_dir2_data_hdr_t *hdr, /* block directory data */ + xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */ +{ + xfs_dir2_dataptr_t addr; /* data entry address */ + xfs_dir2_leaf_entry_t *blp; /* leaf area of the block */ + xfs_dir2_block_tail_t *btp; /* tail area of the block */ + int count; /* shortform entry count */ + xfs_dir2_data_entry_t *dep; /* data entry in the block */ + int i; /* block entry index */ + int i8count; /* count of big-inode entries */ + int isdot; /* entry is "." */ + int isdotdot; /* entry is ".." */ + xfs_mount_t *mp; /* mount structure pointer */ + int namelen; /* total name bytes */ + xfs_ino_t parent = 0; /* parent inode number */ + int size=0; /* total computed size */ + int has_ftype; + struct xfs_da_geometry *geo; + + mp = dp->i_mount; + geo = mp->m_dir_geo; + + /* + * if there is a filetype field, add the extra byte to the namelen + * for each entry that we see. + */ + has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0; + + count = i8count = namelen = 0; + btp = xfs_dir2_block_tail_p(geo, hdr); + blp = xfs_dir2_block_leaf_p(btp); + + /* + * Iterate over the block's data entries by using the leaf pointers. + */ + for (i = 0; i < be32_to_cpu(btp->count); i++) { + if ((addr = be32_to_cpu(blp[i].address)) == XFS_DIR2_NULL_DATAPTR) + continue; + /* + * Calculate the pointer to the entry at hand. + */ + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(geo, addr)); + /* + * Detect . and .., so we can special-case them. + * . is not included in sf directories. + * .. is included by just the parent inode number. + */ + isdot = dep->namelen == 1 && dep->name[0] == '.'; + isdotdot = + dep->namelen == 2 && + dep->name[0] == '.' && dep->name[1] == '.'; +#if XFS_BIG_INUMS + if (!isdot) + i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM; +#endif + /* take into account the file type field */ + if (!isdot && !isdotdot) { + count++; + namelen += dep->namelen + has_ftype; + } else if (isdotdot) + parent = be64_to_cpu(dep->inumber); + /* + * Calculate the new size, see if we should give up yet. + */ + size = xfs_dir2_sf_hdr_size(i8count) + /* header */ + count + /* namelen */ + count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */ + namelen + /* name */ + (i8count ? /* inumber */ + (uint)sizeof(xfs_dir2_ino8_t) * count : + (uint)sizeof(xfs_dir2_ino4_t) * count); + if (size > XFS_IFORK_DSIZE(dp)) + return size; /* size value is a failure */ + } + /* + * Create the output header, if it worked. + */ + sfhp->count = count; + sfhp->i8count = i8count; + dp->d_ops->sf_put_parent_ino(sfhp, parent); + return size; +} + +/* + * Convert a block format directory to shortform. + * Caller has already checked that it will fit, and built us a header. + */ +int /* error */ +xfs_dir2_block_to_sf( + xfs_da_args_t *args, /* operation arguments */ + struct xfs_buf *bp, + int size, /* shortform directory size */ + xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ +{ + xfs_dir2_data_hdr_t *hdr; /* block header */ + xfs_dir2_block_tail_t *btp; /* block tail pointer */ + xfs_dir2_data_entry_t *dep; /* data entry pointer */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_data_unused_t *dup; /* unused data pointer */ + char *endptr; /* end of data entries */ + int error; /* error return value */ + int logflags; /* inode logging flags */ + xfs_mount_t *mp; /* filesystem mount point */ + char *ptr; /* current data pointer */ + xfs_dir2_sf_entry_t *sfep; /* shortform entry */ + xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ + xfs_dir2_sf_hdr_t *dst; /* temporary data buffer */ + + trace_xfs_dir2_block_to_sf(args); + + dp = args->dp; + mp = dp->i_mount; + + /* + * allocate a temporary destination buffer the size of the inode + * to format the data into. Once we have formatted the data, we + * can free the block and copy the formatted data into the inode literal + * area. + */ + dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP); + hdr = bp->b_addr; + + /* + * Copy the header into the newly allocate local space. + */ + sfp = (xfs_dir2_sf_hdr_t *)dst; + memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); + + /* + * Set up to loop over the block's entries. + */ + btp = xfs_dir2_block_tail_p(args->geo, hdr); + ptr = (char *)dp->d_ops->data_entry_p(hdr); + endptr = (char *)xfs_dir2_block_leaf_p(btp); + sfep = xfs_dir2_sf_firstentry(sfp); + /* + * Loop over the active and unused entries. + * Stop when we reach the leaf/tail portion of the block. + */ + while (ptr < endptr) { + /* + * If it's unused, just skip over it. + */ + dup = (xfs_dir2_data_unused_t *)ptr; + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + ptr += be16_to_cpu(dup->length); + continue; + } + dep = (xfs_dir2_data_entry_t *)ptr; + /* + * Skip . + */ + if (dep->namelen == 1 && dep->name[0] == '.') + ASSERT(be64_to_cpu(dep->inumber) == dp->i_ino); + /* + * Skip .., but make sure the inode number is right. + */ + else if (dep->namelen == 2 && + dep->name[0] == '.' && dep->name[1] == '.') + ASSERT(be64_to_cpu(dep->inumber) == + dp->d_ops->sf_get_parent_ino(sfp)); + /* + * Normal entry, copy it into shortform. + */ + else { + sfep->namelen = dep->namelen; + xfs_dir2_sf_put_offset(sfep, + (xfs_dir2_data_aoff_t) + ((char *)dep - (char *)hdr)); + memcpy(sfep->name, dep->name, dep->namelen); + dp->d_ops->sf_put_ino(sfp, sfep, + be64_to_cpu(dep->inumber)); + dp->d_ops->sf_put_ftype(sfep, + dp->d_ops->data_get_ftype(dep)); + + sfep = dp->d_ops->sf_nextentry(sfp, sfep); + } + ptr += dp->d_ops->data_entsize(dep->namelen); + } + ASSERT((char *)sfep - (char *)sfp == size); + + /* now we are done with the block, we can shrink the inode */ + logflags = XFS_ILOG_CORE; + error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp); + if (error) { + ASSERT(error != ENOSPC); + goto out; + } + + /* + * The buffer is now unconditionally gone, whether + * xfs_dir2_shrink_inode worked or not. + * + * Convert the inode to local format and copy the data in. + */ + dp->i_df.if_flags &= ~XFS_IFEXTENTS; + dp->i_df.if_flags |= XFS_IFINLINE; + dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; + ASSERT(dp->i_df.if_bytes == 0); + xfs_idata_realloc(dp, size, XFS_DATA_FORK); + + logflags |= XFS_ILOG_DDATA; + memcpy(dp->i_df.if_u1.if_data, dst, size); + dp->i_d.di_size = size; + xfs_dir2_sf_check(args); +out: + xfs_trans_log_inode(args->trans, dp, logflags); + kmem_free(dst); + return error; +} + +/* + * Add a name to a shortform directory. + * There are two algorithms, "easy" and "hard" which we decide on + * before changing anything. + * Convert to block form if necessary, if the new entry won't fit. + */ +int /* error */ +xfs_dir2_sf_addname( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return value */ + int incr_isize; /* total change in size */ + int new_isize; /* di_size after adding name */ + int objchange; /* changing to 8-byte inodes */ + xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ + int pick; /* which algorithm to use */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ + + trace_xfs_dir2_sf_addname(args); + + ASSERT(xfs_dir2_sf_lookup(args) == ENOENT); + dp = args->dp; + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + /* + * Make sure the shortform value has some of its header. + */ + if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { + ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); + return EIO; + } + ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); + ASSERT(dp->i_df.if_u1.if_data != NULL); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); + /* + * Compute entry (and change in) size. + */ + incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen); + objchange = 0; +#if XFS_BIG_INUMS + /* + * Do we have to change to 8 byte inodes? + */ + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { + /* + * Yes, adjust the inode size. old count + (parent + new) + */ + incr_isize += + (sfp->count + 2) * + ((uint)sizeof(xfs_dir2_ino8_t) - + (uint)sizeof(xfs_dir2_ino4_t)); + objchange = 1; + } +#endif + new_isize = (int)dp->i_d.di_size + incr_isize; + /* + * Won't fit as shortform any more (due to size), + * or the pick routine says it won't (due to offset values). + */ + if (new_isize > XFS_IFORK_DSIZE(dp) || + (pick = + xfs_dir2_sf_addname_pick(args, objchange, &sfep, &offset)) == 0) { + /* + * Just checking or no space reservation, it doesn't fit. + */ + if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) + return ENOSPC; + /* + * Convert to block form then add the name. + */ + error = xfs_dir2_sf_to_block(args); + if (error) + return error; + return xfs_dir2_block_addname(args); + } + /* + * Just checking, it fits. + */ + if (args->op_flags & XFS_DA_OP_JUSTCHECK) + return 0; + /* + * Do it the easy way - just add it at the end. + */ + if (pick == 1) + xfs_dir2_sf_addname_easy(args, sfep, offset, new_isize); + /* + * Do it the hard way - look for a place to insert the new entry. + * Convert to 8 byte inode numbers first if necessary. + */ + else { + ASSERT(pick == 2); +#if XFS_BIG_INUMS + if (objchange) + xfs_dir2_sf_toino8(args); +#endif + xfs_dir2_sf_addname_hard(args, objchange, new_isize); + } + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); + return 0; +} + +/* + * Add the new entry the "easy" way. + * This is copying the old directory and adding the new entry at the end. + * Since it's sorted by "offset" we need room after the last offset + * that's already there, and then room to convert to a block directory. + * This is already checked by the pick routine. + */ +static void +xfs_dir2_sf_addname_easy( + xfs_da_args_t *args, /* operation arguments */ + xfs_dir2_sf_entry_t *sfep, /* pointer to new entry */ + xfs_dir2_data_aoff_t offset, /* offset to use for new ent */ + int new_isize) /* new directory size */ +{ + int byteoff; /* byte offset in sf dir */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + + dp = args->dp; + + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + byteoff = (int)((char *)sfep - (char *)sfp); + /* + * Grow the in-inode space. + */ + xfs_idata_realloc(dp, dp->d_ops->sf_entsize(sfp, args->namelen), + XFS_DATA_FORK); + /* + * Need to set up again due to realloc of the inode data. + */ + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff); + /* + * Fill in the new entry. + */ + sfep->namelen = args->namelen; + xfs_dir2_sf_put_offset(sfep, offset); + memcpy(sfep->name, args->name, sfep->namelen); + dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); + dp->d_ops->sf_put_ftype(sfep, args->filetype); + + /* + * Update the header and inode. + */ + sfp->count++; +#if XFS_BIG_INUMS + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) + sfp->i8count++; +#endif + dp->i_d.di_size = new_isize; + xfs_dir2_sf_check(args); +} + +/* + * Add the new entry the "hard" way. + * The caller has already converted to 8 byte inode numbers if necessary, + * in which case we need to leave the i8count at 1. + * Find a hole that the new entry will fit into, and copy + * the first part of the entries, the new entry, and the last part of + * the entries. + */ +/* ARGSUSED */ +static void +xfs_dir2_sf_addname_hard( + xfs_da_args_t *args, /* operation arguments */ + int objchange, /* changing inode number size */ + int new_isize) /* new directory size */ +{ + int add_datasize; /* data size need for new ent */ + char *buf; /* buffer for old */ + xfs_inode_t *dp; /* incore directory inode */ + int eof; /* reached end of old dir */ + int nbytes; /* temp for byte copies */ + xfs_dir2_data_aoff_t new_offset; /* next offset value */ + xfs_dir2_data_aoff_t offset; /* current offset value */ + int old_isize; /* previous di_size */ + xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */ + xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */ + xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ + xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */ + struct xfs_mount *mp; + + /* + * Copy the old directory to the stack buffer. + */ + dp = args->dp; + mp = dp->i_mount; + + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + old_isize = (int)dp->i_d.di_size; + buf = kmem_alloc(old_isize, KM_SLEEP); + oldsfp = (xfs_dir2_sf_hdr_t *)buf; + memcpy(oldsfp, sfp, old_isize); + /* + * Loop over the old directory finding the place we're going + * to insert the new entry. + * If it's going to end up at the end then oldsfep will point there. + */ + for (offset = dp->d_ops->data_first_offset, + oldsfep = xfs_dir2_sf_firstentry(oldsfp), + add_datasize = dp->d_ops->data_entsize(args->namelen), + eof = (char *)oldsfep == &buf[old_isize]; + !eof; + offset = new_offset + dp->d_ops->data_entsize(oldsfep->namelen), + oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep), + eof = (char *)oldsfep == &buf[old_isize]) { + new_offset = xfs_dir2_sf_get_offset(oldsfep); + if (offset + add_datasize <= new_offset) + break; + } + /* + * Get rid of the old directory, then allocate space for + * the new one. We do this so xfs_idata_realloc won't copy + * the data. + */ + xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK); + xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK); + /* + * Reset the pointer since the buffer was reallocated. + */ + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + /* + * Copy the first part of the directory, including the header. + */ + nbytes = (int)((char *)oldsfep - (char *)oldsfp); + memcpy(sfp, oldsfp, nbytes); + sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes); + /* + * Fill in the new entry, and update the header counts. + */ + sfep->namelen = args->namelen; + xfs_dir2_sf_put_offset(sfep, offset); + memcpy(sfep->name, args->name, sfep->namelen); + dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); + dp->d_ops->sf_put_ftype(sfep, args->filetype); + sfp->count++; +#if XFS_BIG_INUMS + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) + sfp->i8count++; +#endif + /* + * If there's more left to copy, do that. + */ + if (!eof) { + sfep = dp->d_ops->sf_nextentry(sfp, sfep); + memcpy(sfep, oldsfep, old_isize - nbytes); + } + kmem_free(buf); + dp->i_d.di_size = new_isize; + xfs_dir2_sf_check(args); +} + +/* + * Decide if the new entry will fit at all. + * If it will fit, pick between adding the new entry to the end (easy) + * or somewhere else (hard). + * Return 0 (won't fit), 1 (easy), 2 (hard). + */ +/*ARGSUSED*/ +static int /* pick result */ +xfs_dir2_sf_addname_pick( + xfs_da_args_t *args, /* operation arguments */ + int objchange, /* inode # size changes */ + xfs_dir2_sf_entry_t **sfepp, /* out(1): new entry ptr */ + xfs_dir2_data_aoff_t *offsetp) /* out(1): new offset */ +{ + xfs_inode_t *dp; /* incore directory inode */ + int holefit; /* found hole it will fit in */ + int i; /* entry number */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_dir2_data_aoff_t offset; /* data block offset */ + xfs_dir2_sf_entry_t *sfep; /* shortform entry */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + int size; /* entry's data size */ + int used; /* data bytes used */ + + dp = args->dp; + mp = dp->i_mount; + + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + size = dp->d_ops->data_entsize(args->namelen); + offset = dp->d_ops->data_first_offset; + sfep = xfs_dir2_sf_firstentry(sfp); + holefit = 0; + /* + * Loop over sf entries. + * Keep track of data offset and whether we've seen a place + * to insert the new entry. + */ + for (i = 0; i < sfp->count; i++) { + if (!holefit) + holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); + offset = xfs_dir2_sf_get_offset(sfep) + + dp->d_ops->data_entsize(sfep->namelen); + sfep = dp->d_ops->sf_nextentry(sfp, sfep); + } + /* + * Calculate data bytes used excluding the new entry, if this + * was a data block (block form directory). + */ + used = offset + + (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t); + /* + * If it won't fit in a block form then we can't insert it, + * we'll go back, convert to block, then try the insert and convert + * to leaf. + */ + if (used + (holefit ? 0 : size) > args->geo->blksize) + return 0; + /* + * If changing the inode number size, do it the hard way. + */ +#if XFS_BIG_INUMS + if (objchange) { + return 2; + } +#else + ASSERT(objchange == 0); +#endif + /* + * If it won't fit at the end then do it the hard way (use the hole). + */ + if (used + size > args->geo->blksize) + return 2; + /* + * Do it the easy way. + */ + *sfepp = sfep; + *offsetp = offset; + return 1; +} + +#ifdef DEBUG +/* + * Check consistency of shortform directory, assert if bad. + */ +static void +xfs_dir2_sf_check( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_inode_t *dp; /* incore directory inode */ + int i; /* entry number */ + int i8count; /* number of big inode#s */ + xfs_ino_t ino; /* entry inode number */ + int offset; /* data offset */ + xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + struct xfs_mount *mp; + + dp = args->dp; + mp = dp->i_mount; + + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + offset = dp->d_ops->data_first_offset; + ino = dp->d_ops->sf_get_parent_ino(sfp); + i8count = ino > XFS_DIR2_MAX_SHORT_INUM; + + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); + i < sfp->count; + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); + ino = dp->d_ops->sf_get_ino(sfp, sfep); + i8count += ino > XFS_DIR2_MAX_SHORT_INUM; + offset = + xfs_dir2_sf_get_offset(sfep) + + dp->d_ops->data_entsize(sfep->namelen); + ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX); + } + ASSERT(i8count == sfp->i8count); + ASSERT(XFS_BIG_INUMS || i8count == 0); + ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); + ASSERT(offset + + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t) <= args->geo->blksize); +} +#endif /* DEBUG */ + +/* + * Create a new (shortform) directory. + */ +int /* error, always 0 */ +xfs_dir2_sf_create( + xfs_da_args_t *args, /* operation arguments */ + xfs_ino_t pino) /* parent inode number */ +{ + xfs_inode_t *dp; /* incore directory inode */ + int i8count; /* parent inode is an 8-byte number */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + int size; /* directory size */ + + trace_xfs_dir2_sf_create(args); + + dp = args->dp; + + ASSERT(dp != NULL); + ASSERT(dp->i_d.di_size == 0); + /* + * If it's currently a zero-length extent file, + * convert it to local format. + */ + if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) { + dp->i_df.if_flags &= ~XFS_IFEXTENTS; /* just in case */ + dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); + dp->i_df.if_flags |= XFS_IFINLINE; + } + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + ASSERT(dp->i_df.if_bytes == 0); + i8count = pino > XFS_DIR2_MAX_SHORT_INUM; + size = xfs_dir2_sf_hdr_size(i8count); + /* + * Make a buffer for the data. + */ + xfs_idata_realloc(dp, size, XFS_DATA_FORK); + /* + * Fill in the header, + */ + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp->i8count = i8count; + /* + * Now can put in the inode number, since i8count is set. + */ + dp->d_ops->sf_put_parent_ino(sfp, pino); + sfp->count = 0; + dp->i_d.di_size = size; + xfs_dir2_sf_check(args); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); + return 0; +} + +/* + * Lookup an entry in a shortform directory. + * Returns EEXIST if found, ENOENT if not found. + */ +int /* error */ +xfs_dir2_sf_lookup( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_inode_t *dp; /* incore directory inode */ + int i; /* entry index */ + int error; + xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + enum xfs_dacmp cmp; /* comparison result */ + xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ + + trace_xfs_dir2_sf_lookup(args); + + xfs_dir2_sf_check(args); + dp = args->dp; + + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + /* + * Bail out if the directory is way too short. + */ + if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { + ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); + return EIO; + } + ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); + ASSERT(dp->i_df.if_u1.if_data != NULL); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); + /* + * Special case for . + */ + if (args->namelen == 1 && args->name[0] == '.') { + args->inumber = dp->i_ino; + args->cmpresult = XFS_CMP_EXACT; + args->filetype = XFS_DIR3_FT_DIR; + return EEXIST; + } + /* + * Special case for .. + */ + if (args->namelen == 2 && + args->name[0] == '.' && args->name[1] == '.') { + args->inumber = dp->d_ops->sf_get_parent_ino(sfp); + args->cmpresult = XFS_CMP_EXACT; + args->filetype = XFS_DIR3_FT_DIR; + return EEXIST; + } + /* + * Loop over all the entries trying to match ours. + */ + ci_sfep = NULL; + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + /* + * Compare name and if it's an exact match, return the inode + * number. If it's the first case-insensitive match, store the + * inode number and continue looking for an exact match. + */ + cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name, + sfep->namelen); + if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { + args->cmpresult = cmp; + args->inumber = dp->d_ops->sf_get_ino(sfp, sfep); + args->filetype = dp->d_ops->sf_get_ftype(sfep); + if (cmp == XFS_CMP_EXACT) + return EEXIST; + ci_sfep = sfep; + } + } + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); + /* + * Here, we can only be doing a lookup (not a rename or replace). + * If a case-insensitive match was not found, return ENOENT. + */ + if (!ci_sfep) + return ENOENT; + /* otherwise process the CI match as required by the caller */ + error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen); + return error; +} + +/* + * Remove an entry from a shortform directory. + */ +int /* error */ +xfs_dir2_sf_removename( + xfs_da_args_t *args) +{ + int byteoff; /* offset of removed entry */ + xfs_inode_t *dp; /* incore directory inode */ + int entsize; /* this entry's size */ + int i; /* shortform entry index */ + int newsize; /* new inode size */ + int oldsize; /* old inode size */ + xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + + trace_xfs_dir2_sf_removename(args); + + dp = args->dp; + + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + oldsize = (int)dp->i_d.di_size; + /* + * Bail out if the directory is way too short. + */ + if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) { + ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); + return EIO; + } + ASSERT(dp->i_df.if_bytes == oldsize); + ASSERT(dp->i_df.if_u1.if_data != NULL); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count)); + /* + * Loop over the old directory entries. + * Find the one we're deleting. + */ + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + if (xfs_da_compname(args, sfep->name, sfep->namelen) == + XFS_CMP_EXACT) { + ASSERT(dp->d_ops->sf_get_ino(sfp, sfep) == + args->inumber); + break; + } + } + /* + * Didn't find it. + */ + if (i == sfp->count) + return ENOENT; + /* + * Calculate sizes. + */ + byteoff = (int)((char *)sfep - (char *)sfp); + entsize = dp->d_ops->sf_entsize(sfp, args->namelen); + newsize = oldsize - entsize; + /* + * Copy the part if any after the removed entry, sliding it down. + */ + if (byteoff + entsize < oldsize) + memmove((char *)sfp + byteoff, (char *)sfp + byteoff + entsize, + oldsize - (byteoff + entsize)); + /* + * Fix up the header and file size. + */ + sfp->count--; + dp->i_d.di_size = newsize; + /* + * Reallocate, making it smaller. + */ + xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; +#if XFS_BIG_INUMS + /* + * Are we changing inode number size? + */ + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) { + if (sfp->i8count == 1) + xfs_dir2_sf_toino4(args); + else + sfp->i8count--; + } +#endif + xfs_dir2_sf_check(args); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); + return 0; +} + +/* + * Replace the inode number of an entry in a shortform directory. + */ +int /* error */ +xfs_dir2_sf_replace( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_inode_t *dp; /* incore directory inode */ + int i; /* entry index */ +#if XFS_BIG_INUMS || defined(DEBUG) + xfs_ino_t ino=0; /* entry old inode number */ +#endif +#if XFS_BIG_INUMS + int i8elevated; /* sf_toino8 set i8count=1 */ +#endif + xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + + trace_xfs_dir2_sf_replace(args); + + dp = args->dp; + + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + /* + * Bail out if the shortform directory is way too small. + */ + if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { + ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); + return EIO; + } + ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); + ASSERT(dp->i_df.if_u1.if_data != NULL); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); +#if XFS_BIG_INUMS + /* + * New inode number is large, and need to convert to 8-byte inodes. + */ + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { + int error; /* error return value */ + int newsize; /* new inode size */ + + newsize = + dp->i_df.if_bytes + + (sfp->count + 1) * + ((uint)sizeof(xfs_dir2_ino8_t) - + (uint)sizeof(xfs_dir2_ino4_t)); + /* + * Won't fit as shortform, convert to block then do replace. + */ + if (newsize > XFS_IFORK_DSIZE(dp)) { + error = xfs_dir2_sf_to_block(args); + if (error) { + return error; + } + return xfs_dir2_block_replace(args); + } + /* + * Still fits, convert to 8-byte now. + */ + xfs_dir2_sf_toino8(args); + i8elevated = 1; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + } else + i8elevated = 0; +#endif + ASSERT(args->namelen != 1 || args->name[0] != '.'); + /* + * Replace ..'s entry. + */ + if (args->namelen == 2 && + args->name[0] == '.' && args->name[1] == '.') { +#if XFS_BIG_INUMS || defined(DEBUG) + ino = dp->d_ops->sf_get_parent_ino(sfp); + ASSERT(args->inumber != ino); +#endif + dp->d_ops->sf_put_parent_ino(sfp, args->inumber); + } + /* + * Normal entry, look for the name. + */ + else { + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + if (xfs_da_compname(args, sfep->name, sfep->namelen) == + XFS_CMP_EXACT) { +#if XFS_BIG_INUMS || defined(DEBUG) + ino = dp->d_ops->sf_get_ino(sfp, sfep); + ASSERT(args->inumber != ino); +#endif + dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); + dp->d_ops->sf_put_ftype(sfep, args->filetype); + break; + } + } + /* + * Didn't find it. + */ + if (i == sfp->count) { + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); +#if XFS_BIG_INUMS + if (i8elevated) + xfs_dir2_sf_toino4(args); +#endif + return ENOENT; + } + } +#if XFS_BIG_INUMS + /* + * See if the old number was large, the new number is small. + */ + if (ino > XFS_DIR2_MAX_SHORT_INUM && + args->inumber <= XFS_DIR2_MAX_SHORT_INUM) { + /* + * And the old count was one, so need to convert to small. + */ + if (sfp->i8count == 1) + xfs_dir2_sf_toino4(args); + else + sfp->i8count--; + } + /* + * See if the old number was small, the new number is large. + */ + if (ino <= XFS_DIR2_MAX_SHORT_INUM && + args->inumber > XFS_DIR2_MAX_SHORT_INUM) { + /* + * add to the i8count unless we just converted to 8-byte + * inodes (which does an implied i8count = 1) + */ + ASSERT(sfp->i8count != 0); + if (!i8elevated) + sfp->i8count++; + } +#endif + xfs_dir2_sf_check(args); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); + return 0; +} + +#if XFS_BIG_INUMS +/* + * Convert from 8-byte inode numbers to 4-byte inode numbers. + * The last 8-byte inode number is gone, but the count is still 1. + */ +static void +xfs_dir2_sf_toino4( + xfs_da_args_t *args) /* operation arguments */ +{ + char *buf; /* old dir's buffer */ + xfs_inode_t *dp; /* incore directory inode */ + int i; /* entry index */ + int newsize; /* new inode size */ + xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ + xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ + int oldsize; /* old inode size */ + xfs_dir2_sf_entry_t *sfep; /* new sf entry */ + xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ + struct xfs_mount *mp; + + trace_xfs_dir2_sf_toino4(args); + + dp = args->dp; + mp = dp->i_mount; + + /* + * Copy the old directory to the buffer. + * Then nuke it from the inode, and add the new buffer to the inode. + * Don't want xfs_idata_realloc copying the data here. + */ + oldsize = dp->i_df.if_bytes; + buf = kmem_alloc(oldsize, KM_SLEEP); + oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->i8count == 1); + memcpy(buf, oldsfp, oldsize); + /* + * Compute the new inode size. + */ + newsize = + oldsize - + (oldsfp->count + 1) * + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); + xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); + xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); + /* + * Reset our pointers, the data has moved. + */ + oldsfp = (xfs_dir2_sf_hdr_t *)buf; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + /* + * Fill in the new header. + */ + sfp->count = oldsfp->count; + sfp->i8count = 0; + dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp)); + /* + * Copy the entries field by field. + */ + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), + oldsfep = xfs_dir2_sf_firstentry(oldsfp); + i < sfp->count; + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), + oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) { + sfep->namelen = oldsfep->namelen; + sfep->offset = oldsfep->offset; + memcpy(sfep->name, oldsfep->name, sfep->namelen); + dp->d_ops->sf_put_ino(sfp, sfep, + dp->d_ops->sf_get_ino(oldsfp, oldsfep)); + dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep)); + } + /* + * Clean up the inode. + */ + kmem_free(buf); + dp->i_d.di_size = newsize; + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); +} + +/* + * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers. + * The new entry w/ an 8-byte inode number is not there yet; we leave with + * i8count set to 1, but no corresponding 8-byte entry. + */ +static void +xfs_dir2_sf_toino8( + xfs_da_args_t *args) /* operation arguments */ +{ + char *buf; /* old dir's buffer */ + xfs_inode_t *dp; /* incore directory inode */ + int i; /* entry index */ + int newsize; /* new inode size */ + xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ + xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ + int oldsize; /* old inode size */ + xfs_dir2_sf_entry_t *sfep; /* new sf entry */ + xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ + struct xfs_mount *mp; + + trace_xfs_dir2_sf_toino8(args); + + dp = args->dp; + mp = dp->i_mount; + + /* + * Copy the old directory to the buffer. + * Then nuke it from the inode, and add the new buffer to the inode. + * Don't want xfs_idata_realloc copying the data here. + */ + oldsize = dp->i_df.if_bytes; + buf = kmem_alloc(oldsize, KM_SLEEP); + oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->i8count == 0); + memcpy(buf, oldsfp, oldsize); + /* + * Compute the new inode size (nb: entry count + 1 for parent) + */ + newsize = + oldsize + + (oldsfp->count + 1) * + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); + xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); + xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); + /* + * Reset our pointers, the data has moved. + */ + oldsfp = (xfs_dir2_sf_hdr_t *)buf; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + /* + * Fill in the new header. + */ + sfp->count = oldsfp->count; + sfp->i8count = 1; + dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp)); + /* + * Copy the entries field by field. + */ + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), + oldsfep = xfs_dir2_sf_firstentry(oldsfp); + i < sfp->count; + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), + oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) { + sfep->namelen = oldsfep->namelen; + sfep->offset = oldsfep->offset; + memcpy(sfep->name, oldsfep->name, sfep->namelen); + dp->d_ops->sf_put_ino(sfp, sfep, + dp->d_ops->sf_get_ino(oldsfp, oldsfep)); + dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep)); + } + /* + * Clean up the inode. + */ + kmem_free(buf); + dp->i_d.di_size = newsize; + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); +} +#endif /* XFS_BIG_INUMS */ diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c new file mode 100644 index 000000000000..c2ac0c611ad8 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_quota.h" +#include "xfs_trans.h" +#include "xfs_qm.h" +#include "xfs_error.h" +#include "xfs_cksum.h" +#include "xfs_trace.h" + +int +xfs_calc_dquots_per_chunk( + unsigned int nbblks) /* basic block units */ +{ + unsigned int ndquots; + + ASSERT(nbblks > 0); + ndquots = BBTOB(nbblks); + do_div(ndquots, sizeof(xfs_dqblk_t)); + + return ndquots; +} + +/* + * Do some primitive error checking on ondisk dquot data structures. + */ +int +xfs_dqcheck( + struct xfs_mount *mp, + xfs_disk_dquot_t *ddq, + xfs_dqid_t id, + uint type, /* used only when IO_dorepair is true */ + uint flags, + char *str) +{ + xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; + int errs = 0; + + /* + * We can encounter an uninitialized dquot buffer for 2 reasons: + * 1. If we crash while deleting the quotainode(s), and those blks got + * used for user data. This is because we take the path of regular + * file deletion; however, the size field of quotainodes is never + * updated, so all the tricks that we play in itruncate_finish + * don't quite matter. + * + * 2. We don't play the quota buffers when there's a quotaoff logitem. + * But the allocation will be replayed so we'll end up with an + * uninitialized quota block. + * + * This is all fine; things are still consistent, and we haven't lost + * any quota information. Just don't complain about bad dquot blks. + */ + if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", + str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); + errs++; + } + if (ddq->d_version != XFS_DQUOT_VERSION) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", + str, id, ddq->d_version, XFS_DQUOT_VERSION); + errs++; + } + + if (ddq->d_flags != XFS_DQ_USER && + ddq->d_flags != XFS_DQ_PROJ && + ddq->d_flags != XFS_DQ_GROUP) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : XFS dquot ID 0x%x, unknown flags 0x%x", + str, id, ddq->d_flags); + errs++; + } + + if (id != -1 && id != be32_to_cpu(ddq->d_id)) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : ondisk-dquot 0x%p, ID mismatch: " + "0x%x expected, found id 0x%x", + str, ddq, id, be32_to_cpu(ddq->d_id)); + errs++; + } + + if (!errs && ddq->d_id) { + if (ddq->d_blk_softlimit && + be64_to_cpu(ddq->d_bcount) > + be64_to_cpu(ddq->d_blk_softlimit)) { + if (!ddq->d_btimer) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", + str, (int)be32_to_cpu(ddq->d_id), ddq); + errs++; + } + } + if (ddq->d_ino_softlimit && + be64_to_cpu(ddq->d_icount) > + be64_to_cpu(ddq->d_ino_softlimit)) { + if (!ddq->d_itimer) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", + str, (int)be32_to_cpu(ddq->d_id), ddq); + errs++; + } + } + if (ddq->d_rtb_softlimit && + be64_to_cpu(ddq->d_rtbcount) > + be64_to_cpu(ddq->d_rtb_softlimit)) { + if (!ddq->d_rtbtimer) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", + str, (int)be32_to_cpu(ddq->d_id), ddq); + errs++; + } + } + } + + if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) + return errs; + + if (flags & XFS_QMOPT_DOWARN) + xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); + + /* + * Typically, a repair is only requested by quotacheck. + */ + ASSERT(id != -1); + ASSERT(flags & XFS_QMOPT_DQREPAIR); + memset(d, 0, sizeof(xfs_dqblk_t)); + + d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); + d->dd_diskdq.d_version = XFS_DQUOT_VERSION; + d->dd_diskdq.d_flags = type; + d->dd_diskdq.d_id = cpu_to_be32(id); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); + xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF); + } + + return errs; +} + +STATIC bool +xfs_dquot_buf_verify_crc( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; + int ndquots; + int i; + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return true; + + /* + * if we are in log recovery, the quota subsystem has not been + * initialised so we have no quotainfo structure. In that case, we need + * to manually calculate the number of dquots in the buffer. + */ + if (mp->m_quotainfo) + ndquots = mp->m_quotainfo->qi_dqperchunk; + else + ndquots = xfs_calc_dquots_per_chunk( + XFS_BB_TO_FSB(mp, bp->b_length)); + + for (i = 0; i < ndquots; i++, d++) { + if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF)) + return false; + if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) + return false; + } + return true; +} + +STATIC bool +xfs_dquot_buf_verify( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; + xfs_dqid_t id = 0; + int ndquots; + int i; + + /* + * if we are in log recovery, the quota subsystem has not been + * initialised so we have no quotainfo structure. In that case, we need + * to manually calculate the number of dquots in the buffer. + */ + if (mp->m_quotainfo) + ndquots = mp->m_quotainfo->qi_dqperchunk; + else + ndquots = xfs_calc_dquots_per_chunk(bp->b_length); + + /* + * On the first read of the buffer, verify that each dquot is valid. + * We don't know what the id of the dquot is supposed to be, just that + * they should be increasing monotonically within the buffer. If the + * first id is corrupt, then it will fail on the second dquot in the + * buffer so corruptions could point to the wrong dquot in this case. + */ + for (i = 0; i < ndquots; i++) { + struct xfs_disk_dquot *ddq; + int error; + + ddq = &d[i].dd_diskdq; + + if (i == 0) + id = be32_to_cpu(ddq->d_id); + + error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, + "xfs_dquot_buf_verify"); + if (error) + return false; + } + return true; +} + +static void +xfs_dquot_buf_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (!xfs_dquot_buf_verify_crc(mp, bp)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_dquot_buf_verify(mp, bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +/* + * we don't calculate the CRC here as that is done when the dquot is flushed to + * the buffer after the update is done. This ensures that the dquot in the + * buffer always has an up-to-date CRC value. + */ +static void +xfs_dquot_buf_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (!xfs_dquot_buf_verify(mp, bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } +} + +const struct xfs_buf_ops xfs_dquot_buf_ops = { + .verify_read = xfs_dquot_buf_read_verify, + .verify_write = xfs_dquot_buf_write_verify, +}; + diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c new file mode 100644 index 000000000000..16fb63a9bc5e --- /dev/null +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -0,0 +1,2189 @@ +/* + * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_inum.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" +#include "xfs_alloc.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_bmap.h" +#include "xfs_cksum.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" +#include "xfs_icreate_item.h" +#include "xfs_icache.h" +#include "xfs_dinode.h" +#include "xfs_trace.h" + + +/* + * Allocation group level functions. + */ +static inline int +xfs_ialloc_cluster_alignment( + xfs_alloc_arg_t *args) +{ + if (xfs_sb_version_hasalign(&args->mp->m_sb) && + args->mp->m_sb.sb_inoalignmt >= + XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size)) + return args->mp->m_sb.sb_inoalignmt; + return 1; +} + +/* + * Lookup a record by ino in the btree given by cur. + */ +int /* error */ +xfs_inobt_lookup( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + xfs_lookup_t dir, /* <=, >=, == */ + int *stat) /* success/failure */ +{ + cur->bc_rec.i.ir_startino = ino; + cur->bc_rec.i.ir_freecount = 0; + cur->bc_rec.i.ir_free = 0; + return xfs_btree_lookup(cur, dir, stat); +} + +/* + * Update the record referred to by cur to the value given. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +STATIC int /* error */ +xfs_inobt_update( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_inobt_rec_incore_t *irec) /* btree record */ +{ + union xfs_btree_rec rec; + + rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); + rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); + rec.inobt.ir_free = cpu_to_be64(irec->ir_free); + return xfs_btree_update(cur, &rec); +} + +/* + * Get the data from the pointed-to record. + */ +int /* error */ +xfs_inobt_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_inobt_rec_incore_t *irec, /* btree record */ + int *stat) /* output: success/failure */ +{ + union xfs_btree_rec *rec; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (!error && *stat == 1) { + irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); + irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); + irec->ir_free = be64_to_cpu(rec->inobt.ir_free); + } + return error; +} + +/* + * Insert a single inobt record. Cursor must already point to desired location. + */ +STATIC int +xfs_inobt_insert_rec( + struct xfs_btree_cur *cur, + __int32_t freecount, + xfs_inofree_t free, + int *stat) +{ + cur->bc_rec.i.ir_freecount = freecount; + cur->bc_rec.i.ir_free = free; + return xfs_btree_insert(cur, stat); +} + +/* + * Insert records describing a newly allocated inode chunk into the inobt. + */ +STATIC int +xfs_inobt_insert( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t newino, + xfs_agino_t newlen, + xfs_btnum_t btnum) +{ + struct xfs_btree_cur *cur; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + xfs_agino_t thisino; + int i; + int error; + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); + + for (thisino = newino; + thisino < newino + newlen; + thisino += XFS_INODES_PER_CHUNK) { + error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i); + if (error) { + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; + } + ASSERT(i == 0); + + error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK, + XFS_INOBT_ALL_FREE, &i); + if (error) { + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; + } + ASSERT(i == 1); + } + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + + return 0; +} + +/* + * Verify that the number of free inodes in the AGI is correct. + */ +#ifdef DEBUG +STATIC int +xfs_check_agi_freecount( + struct xfs_btree_cur *cur, + struct xfs_agi *agi) +{ + if (cur->bc_nlevels == 1) { + xfs_inobt_rec_incore_t rec; + int freecount = 0; + int error; + int i; + + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); + if (error) + return error; + + do { + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + return error; + + if (i) { + freecount += rec.ir_freecount; + error = xfs_btree_increment(cur, 0, &i); + if (error) + return error; + } + } while (i == 1); + + if (!XFS_FORCED_SHUTDOWN(cur->bc_mp)) + ASSERT(freecount == be32_to_cpu(agi->agi_freecount)); + } + return 0; +} +#else +#define xfs_check_agi_freecount(cur, agi) 0 +#endif + +/* + * Initialise a new set of inodes. When called without a transaction context + * (e.g. from recovery) we initiate a delayed write of the inode buffers rather + * than logging them (which in a transaction context puts them into the AIL + * for writeback rather than the xfsbufd queue). + */ +int +xfs_ialloc_inode_init( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct list_head *buffer_list, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + xfs_agblock_t length, + unsigned int gen) +{ + struct xfs_buf *fbuf; + struct xfs_dinode *free; + int nbufs, blks_per_cluster, inodes_per_cluster; + int version; + int i, j; + xfs_daddr_t d; + xfs_ino_t ino = 0; + + /* + * Loop over the new block(s), filling in the inodes. For small block + * sizes, manipulate the inodes in buffers which are multiples of the + * blocks size. + */ + blks_per_cluster = xfs_icluster_size_fsb(mp); + inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; + nbufs = length / blks_per_cluster; + + /* + * Figure out what version number to use in the inodes we create. If + * the superblock version has caught up to the one that supports the new + * inode format, then use the new inode version. Otherwise use the old + * version so that old kernels will continue to be able to use the file + * system. + * + * For v3 inodes, we also need to write the inode number into the inode, + * so calculate the first inode number of the chunk here as + * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not + * across multiple filesystem blocks (such as a cluster) and so cannot + * be used in the cluster buffer loop below. + * + * Further, because we are writing the inode directly into the buffer + * and calculating a CRC on the entire inode, we have ot log the entire + * inode so that the entire range the CRC covers is present in the log. + * That means for v3 inode we log the entire buffer rather than just the + * inode cores. + */ + if (xfs_sb_version_hascrc(&mp->m_sb)) { + version = 3; + ino = XFS_AGINO_TO_INO(mp, agno, + XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); + + /* + * log the initialisation that is about to take place as an + * logical operation. This means the transaction does not + * need to log the physical changes to the inode buffers as log + * recovery will know what initialisation is actually needed. + * Hence we only need to log the buffers as "ordered" buffers so + * they track in the AIL as if they were physically logged. + */ + if (tp) + xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos, + mp->m_sb.sb_inodesize, length, gen); + } else + version = 2; + + for (j = 0; j < nbufs; j++) { + /* + * Get the block. + */ + d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); + fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, + mp->m_bsize * blks_per_cluster, + XBF_UNMAPPED); + if (!fbuf) + return ENOMEM; + + /* Initialize the inode buffers and log them appropriately. */ + fbuf->b_ops = &xfs_inode_buf_ops; + xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); + for (i = 0; i < inodes_per_cluster; i++) { + int ioffset = i << mp->m_sb.sb_inodelog; + uint isize = xfs_dinode_size(version); + + free = xfs_make_iptr(mp, fbuf, i); + free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); + free->di_version = version; + free->di_gen = cpu_to_be32(gen); + free->di_next_unlinked = cpu_to_be32(NULLAGINO); + + if (version == 3) { + free->di_ino = cpu_to_be64(ino); + ino++; + uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); + xfs_dinode_calc_crc(mp, free); + } else if (tp) { + /* just log the inode core */ + xfs_trans_log_buf(tp, fbuf, ioffset, + ioffset + isize - 1); + } + } + + if (tp) { + /* + * Mark the buffer as an inode allocation buffer so it + * sticks in AIL at the point of this allocation + * transaction. This ensures the they are on disk before + * the tail of the log can be moved past this + * transaction (i.e. by preventing relogging from moving + * it forward in the log). + */ + xfs_trans_inode_alloc_buf(tp, fbuf); + if (version == 3) { + /* + * Mark the buffer as ordered so that they are + * not physically logged in the transaction but + * still tracked in the AIL as part of the + * transaction and pin the log appropriately. + */ + xfs_trans_ordered_buf(tp, fbuf); + xfs_trans_log_buf(tp, fbuf, 0, + BBTOB(fbuf->b_length) - 1); + } + } else { + fbuf->b_flags |= XBF_DONE; + xfs_buf_delwri_queue(fbuf, buffer_list); + xfs_buf_relse(fbuf); + } + } + return 0; +} + +/* + * Allocate new inodes in the allocation group specified by agbp. + * Return 0 for success, else error code. + */ +STATIC int /* error code or 0 */ +xfs_ialloc_ag_alloc( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* alloc group buffer */ + int *alloc) +{ + xfs_agi_t *agi; /* allocation group header */ + xfs_alloc_arg_t args; /* allocation argument structure */ + xfs_agnumber_t agno; + int error; + xfs_agino_t newino; /* new first inode's number */ + xfs_agino_t newlen; /* new number of inodes */ + int isaligned = 0; /* inode allocation at stripe unit */ + /* boundary */ + struct xfs_perag *pag; + + memset(&args, 0, sizeof(args)); + args.tp = tp; + args.mp = tp->t_mountp; + + /* + * Locking will ensure that we don't have two callers in here + * at one time. + */ + newlen = args.mp->m_ialloc_inos; + if (args.mp->m_maxicount && + args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) + return ENOSPC; + args.minlen = args.maxlen = args.mp->m_ialloc_blks; + /* + * First try to allocate inodes contiguous with the last-allocated + * chunk of inodes. If the filesystem is striped, this will fill + * an entire stripe unit with inodes. + */ + agi = XFS_BUF_TO_AGI(agbp); + newino = be32_to_cpu(agi->agi_newino); + agno = be32_to_cpu(agi->agi_seqno); + args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + + args.mp->m_ialloc_blks; + if (likely(newino != NULLAGINO && + (args.agbno < be32_to_cpu(agi->agi_length)))) { + args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); + args.type = XFS_ALLOCTYPE_THIS_BNO; + args.prod = 1; + + /* + * We need to take into account alignment here to ensure that + * we don't modify the free list if we fail to have an exact + * block. If we don't have an exact match, and every oher + * attempt allocation attempt fails, we'll end up cancelling + * a dirty transaction and shutting down. + * + * For an exact allocation, alignment must be 1, + * however we need to take cluster alignment into account when + * fixing up the freelist. Use the minalignslop field to + * indicate that extra blocks might be required for alignment, + * but not to use them in the actual exact allocation. + */ + args.alignment = 1; + args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; + + /* Allow space for the inode btree to split. */ + args.minleft = args.mp->m_in_maxlevels - 1; + if ((error = xfs_alloc_vextent(&args))) + return error; + + /* + * This request might have dirtied the transaction if the AG can + * satisfy the request, but the exact block was not available. + * If the allocation did fail, subsequent requests will relax + * the exact agbno requirement and increase the alignment + * instead. It is critical that the total size of the request + * (len + alignment + slop) does not increase from this point + * on, so reset minalignslop to ensure it is not included in + * subsequent requests. + */ + args.minalignslop = 0; + } else + args.fsbno = NULLFSBLOCK; + + if (unlikely(args.fsbno == NULLFSBLOCK)) { + /* + * Set the alignment for the allocation. + * If stripe alignment is turned on then align at stripe unit + * boundary. + * If the cluster size is smaller than a filesystem block + * then we're doing I/O for inodes in filesystem block size + * pieces, so don't need alignment anyway. + */ + isaligned = 0; + if (args.mp->m_sinoalign) { + ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); + args.alignment = args.mp->m_dalign; + isaligned = 1; + } else + args.alignment = xfs_ialloc_cluster_alignment(&args); + /* + * Need to figure out where to allocate the inode blocks. + * Ideally they should be spaced out through the a.g. + * For now, just allocate blocks up front. + */ + args.agbno = be32_to_cpu(agi->agi_root); + args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); + /* + * Allocate a fixed-size extent of inodes. + */ + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.prod = 1; + /* + * Allow space for the inode btree to split. + */ + args.minleft = args.mp->m_in_maxlevels - 1; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + + /* + * If stripe alignment is turned on, then try again with cluster + * alignment. + */ + if (isaligned && args.fsbno == NULLFSBLOCK) { + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.agbno = be32_to_cpu(agi->agi_root); + args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); + args.alignment = xfs_ialloc_cluster_alignment(&args); + if ((error = xfs_alloc_vextent(&args))) + return error; + } + + if (args.fsbno == NULLFSBLOCK) { + *alloc = 0; + return 0; + } + ASSERT(args.len == args.minlen); + + /* + * Stamp and write the inode buffers. + * + * Seed the new inode cluster with a random generation number. This + * prevents short-term reuse of generation numbers if a chunk is + * freed and then immediately reallocated. We use random numbers + * rather than a linear progression to prevent the next generation + * number from being easily guessable. + */ + error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno, + args.len, prandom_u32()); + + if (error) + return error; + /* + * Convert the results. + */ + newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); + be32_add_cpu(&agi->agi_count, newlen); + be32_add_cpu(&agi->agi_freecount, newlen); + pag = xfs_perag_get(args.mp, agno); + pag->pagi_freecount += newlen; + xfs_perag_put(pag); + agi->agi_newino = cpu_to_be32(newino); + + /* + * Insert records describing the new inode chunk into the btrees. + */ + error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, + XFS_BTNUM_INO); + if (error) + return error; + + if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { + error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, + XFS_BTNUM_FINO); + if (error) + return error; + } + /* + * Log allocation group header fields + */ + xfs_ialloc_log_agi(tp, agbp, + XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO); + /* + * Modify/log superblock values for inode count and inode free count. + */ + xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); + *alloc = 1; + return 0; +} + +STATIC xfs_agnumber_t +xfs_ialloc_next_ag( + xfs_mount_t *mp) +{ + xfs_agnumber_t agno; + + spin_lock(&mp->m_agirotor_lock); + agno = mp->m_agirotor; + if (++mp->m_agirotor >= mp->m_maxagi) + mp->m_agirotor = 0; + spin_unlock(&mp->m_agirotor_lock); + + return agno; +} + +/* + * Select an allocation group to look for a free inode in, based on the parent + * inode and the mode. Return the allocation group buffer. + */ +STATIC xfs_agnumber_t +xfs_ialloc_ag_select( + xfs_trans_t *tp, /* transaction pointer */ + xfs_ino_t parent, /* parent directory inode number */ + umode_t mode, /* bits set to indicate file type */ + int okalloc) /* ok to allocate more space */ +{ + xfs_agnumber_t agcount; /* number of ag's in the filesystem */ + xfs_agnumber_t agno; /* current ag number */ + int flags; /* alloc buffer locking flags */ + xfs_extlen_t ineed; /* blocks needed for inode allocation */ + xfs_extlen_t longest = 0; /* longest extent available */ + xfs_mount_t *mp; /* mount point structure */ + int needspace; /* file mode implies space allocated */ + xfs_perag_t *pag; /* per allocation group data */ + xfs_agnumber_t pagno; /* parent (starting) ag number */ + int error; + + /* + * Files of these types need at least one block if length > 0 + * (and they won't fit in the inode, but that's hard to figure out). + */ + needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); + mp = tp->t_mountp; + agcount = mp->m_maxagi; + if (S_ISDIR(mode)) + pagno = xfs_ialloc_next_ag(mp); + else { + pagno = XFS_INO_TO_AGNO(mp, parent); + if (pagno >= agcount) + pagno = 0; + } + + ASSERT(pagno < agcount); + + /* + * Loop through allocation groups, looking for one with a little + * free space in it. Note we don't look for free inodes, exactly. + * Instead, we include whether there is a need to allocate inodes + * to mean that blocks must be allocated for them, + * if none are currently free. + */ + agno = pagno; + flags = XFS_ALLOC_FLAG_TRYLOCK; + for (;;) { + pag = xfs_perag_get(mp, agno); + if (!pag->pagi_inodeok) { + xfs_ialloc_next_ag(mp); + goto nextag; + } + + if (!pag->pagi_init) { + error = xfs_ialloc_pagi_init(mp, tp, agno); + if (error) + goto nextag; + } + + if (pag->pagi_freecount) { + xfs_perag_put(pag); + return agno; + } + + if (!okalloc) + goto nextag; + + if (!pag->pagf_init) { + error = xfs_alloc_pagf_init(mp, tp, agno, flags); + if (error) + goto nextag; + } + + /* + * Is there enough free space for the file plus a block of + * inodes? (if we need to allocate some)? + */ + ineed = mp->m_ialloc_blks; + longest = pag->pagf_longest; + if (!longest) + longest = pag->pagf_flcount > 0; + + if (pag->pagf_freeblks >= needspace + ineed && + longest >= ineed) { + xfs_perag_put(pag); + return agno; + } +nextag: + xfs_perag_put(pag); + /* + * No point in iterating over the rest, if we're shutting + * down. + */ + if (XFS_FORCED_SHUTDOWN(mp)) + return NULLAGNUMBER; + agno++; + if (agno >= agcount) + agno = 0; + if (agno == pagno) { + if (flags == 0) + return NULLAGNUMBER; + flags = 0; + } + } +} + +/* + * Try to retrieve the next record to the left/right from the current one. + */ +STATIC int +xfs_ialloc_next_rec( + struct xfs_btree_cur *cur, + xfs_inobt_rec_incore_t *rec, + int *done, + int left) +{ + int error; + int i; + + if (left) + error = xfs_btree_decrement(cur, 0, &i); + else + error = xfs_btree_increment(cur, 0, &i); + + if (error) + return error; + *done = !i; + if (i) { + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + + return 0; +} + +STATIC int +xfs_ialloc_get_rec( + struct xfs_btree_cur *cur, + xfs_agino_t agino, + xfs_inobt_rec_incore_t *rec, + int *done) +{ + int error; + int i; + + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i); + if (error) + return error; + *done = !i; + if (i) { + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + + return 0; +} + +/* + * Allocate an inode using the inobt-only algorithm. + */ +STATIC int +xfs_dialloc_ag_inobt( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_ino_t parent, + xfs_ino_t *inop) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); + xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); + struct xfs_perag *pag; + struct xfs_btree_cur *cur, *tcur; + struct xfs_inobt_rec_incore rec, trec; + xfs_ino_t ino; + int error; + int offset; + int i, j; + + pag = xfs_perag_get(mp, agno); + + ASSERT(pag->pagi_init); + ASSERT(pag->pagi_inodeok); + ASSERT(pag->pagi_freecount > 0); + + restart_pagno: + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + /* + * If pagino is 0 (this is the root inode allocation) use newino. + * This must work because we've just allocated some. + */ + if (!pagino) + pagino = be32_to_cpu(agi->agi_newino); + + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; + + /* + * If in the same AG as the parent, try to get near the parent. + */ + if (pagno == agno) { + int doneleft; /* done, to the left */ + int doneright; /* done, to the right */ + int searchdistance = 10; + + error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_inobt_get_rec(cur, &rec, &j); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(j == 1, error0); + + if (rec.ir_freecount > 0) { + /* + * Found a free inode in the same chunk + * as the parent, done. + */ + goto alloc_inode; + } + + + /* + * In the same AG as parent, but parent's chunk is full. + */ + + /* duplicate the cursor, search left & right simultaneously */ + error = xfs_btree_dup_cursor(cur, &tcur); + if (error) + goto error0; + + /* + * Skip to last blocks looked up if same parent inode. + */ + if (pagino != NULLAGINO && + pag->pagl_pagino == pagino && + pag->pagl_leftrec != NULLAGINO && + pag->pagl_rightrec != NULLAGINO) { + error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, + &trec, &doneleft); + if (error) + goto error1; + + error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, + &rec, &doneright); + if (error) + goto error1; + } else { + /* search left with tcur, back up 1 record */ + error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); + if (error) + goto error1; + + /* search right with cur, go forward 1 record. */ + error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); + if (error) + goto error1; + } + + /* + * Loop until we find an inode chunk with a free inode. + */ + while (!doneleft || !doneright) { + int useleft; /* using left inode chunk this time */ + + if (!--searchdistance) { + /* + * Not in range - save last search + * location and allocate a new inode + */ + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + pag->pagl_leftrec = trec.ir_startino; + pag->pagl_rightrec = rec.ir_startino; + pag->pagl_pagino = pagino; + goto newino; + } + + /* figure out the closer block if both are valid. */ + if (!doneleft && !doneright) { + useleft = pagino - + (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) < + rec.ir_startino - pagino; + } else { + useleft = !doneleft; + } + + /* free inodes to the left? */ + if (useleft && trec.ir_freecount) { + rec = trec; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + cur = tcur; + + pag->pagl_leftrec = trec.ir_startino; + pag->pagl_rightrec = rec.ir_startino; + pag->pagl_pagino = pagino; + goto alloc_inode; + } + + /* free inodes to the right? */ + if (!useleft && rec.ir_freecount) { + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + + pag->pagl_leftrec = trec.ir_startino; + pag->pagl_rightrec = rec.ir_startino; + pag->pagl_pagino = pagino; + goto alloc_inode; + } + + /* get next record to check */ + if (useleft) { + error = xfs_ialloc_next_rec(tcur, &trec, + &doneleft, 1); + } else { + error = xfs_ialloc_next_rec(cur, &rec, + &doneright, 0); + } + if (error) + goto error1; + } + + /* + * We've reached the end of the btree. because + * we are only searching a small chunk of the + * btree each search, there is obviously free + * inodes closer to the parent inode than we + * are now. restart the search again. + */ + pag->pagl_pagino = NULLAGINO; + pag->pagl_leftrec = NULLAGINO; + pag->pagl_rightrec = NULLAGINO; + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + goto restart_pagno; + } + + /* + * In a different AG from the parent. + * See if the most recently allocated block has any free. + */ +newino: + if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { + error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), + XFS_LOOKUP_EQ, &i); + if (error) + goto error0; + + if (i == 1) { + error = xfs_inobt_get_rec(cur, &rec, &j); + if (error) + goto error0; + + if (j == 1 && rec.ir_freecount > 0) { + /* + * The last chunk allocated in the group + * still has a free inode. + */ + goto alloc_inode; + } + } + } + + /* + * None left in the last group, search the whole AG + */ + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + for (;;) { + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (rec.ir_freecount > 0) + break; + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + +alloc_inode: + offset = xfs_lowbit64(rec.ir_free); + ASSERT(offset >= 0); + ASSERT(offset < XFS_INODES_PER_CHUNK); + ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % + XFS_INODES_PER_CHUNK) == 0); + ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); + rec.ir_free &= ~XFS_INOBT_MASK(offset); + rec.ir_freecount--; + error = xfs_inobt_update(cur, &rec); + if (error) + goto error0; + be32_add_cpu(&agi->agi_freecount, -1); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); + pag->pagi_freecount--; + + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); + xfs_perag_put(pag); + *inop = ino; + return 0; +error1: + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); +error0: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + xfs_perag_put(pag); + return error; +} + +/* + * Use the free inode btree to allocate an inode based on distance from the + * parent. Note that the provided cursor may be deleted and replaced. + */ +STATIC int +xfs_dialloc_ag_finobt_near( + xfs_agino_t pagino, + struct xfs_btree_cur **ocur, + struct xfs_inobt_rec_incore *rec) +{ + struct xfs_btree_cur *lcur = *ocur; /* left search cursor */ + struct xfs_btree_cur *rcur; /* right search cursor */ + struct xfs_inobt_rec_incore rrec; + int error; + int i, j; + + error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i); + if (error) + return error; + + if (i == 1) { + error = xfs_inobt_get_rec(lcur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + /* + * See if we've landed in the parent inode record. The finobt + * only tracks chunks with at least one free inode, so record + * existence is enough. + */ + if (pagino >= rec->ir_startino && + pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) + return 0; + } + + error = xfs_btree_dup_cursor(lcur, &rcur); + if (error) + return error; + + error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j); + if (error) + goto error_rcur; + if (j == 1) { + error = xfs_inobt_get_rec(rcur, &rrec, &j); + if (error) + goto error_rcur; + XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); + } + + XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); + if (i == 1 && j == 1) { + /* + * Both the left and right records are valid. Choose the closer + * inode chunk to the target. + */ + if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) > + (rrec.ir_startino - pagino)) { + *rec = rrec; + xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); + *ocur = rcur; + } else { + xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); + } + } else if (j == 1) { + /* only the right record is valid */ + *rec = rrec; + xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); + *ocur = rcur; + } else if (i == 1) { + /* only the left record is valid */ + xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); + } + + return 0; + +error_rcur: + xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Use the free inode btree to find a free inode based on a newino hint. If + * the hint is NULL, find the first free inode in the AG. + */ +STATIC int +xfs_dialloc_ag_finobt_newino( + struct xfs_agi *agi, + struct xfs_btree_cur *cur, + struct xfs_inobt_rec_incore *rec) +{ + int error; + int i; + + if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { + error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, + &i); + if (error) + return error; + if (i == 1) { + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + return 0; + } + } + + /* + * Find the first inode available in the AG. + */ + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + return 0; +} + +/* + * Update the inobt based on a modification made to the finobt. Also ensure that + * the records from both trees are equivalent post-modification. + */ +STATIC int +xfs_dialloc_ag_update_inobt( + struct xfs_btree_cur *cur, /* inobt cursor */ + struct xfs_inobt_rec_incore *frec, /* finobt record */ + int offset) /* inode offset */ +{ + struct xfs_inobt_rec_incore rec; + int error; + int i; + + error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % + XFS_INODES_PER_CHUNK) == 0); + + rec.ir_free &= ~XFS_INOBT_MASK(offset); + rec.ir_freecount--; + + XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && + (rec.ir_freecount == frec->ir_freecount)); + + error = xfs_inobt_update(cur, &rec); + if (error) + return error; + + return 0; +} + +/* + * Allocate an inode using the free inode btree, if available. Otherwise, fall + * back to the inobt search algorithm. + * + * The caller selected an AG for us, and made sure that free inodes are + * available. + */ +STATIC int +xfs_dialloc_ag( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_ino_t parent, + xfs_ino_t *inop) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); + xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); + struct xfs_perag *pag; + struct xfs_btree_cur *cur; /* finobt cursor */ + struct xfs_btree_cur *icur; /* inobt cursor */ + struct xfs_inobt_rec_incore rec; + xfs_ino_t ino; + int error; + int offset; + int i; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return xfs_dialloc_ag_inobt(tp, agbp, parent, inop); + + pag = xfs_perag_get(mp, agno); + + /* + * If pagino is 0 (this is the root inode allocation) use newino. + * This must work because we've just allocated some. + */ + if (!pagino) + pagino = be32_to_cpu(agi->agi_newino); + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error_cur; + + /* + * The search algorithm depends on whether we're in the same AG as the + * parent. If so, find the closest available inode to the parent. If + * not, consider the agi hint or find the first free inode in the AG. + */ + if (agno == pagno) + error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); + else + error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); + if (error) + goto error_cur; + + offset = xfs_lowbit64(rec.ir_free); + ASSERT(offset >= 0); + ASSERT(offset < XFS_INODES_PER_CHUNK); + ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % + XFS_INODES_PER_CHUNK) == 0); + ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); + + /* + * Modify or remove the finobt record. + */ + rec.ir_free &= ~XFS_INOBT_MASK(offset); + rec.ir_freecount--; + if (rec.ir_freecount) + error = xfs_inobt_update(cur, &rec); + else + error = xfs_btree_delete(cur, &i); + if (error) + goto error_cur; + + /* + * The finobt has now been updated appropriately. We haven't updated the + * agi and superblock yet, so we can create an inobt cursor and validate + * the original freecount. If all is well, make the equivalent update to + * the inobt using the finobt record and offset information. + */ + icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + + error = xfs_check_agi_freecount(icur, agi); + if (error) + goto error_icur; + + error = xfs_dialloc_ag_update_inobt(icur, &rec, offset); + if (error) + goto error_icur; + + /* + * Both trees have now been updated. We must update the perag and + * superblock before we can check the freecount for each btree. + */ + be32_add_cpu(&agi->agi_freecount, -1); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); + pag->pagi_freecount--; + + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); + + error = xfs_check_agi_freecount(icur, agi); + if (error) + goto error_icur; + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error_icur; + + xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + xfs_perag_put(pag); + *inop = ino; + return 0; + +error_icur: + xfs_btree_del_cursor(icur, XFS_BTREE_ERROR); +error_cur: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + xfs_perag_put(pag); + return error; +} + +/* + * Allocate an inode on disk. + * + * Mode is used to tell whether the new inode will need space, and whether it + * is a directory. + * + * This function is designed to be called twice if it has to do an allocation + * to make more free inodes. On the first call, *IO_agbp should be set to NULL. + * If an inode is available without having to performn an allocation, an inode + * number is returned. In this case, *IO_agbp is set to NULL. If an allocation + * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp. + * The caller should then commit the current transaction, allocate a + * new transaction, and call xfs_dialloc() again, passing in the previous value + * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI + * buffer is locked across the two calls, the second call is guaranteed to have + * a free inode available. + * + * Once we successfully pick an inode its number is returned and the on-disk + * data structures are updated. The inode itself is not read in, since doing so + * would break ordering constraints with xfs_reclaim. + */ +int +xfs_dialloc( + struct xfs_trans *tp, + xfs_ino_t parent, + umode_t mode, + int okalloc, + struct xfs_buf **IO_agbp, + xfs_ino_t *inop) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_buf *agbp; + xfs_agnumber_t agno; + int error; + int ialloced; + int noroom = 0; + xfs_agnumber_t start_agno; + struct xfs_perag *pag; + + if (*IO_agbp) { + /* + * If the caller passes in a pointer to the AGI buffer, + * continue where we left off before. In this case, we + * know that the allocation group has free inodes. + */ + agbp = *IO_agbp; + goto out_alloc; + } + + /* + * We do not have an agbp, so select an initial allocation + * group for inode allocation. + */ + start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + if (start_agno == NULLAGNUMBER) { + *inop = NULLFSINO; + return 0; + } + + /* + * If we have already hit the ceiling of inode blocks then clear + * okalloc so we scan all available agi structures for a free + * inode. + */ + if (mp->m_maxicount && + mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { + noroom = 1; + okalloc = 0; + } + + /* + * Loop until we find an allocation group that either has free inodes + * or in which we can allocate some inodes. Iterate through the + * allocation groups upward, wrapping at the end. + */ + agno = start_agno; + for (;;) { + pag = xfs_perag_get(mp, agno); + if (!pag->pagi_inodeok) { + xfs_ialloc_next_ag(mp); + goto nextag; + } + + if (!pag->pagi_init) { + error = xfs_ialloc_pagi_init(mp, tp, agno); + if (error) + goto out_error; + } + + /* + * Do a first racy fast path check if this AG is usable. + */ + if (!pag->pagi_freecount && !okalloc) + goto nextag; + + /* + * Then read in the AGI buffer and recheck with the AGI buffer + * lock held. + */ + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) + goto out_error; + + if (pag->pagi_freecount) { + xfs_perag_put(pag); + goto out_alloc; + } + + if (!okalloc) + goto nextag_relse_buffer; + + + error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); + if (error) { + xfs_trans_brelse(tp, agbp); + + if (error != ENOSPC) + goto out_error; + + xfs_perag_put(pag); + *inop = NULLFSINO; + return 0; + } + + if (ialloced) { + /* + * We successfully allocated some inodes, return + * the current context to the caller so that it + * can commit the current transaction and call + * us again where we left off. + */ + ASSERT(pag->pagi_freecount > 0); + xfs_perag_put(pag); + + *IO_agbp = agbp; + *inop = NULLFSINO; + return 0; + } + +nextag_relse_buffer: + xfs_trans_brelse(tp, agbp); +nextag: + xfs_perag_put(pag); + if (++agno == mp->m_sb.sb_agcount) + agno = 0; + if (agno == start_agno) { + *inop = NULLFSINO; + return noroom ? ENOSPC : 0; + } + } + +out_alloc: + *IO_agbp = NULL; + return xfs_dialloc_ag(tp, agbp, parent, inop); +out_error: + xfs_perag_put(pag); + return error; +} + +STATIC int +xfs_difree_inobt( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t agino, + struct xfs_bmap_free *flist, + int *deleted, + xfs_ino_t *first_ino, + struct xfs_inobt_rec_incore *orec) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + struct xfs_perag *pag; + struct xfs_btree_cur *cur; + struct xfs_inobt_rec_incore rec; + int ilen; + int error; + int i; + int off; + + ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length)); + + /* + * Initialize the cursor. + */ + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; + + /* + * Look for the entry describing this inode. + */ + if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { + xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.", + __func__, error); + goto error0; + } + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) { + xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", + __func__, error); + goto error0; + } + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Get the offset in the inode chunk. + */ + off = agino - rec.ir_startino; + ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); + ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off))); + /* + * Mark the inode free & increment the count. + */ + rec.ir_free |= XFS_INOBT_MASK(off); + rec.ir_freecount++; + + /* + * When an inode cluster is free, it becomes eligible for removal + */ + if (!(mp->m_flags & XFS_MOUNT_IKEEP) && + (rec.ir_freecount == mp->m_ialloc_inos)) { + + *deleted = 1; + *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); + + /* + * Remove the inode cluster from the AGI B+Tree, adjust the + * AGI and Superblock inode counts, and mark the disk space + * to be freed when the transaction is committed. + */ + ilen = mp->m_ialloc_inos; + be32_add_cpu(&agi->agi_count, -ilen); + be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); + pag = xfs_perag_get(mp, agno); + pag->pagi_freecount -= ilen - 1; + xfs_perag_put(pag); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); + + if ((error = xfs_btree_delete(cur, &i))) { + xfs_warn(mp, "%s: xfs_btree_delete returned error %d.", + __func__, error); + goto error0; + } + + xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, + XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)), + mp->m_ialloc_blks, flist, mp); + } else { + *deleted = 0; + + error = xfs_inobt_update(cur, &rec); + if (error) { + xfs_warn(mp, "%s: xfs_inobt_update returned error %d.", + __func__, error); + goto error0; + } + + /* + * Change the inode free counts and log the ag/sb changes. + */ + be32_add_cpu(&agi->agi_freecount, 1); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); + pag = xfs_perag_get(mp, agno); + pag->pagi_freecount++; + xfs_perag_put(pag); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); + } + + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; + + *orec = rec; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + return 0; + +error0: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Free an inode in the free inode btree. + */ +STATIC int +xfs_difree_finobt( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t agino, + struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + struct xfs_btree_cur *cur; + struct xfs_inobt_rec_incore rec; + int offset = agino - ibtrec->ir_startino; + int error; + int i; + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + + error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); + if (error) + goto error; + if (i == 0) { + /* + * If the record does not exist in the finobt, we must have just + * freed an inode in a previously fully allocated chunk. If not, + * something is out of sync. + */ + XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error); + + error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, + ibtrec->ir_free, &i); + if (error) + goto error; + ASSERT(i == 1); + + goto out; + } + + /* + * Read and update the existing record. We could just copy the ibtrec + * across here, but that would defeat the purpose of having redundant + * metadata. By making the modifications independently, we can catch + * corruptions that we wouldn't see if we just copied from one record + * to another. + */ + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + goto error; + XFS_WANT_CORRUPTED_GOTO(i == 1, error); + + rec.ir_free |= XFS_INOBT_MASK(offset); + rec.ir_freecount++; + + XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) && + (rec.ir_freecount == ibtrec->ir_freecount), + error); + + /* + * The content of inobt records should always match between the inobt + * and finobt. The lifecycle of records in the finobt is different from + * the inobt in that the finobt only tracks records with at least one + * free inode. Hence, if all of the inodes are free and we aren't + * keeping inode chunks permanently on disk, remove the record. + * Otherwise, update the record with the new information. + */ + if (rec.ir_freecount == mp->m_ialloc_inos && + !(mp->m_flags & XFS_MOUNT_IKEEP)) { + error = xfs_btree_delete(cur, &i); + if (error) + goto error; + ASSERT(i == 1); + } else { + error = xfs_inobt_update(cur, &rec); + if (error) + goto error; + } + +out: + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error; + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + return 0; + +error: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Free disk inode. Carefully avoids touching the incore inode, all + * manipulations incore are the caller's responsibility. + * The on-disk inode is not changed by this operation, only the + * btree (free inode mask) is changed. + */ +int +xfs_difree( + struct xfs_trans *tp, /* transaction pointer */ + xfs_ino_t inode, /* inode to be freed */ + struct xfs_bmap_free *flist, /* extents to free */ + int *deleted,/* set if inode cluster was deleted */ + xfs_ino_t *first_ino)/* first inode in deleted cluster */ +{ + /* REFERENCED */ + xfs_agblock_t agbno; /* block number containing inode */ + struct xfs_buf *agbp; /* buffer for allocation group header */ + xfs_agino_t agino; /* allocation group inode number */ + xfs_agnumber_t agno; /* allocation group number */ + int error; /* error return value */ + struct xfs_mount *mp; /* mount structure for filesystem */ + struct xfs_inobt_rec_incore rec;/* btree record */ + + mp = tp->t_mountp; + + /* + * Break up inode number into its components. + */ + agno = XFS_INO_TO_AGNO(mp, inode); + if (agno >= mp->m_sb.sb_agcount) { + xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", + __func__, agno, mp->m_sb.sb_agcount); + ASSERT(0); + return EINVAL; + } + agino = XFS_INO_TO_AGINO(mp, inode); + if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { + xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", + __func__, (unsigned long long)inode, + (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); + ASSERT(0); + return EINVAL; + } + agbno = XFS_AGINO_TO_AGBNO(mp, agino); + if (agbno >= mp->m_sb.sb_agblocks) { + xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", + __func__, agbno, mp->m_sb.sb_agblocks); + ASSERT(0); + return EINVAL; + } + /* + * Get the allocation group header. + */ + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) { + xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", + __func__, error); + return error; + } + + /* + * Fix up the inode allocation btree. + */ + error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino, + &rec); + if (error) + goto error0; + + /* + * Fix up the free inode btree. + */ + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + error = xfs_difree_finobt(mp, tp, agbp, agino, &rec); + if (error) + goto error0; + } + + return 0; + +error0: + return error; +} + +STATIC int +xfs_imap_lookup( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_agino_t agino, + xfs_agblock_t agbno, + xfs_agblock_t *chunk_agbno, + xfs_agblock_t *offset_agbno, + int flags) +{ + struct xfs_inobt_rec_incore rec; + struct xfs_btree_cur *cur; + struct xfs_buf *agbp; + int error; + int i; + + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) { + xfs_alert(mp, + "%s: xfs_ialloc_read_agi() returned error %d, agno %d", + __func__, error, agno); + return error; + } + + /* + * Lookup the inode record for the given agino. If the record cannot be + * found, then it's an invalid inode number and we should abort. Once + * we have a record, we need to ensure it contains the inode number + * we are looking up. + */ + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); + if (!error) { + if (i) + error = xfs_inobt_get_rec(cur, &rec, &i); + if (!error && i == 0) + error = EINVAL; + } + + xfs_trans_brelse(tp, agbp); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + if (error) + return error; + + /* check that the returned record contains the required inode */ + if (rec.ir_startino > agino || + rec.ir_startino + mp->m_ialloc_inos <= agino) + return EINVAL; + + /* for untrusted inodes check it is allocated first */ + if ((flags & XFS_IGET_UNTRUSTED) && + (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) + return EINVAL; + + *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); + *offset_agbno = agbno - *chunk_agbno; + return 0; +} + +/* + * Return the location of the inode in imap, for mapping it into a buffer. + */ +int +xfs_imap( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_ino_t ino, /* inode to locate */ + struct xfs_imap *imap, /* location map structure */ + uint flags) /* flags for inode btree lookup */ +{ + xfs_agblock_t agbno; /* block number of inode in the alloc group */ + xfs_agino_t agino; /* inode number within alloc group */ + xfs_agnumber_t agno; /* allocation group number */ + int blks_per_cluster; /* num blocks per inode cluster */ + xfs_agblock_t chunk_agbno; /* first block in inode chunk */ + xfs_agblock_t cluster_agbno; /* first block in inode cluster */ + int error; /* error code */ + int offset; /* index of inode in its buffer */ + xfs_agblock_t offset_agbno; /* blks from chunk start to inode */ + + ASSERT(ino != NULLFSINO); + + /* + * Split up the inode number into its parts. + */ + agno = XFS_INO_TO_AGNO(mp, ino); + agino = XFS_INO_TO_AGINO(mp, ino); + agbno = XFS_AGINO_TO_AGBNO(mp, agino); + if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || + ino != XFS_AGINO_TO_INO(mp, agno, agino)) { +#ifdef DEBUG + /* + * Don't output diagnostic information for untrusted inodes + * as they can be invalid without implying corruption. + */ + if (flags & XFS_IGET_UNTRUSTED) + return EINVAL; + if (agno >= mp->m_sb.sb_agcount) { + xfs_alert(mp, + "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", + __func__, agno, mp->m_sb.sb_agcount); + } + if (agbno >= mp->m_sb.sb_agblocks) { + xfs_alert(mp, + "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", + __func__, (unsigned long long)agbno, + (unsigned long)mp->m_sb.sb_agblocks); + } + if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { + xfs_alert(mp, + "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", + __func__, ino, + XFS_AGINO_TO_INO(mp, agno, agino)); + } + xfs_stack_trace(); +#endif /* DEBUG */ + return EINVAL; + } + + blks_per_cluster = xfs_icluster_size_fsb(mp); + + /* + * For bulkstat and handle lookups, we have an untrusted inode number + * that we have to verify is valid. We cannot do this just by reading + * the inode buffer as it may have been unlinked and removed leaving + * inodes in stale state on disk. Hence we have to do a btree lookup + * in all cases where an untrusted inode number is passed. + */ + if (flags & XFS_IGET_UNTRUSTED) { + error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + &chunk_agbno, &offset_agbno, flags); + if (error) + return error; + goto out_map; + } + + /* + * If the inode cluster size is the same as the blocksize or + * smaller we get to the buffer by simple arithmetics. + */ + if (blks_per_cluster == 1) { + offset = XFS_INO_TO_OFFSET(mp, ino); + ASSERT(offset < mp->m_sb.sb_inopblock); + + imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); + imap->im_len = XFS_FSB_TO_BB(mp, 1); + imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); + return 0; + } + + /* + * If the inode chunks are aligned then use simple maths to + * find the location. Otherwise we have to do a btree + * lookup to find the location. + */ + if (mp->m_inoalign_mask) { + offset_agbno = agbno & mp->m_inoalign_mask; + chunk_agbno = agbno - offset_agbno; + } else { + error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + &chunk_agbno, &offset_agbno, flags); + if (error) + return error; + } + +out_map: + ASSERT(agbno >= chunk_agbno); + cluster_agbno = chunk_agbno + + ((offset_agbno / blks_per_cluster) * blks_per_cluster); + offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + + XFS_INO_TO_OFFSET(mp, ino); + + imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); + imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); + imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); + + /* + * If the inode number maps to a block outside the bounds + * of the file system then return NULL rather than calling + * read_buf and panicing when we get an error from the + * driver. + */ + if ((imap->im_blkno + imap->im_len) > + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { + xfs_alert(mp, + "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)", + __func__, (unsigned long long) imap->im_blkno, + (unsigned long long) imap->im_len, + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); + return EINVAL; + } + return 0; +} + +/* + * Compute and fill in value of m_in_maxlevels. + */ +void +xfs_ialloc_compute_maxlevels( + xfs_mount_t *mp) /* file system mount structure */ +{ + int level; + uint maxblocks; + uint maxleafents; + int minleafrecs; + int minnoderecs; + + maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> + XFS_INODES_PER_CHUNK_LOG; + minleafrecs = mp->m_alloc_mnr[0]; + minnoderecs = mp->m_alloc_mnr[1]; + maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; + for (level = 1; maxblocks > 1; level++) + maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; + mp->m_in_maxlevels = level; +} + +/* + * Log specified fields for the ag hdr (inode section). The growth of the agi + * structure over time requires that we interpret the buffer as two logical + * regions delineated by the end of the unlinked list. This is due to the size + * of the hash table and its location in the middle of the agi. + * + * For example, a request to log a field before agi_unlinked and a field after + * agi_unlinked could cause us to log the entire hash table and use an excessive + * amount of log space. To avoid this behavior, log the region up through + * agi_unlinked in one call and the region after agi_unlinked through the end of + * the structure in another. + */ +void +xfs_ialloc_log_agi( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *bp, /* allocation group header buffer */ + int fields) /* bitmask of fields to log */ +{ + int first; /* first byte number */ + int last; /* last byte number */ + static const short offsets[] = { /* field starting offsets */ + /* keep in sync with bit definitions */ + offsetof(xfs_agi_t, agi_magicnum), + offsetof(xfs_agi_t, agi_versionnum), + offsetof(xfs_agi_t, agi_seqno), + offsetof(xfs_agi_t, agi_length), + offsetof(xfs_agi_t, agi_count), + offsetof(xfs_agi_t, agi_root), + offsetof(xfs_agi_t, agi_level), + offsetof(xfs_agi_t, agi_freecount), + offsetof(xfs_agi_t, agi_newino), + offsetof(xfs_agi_t, agi_dirino), + offsetof(xfs_agi_t, agi_unlinked), + offsetof(xfs_agi_t, agi_free_root), + offsetof(xfs_agi_t, agi_free_level), + sizeof(xfs_agi_t) + }; +#ifdef DEBUG + xfs_agi_t *agi; /* allocation group header */ + + agi = XFS_BUF_TO_AGI(bp); + ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); +#endif + + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); + + /* + * Compute byte offsets for the first and last fields in the first + * region and log the agi buffer. This only logs up through + * agi_unlinked. + */ + if (fields & XFS_AGI_ALL_BITS_R1) { + xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1, + &first, &last); + xfs_trans_log_buf(tp, bp, first, last); + } + + /* + * Mask off the bits in the first region and calculate the first and + * last field offsets for any bits in the second region. + */ + fields &= ~XFS_AGI_ALL_BITS_R1; + if (fields) { + xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2, + &first, &last); + xfs_trans_log_buf(tp, bp, first, last); + } +} + +#ifdef DEBUG +STATIC void +xfs_check_agi_unlinked( + struct xfs_agi *agi) +{ + int i; + + for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) + ASSERT(agi->agi_unlinked[i]); +} +#else +#define xfs_check_agi_unlinked(agi) +#endif + +static bool +xfs_agi_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); + + if (xfs_sb_version_hascrc(&mp->m_sb) && + !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid)) + return false; + /* + * Validate the magic number of the agi block. + */ + if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC)) + return false; + if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) + return false; + + /* + * during growfs operations, the perag is not fully initialised, + * so we can't use it for any useful checking. growfs ensures we can't + * use it by using uncached buffers that don't have the perag attached + * so we can detect and avoid this problem. + */ + if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) + return false; + + xfs_check_agi_unlinked(agi); + return true; +} + +static void +xfs_agi_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, + XFS_ERRTAG_IALLOC_READ_AGI, + XFS_RANDOM_IALLOC_READ_AGI)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +static void +xfs_agi_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + + if (!xfs_agi_verify(bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (bip) + XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); + xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF); +} + +const struct xfs_buf_ops xfs_agi_buf_ops = { + .verify_read = xfs_agi_read_verify, + .verify_write = xfs_agi_write_verify, +}; + +/* + * Read in the allocation group header (inode allocation section) + */ +int +xfs_read_agi( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + struct xfs_buf **bpp) /* allocation group hdr buf */ +{ + int error; + + trace_xfs_read_agi(mp, agno); + + ASSERT(agno != NULLAGNUMBER); + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); + if (error) + return error; + + xfs_buf_set_ref(*bpp, XFS_AGI_REF); + return 0; +} + +int +xfs_ialloc_read_agi( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + struct xfs_buf **bpp) /* allocation group hdr buf */ +{ + struct xfs_agi *agi; /* allocation group header */ + struct xfs_perag *pag; /* per allocation group data */ + int error; + + trace_xfs_ialloc_read_agi(mp, agno); + + error = xfs_read_agi(mp, tp, agno, bpp); + if (error) + return error; + + agi = XFS_BUF_TO_AGI(*bpp); + pag = xfs_perag_get(mp, agno); + if (!pag->pagi_init) { + pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); + pag->pagi_count = be32_to_cpu(agi->agi_count); + pag->pagi_init = 1; + } + + /* + * It's possible for these to be out of sync if + * we are in the middle of a forced shutdown. + */ + ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || + XFS_FORCED_SHUTDOWN(mp)); + xfs_perag_put(pag); + return 0; +} + +/* + * Read in the agi to initialise the per-ag data in the mount structure + */ +int +xfs_ialloc_pagi_init( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno) /* allocation group number */ +{ + xfs_buf_t *bp = NULL; + int error; + + error = xfs_ialloc_read_agi(mp, tp, agno, &bp); + if (error) + return error; + if (bp) + xfs_trans_brelse(tp, bp); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c new file mode 100644 index 000000000000..726f83a681a5 --- /dev/null +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" +#include "xfs_alloc.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_cksum.h" +#include "xfs_trans.h" + + +STATIC int +xfs_inobt_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_inobt_mnr[level != 0]; +} + +STATIC struct xfs_btree_cur * +xfs_inobt_dup_cursor( + struct xfs_btree_cur *cur) +{ + return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_btnum); +} + +STATIC void +xfs_inobt_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *nptr, + int inc) /* level change */ +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + + agi->agi_root = nptr->s; + be32_add_cpu(&agi->agi_level, inc); + xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); +} + +STATIC void +xfs_finobt_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *nptr, + int inc) /* level change */ +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + + agi->agi_free_root = nptr->s; + be32_add_cpu(&agi->agi_free_level, inc); + xfs_ialloc_log_agi(cur->bc_tp, agbp, + XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); +} + +STATIC int +xfs_inobt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + xfs_alloc_arg_t args; /* block allocation args */ + int error; /* error return value */ + xfs_agblock_t sbno = be32_to_cpu(start->s); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + memset(&args, 0, sizeof(args)); + args.tp = cur->bc_tp; + args.mp = cur->bc_mp; + args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); + args.minlen = 1; + args.maxlen = 1; + args.prod = 1; + args.type = XFS_ALLOCTYPE_NEAR_BNO; + + error = xfs_alloc_vextent(&args); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + if (args.fsbno == NULLFSBLOCK) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + ASSERT(args.len == 1); + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + + new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); + *stat = 1; + return 0; +} + +STATIC int +xfs_inobt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + xfs_fsblock_t fsbno; + int error; + + fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)); + error = xfs_free_extent(cur->bc_tp, fsbno, 1); + if (error) + return error; + + xfs_trans_binval(cur->bc_tp, bp); + return error; +} + +STATIC int +xfs_inobt_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_inobt_mxr[level != 0]; +} + +STATIC void +xfs_inobt_init_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + key->inobt.ir_startino = rec->inobt.ir_startino; +} + +STATIC void +xfs_inobt_init_rec_from_key( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + rec->inobt.ir_startino = key->inobt.ir_startino; +} + +STATIC void +xfs_inobt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); + rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount); + rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); +} + +/* + * initial value of ptr for lookup + */ +STATIC void +xfs_inobt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); + + ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); + + ptr->s = agi->agi_root; +} + +STATIC void +xfs_finobt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); + + ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); + ptr->s = agi->agi_free_root; +} + +STATIC __int64_t +xfs_inobt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + return (__int64_t)be32_to_cpu(key->inobt.ir_startino) - + cur->bc_rec.i.ir_startino; +} + +static int +xfs_inobt_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_perag *pag = bp->b_pag; + unsigned int level; + + /* + * During growfs operations, we can't verify the exact owner as the + * perag is not fully initialised and hence not attached to the buffer. + * + * Similarly, during log recovery we will have a perag structure + * attached, but the agi information will not yet have been initialised + * from the on disk AGI. We don't currently use any of this information, + * but beware of the landmine (i.e. need to check pag->pagi_init) if we + * ever do. + */ + switch (block->bb_magic) { + case cpu_to_be32(XFS_IBT_CRC_MAGIC): + case cpu_to_be32(XFS_FIBT_CRC_MAGIC): + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) + return false; + if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) + return false; + if (pag && + be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) + return false; + /* fall through */ + case cpu_to_be32(XFS_IBT_MAGIC): + case cpu_to_be32(XFS_FIBT_MAGIC): + break; + default: + return 0; + } + + /* numrecs and level verification */ + level = be16_to_cpu(block->bb_level); + if (level >= mp->m_in_maxlevels) + return false; + if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0]) + return false; + + /* sibling pointer verification */ + if (!block->bb_u.s.bb_leftsib || + (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && + block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) + return false; + if (!block->bb_u.s.bb_rightsib || + (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && + block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) + return false; + + return true; +} + +static void +xfs_inobt_read_verify( + struct xfs_buf *bp) +{ + if (!xfs_btree_sblock_verify_crc(bp)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_inobt_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_verifier_error(bp); + } +} + +static void +xfs_inobt_write_verify( + struct xfs_buf *bp) +{ + if (!xfs_inobt_verify(bp)) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + xfs_btree_sblock_calc_crc(bp); + +} + +const struct xfs_buf_ops xfs_inobt_buf_ops = { + .verify_read = xfs_inobt_read_verify, + .verify_write = xfs_inobt_write_verify, +}; + +#if defined(DEBUG) || defined(XFS_WARN) +STATIC int +xfs_inobt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return be32_to_cpu(k1->inobt.ir_startino) < + be32_to_cpu(k2->inobt.ir_startino); +} + +STATIC int +xfs_inobt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <= + be32_to_cpu(r2->inobt.ir_startino); +} +#endif /* DEBUG */ + +static const struct xfs_btree_ops xfs_inobt_ops = { + .rec_len = sizeof(xfs_inobt_rec_t), + .key_len = sizeof(xfs_inobt_key_t), + + .dup_cursor = xfs_inobt_dup_cursor, + .set_root = xfs_inobt_set_root, + .alloc_block = xfs_inobt_alloc_block, + .free_block = xfs_inobt_free_block, + .get_minrecs = xfs_inobt_get_minrecs, + .get_maxrecs = xfs_inobt_get_maxrecs, + .init_key_from_rec = xfs_inobt_init_key_from_rec, + .init_rec_from_key = xfs_inobt_init_rec_from_key, + .init_rec_from_cur = xfs_inobt_init_rec_from_cur, + .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, + .key_diff = xfs_inobt_key_diff, + .buf_ops = &xfs_inobt_buf_ops, +#if defined(DEBUG) || defined(XFS_WARN) + .keys_inorder = xfs_inobt_keys_inorder, + .recs_inorder = xfs_inobt_recs_inorder, +#endif +}; + +static const struct xfs_btree_ops xfs_finobt_ops = { + .rec_len = sizeof(xfs_inobt_rec_t), + .key_len = sizeof(xfs_inobt_key_t), + + .dup_cursor = xfs_inobt_dup_cursor, + .set_root = xfs_finobt_set_root, + .alloc_block = xfs_inobt_alloc_block, + .free_block = xfs_inobt_free_block, + .get_minrecs = xfs_inobt_get_minrecs, + .get_maxrecs = xfs_inobt_get_maxrecs, + .init_key_from_rec = xfs_inobt_init_key_from_rec, + .init_rec_from_key = xfs_inobt_init_rec_from_key, + .init_rec_from_cur = xfs_inobt_init_rec_from_cur, + .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, + .key_diff = xfs_inobt_key_diff, + .buf_ops = &xfs_inobt_buf_ops, +#if defined(DEBUG) || defined(XFS_WARN) + .keys_inorder = xfs_inobt_keys_inorder, + .recs_inorder = xfs_inobt_recs_inorder, +#endif +}; + +/* + * Allocate a new inode btree cursor. + */ +struct xfs_btree_cur * /* new inode btree cursor */ +xfs_inobt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer for agi structure */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_btnum_t btnum) /* ialloc or free ino btree */ +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_btree_cur *cur; + + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_btnum = btnum; + if (btnum == XFS_BTNUM_INO) { + cur->bc_nlevels = be32_to_cpu(agi->agi_level); + cur->bc_ops = &xfs_inobt_ops; + } else { + cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); + cur->bc_ops = &xfs_finobt_ops; + } + + cur->bc_blocklog = mp->m_sb.sb_blocklog; + + if (xfs_sb_version_hascrc(&mp->m_sb)) + cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; + + cur->bc_private.a.agbp = agbp; + cur->bc_private.a.agno = agno; + + return cur; +} + +/* + * Calculate number of records in an inobt btree block. + */ +int +xfs_inobt_maxrecs( + struct xfs_mount *mp, + int blocklen, + int leaf) +{ + blocklen -= XFS_INOBT_BLOCK_LEN(mp); + + if (leaf) + return blocklen / sizeof(xfs_inobt_rec_t); + return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); +} diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c new file mode 100644 index 000000000000..1e5366d7745e --- /dev/null +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -0,0 +1,479 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_error.h" +#include "xfs_cksum.h" +#include "xfs_icache.h" +#include "xfs_trans.h" +#include "xfs_ialloc.h" +#include "xfs_dinode.h" + +/* + * Check that none of the inode's in the buffer have a next + * unlinked field of 0. + */ +#if defined(DEBUG) +void +xfs_inobp_check( + xfs_mount_t *mp, + xfs_buf_t *bp) +{ + int i; + int j; + xfs_dinode_t *dip; + + j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; + + for (i = 0; i < j; i++) { + dip = (xfs_dinode_t *)xfs_buf_offset(bp, + i * mp->m_sb.sb_inodesize); + if (!dip->di_next_unlinked) { + xfs_alert(mp, + "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.", + i, (long long)bp->b_bn); + } + } +} +#endif + +/* + * If we are doing readahead on an inode buffer, we might be in log recovery + * reading an inode allocation buffer that hasn't yet been replayed, and hence + * has not had the inode cores stamped into it. Hence for readahead, the buffer + * may be potentially invalid. + * + * If the readahead buffer is invalid, we don't want to mark it with an error, + * but we do want to clear the DONE status of the buffer so that a followup read + * will re-read it from disk. This will ensure that we don't get an unnecessary + * warnings during log recovery and we don't get unnecssary panics on debug + * kernels. + */ +static void +xfs_inode_buf_verify( + struct xfs_buf *bp, + bool readahead) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + int i; + int ni; + + /* + * Validate the magic number and version of every inode in the buffer + */ + ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; + for (i = 0; i < ni; i++) { + int di_ok; + xfs_dinode_t *dip; + + dip = (struct xfs_dinode *)xfs_buf_offset(bp, + (i << mp->m_sb.sb_inodelog)); + di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && + XFS_DINODE_GOOD_VERSION(dip->di_version); + if (unlikely(XFS_TEST_ERROR(!di_ok, mp, + XFS_ERRTAG_ITOBP_INOTOBP, + XFS_RANDOM_ITOBP_INOTOBP))) { + if (readahead) { + bp->b_flags &= ~XBF_DONE; + return; + } + + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); +#ifdef DEBUG + xfs_alert(mp, + "bad inode magic/vsn daddr %lld #%d (magic=%x)", + (unsigned long long)bp->b_bn, i, + be16_to_cpu(dip->di_magic)); +#endif + } + } + xfs_inobp_check(mp, bp); +} + + +static void +xfs_inode_buf_read_verify( + struct xfs_buf *bp) +{ + xfs_inode_buf_verify(bp, false); +} + +static void +xfs_inode_buf_readahead_verify( + struct xfs_buf *bp) +{ + xfs_inode_buf_verify(bp, true); +} + +static void +xfs_inode_buf_write_verify( + struct xfs_buf *bp) +{ + xfs_inode_buf_verify(bp, false); +} + +const struct xfs_buf_ops xfs_inode_buf_ops = { + .verify_read = xfs_inode_buf_read_verify, + .verify_write = xfs_inode_buf_write_verify, +}; + +const struct xfs_buf_ops xfs_inode_buf_ra_ops = { + .verify_read = xfs_inode_buf_readahead_verify, + .verify_write = xfs_inode_buf_write_verify, +}; + + +/* + * This routine is called to map an inode to the buffer containing the on-disk + * version of the inode. It returns a pointer to the buffer containing the + * on-disk inode in the bpp parameter, and in the dipp parameter it returns a + * pointer to the on-disk inode within that buffer. + * + * If a non-zero error is returned, then the contents of bpp and dipp are + * undefined. + */ +int +xfs_imap_to_bp( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_imap *imap, + struct xfs_dinode **dipp, + struct xfs_buf **bpp, + uint buf_flags, + uint iget_flags) +{ + struct xfs_buf *bp; + int error; + + buf_flags |= XBF_UNMAPPED; + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, + (int)imap->im_len, buf_flags, &bp, + &xfs_inode_buf_ops); + if (error) { + if (error == EAGAIN) { + ASSERT(buf_flags & XBF_TRYLOCK); + return error; + } + + if (error == EFSCORRUPTED && + (iget_flags & XFS_IGET_UNTRUSTED)) + return EINVAL; + + xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", + __func__, error); + return error; + } + + *bpp = bp; + *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); + return 0; +} + +void +xfs_dinode_from_disk( + xfs_icdinode_t *to, + xfs_dinode_t *from) +{ + to->di_magic = be16_to_cpu(from->di_magic); + to->di_mode = be16_to_cpu(from->di_mode); + to->di_version = from ->di_version; + to->di_format = from->di_format; + to->di_onlink = be16_to_cpu(from->di_onlink); + to->di_uid = be32_to_cpu(from->di_uid); + to->di_gid = be32_to_cpu(from->di_gid); + to->di_nlink = be32_to_cpu(from->di_nlink); + to->di_projid_lo = be16_to_cpu(from->di_projid_lo); + to->di_projid_hi = be16_to_cpu(from->di_projid_hi); + memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); + to->di_flushiter = be16_to_cpu(from->di_flushiter); + to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); + to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec); + to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec); + to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec); + to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec); + to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec); + to->di_size = be64_to_cpu(from->di_size); + to->di_nblocks = be64_to_cpu(from->di_nblocks); + to->di_extsize = be32_to_cpu(from->di_extsize); + to->di_nextents = be32_to_cpu(from->di_nextents); + to->di_anextents = be16_to_cpu(from->di_anextents); + to->di_forkoff = from->di_forkoff; + to->di_aformat = from->di_aformat; + to->di_dmevmask = be32_to_cpu(from->di_dmevmask); + to->di_dmstate = be16_to_cpu(from->di_dmstate); + to->di_flags = be16_to_cpu(from->di_flags); + to->di_gen = be32_to_cpu(from->di_gen); + + if (to->di_version == 3) { + to->di_changecount = be64_to_cpu(from->di_changecount); + to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); + to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); + to->di_flags2 = be64_to_cpu(from->di_flags2); + to->di_ino = be64_to_cpu(from->di_ino); + to->di_lsn = be64_to_cpu(from->di_lsn); + memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); + uuid_copy(&to->di_uuid, &from->di_uuid); + } +} + +void +xfs_dinode_to_disk( + xfs_dinode_t *to, + xfs_icdinode_t *from) +{ + to->di_magic = cpu_to_be16(from->di_magic); + to->di_mode = cpu_to_be16(from->di_mode); + to->di_version = from ->di_version; + to->di_format = from->di_format; + to->di_onlink = cpu_to_be16(from->di_onlink); + to->di_uid = cpu_to_be32(from->di_uid); + to->di_gid = cpu_to_be32(from->di_gid); + to->di_nlink = cpu_to_be32(from->di_nlink); + to->di_projid_lo = cpu_to_be16(from->di_projid_lo); + to->di_projid_hi = cpu_to_be16(from->di_projid_hi); + memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); + to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); + to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); + to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); + to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); + to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); + to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); + to->di_size = cpu_to_be64(from->di_size); + to->di_nblocks = cpu_to_be64(from->di_nblocks); + to->di_extsize = cpu_to_be32(from->di_extsize); + to->di_nextents = cpu_to_be32(from->di_nextents); + to->di_anextents = cpu_to_be16(from->di_anextents); + to->di_forkoff = from->di_forkoff; + to->di_aformat = from->di_aformat; + to->di_dmevmask = cpu_to_be32(from->di_dmevmask); + to->di_dmstate = cpu_to_be16(from->di_dmstate); + to->di_flags = cpu_to_be16(from->di_flags); + to->di_gen = cpu_to_be32(from->di_gen); + + if (from->di_version == 3) { + to->di_changecount = cpu_to_be64(from->di_changecount); + to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); + to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); + to->di_flags2 = cpu_to_be64(from->di_flags2); + to->di_ino = cpu_to_be64(from->di_ino); + to->di_lsn = cpu_to_be64(from->di_lsn); + memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); + uuid_copy(&to->di_uuid, &from->di_uuid); + to->di_flushiter = 0; + } else { + to->di_flushiter = cpu_to_be16(from->di_flushiter); + } +} + +static bool +xfs_dinode_verify( + struct xfs_mount *mp, + struct xfs_inode *ip, + struct xfs_dinode *dip) +{ + if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) + return false; + + /* only version 3 or greater inodes are extensively verified here */ + if (dip->di_version < 3) + return true; + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, + XFS_DINODE_CRC_OFF)) + return false; + if (be64_to_cpu(dip->di_ino) != ip->i_ino) + return false; + if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid)) + return false; + return true; +} + +void +xfs_dinode_calc_crc( + struct xfs_mount *mp, + struct xfs_dinode *dip) +{ + __uint32_t crc; + + if (dip->di_version < 3) + return; + + ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); + crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, + XFS_DINODE_CRC_OFF); + dip->di_crc = xfs_end_cksum(crc); +} + +/* + * Read the disk inode attributes into the in-core inode structure. + * + * For version 5 superblocks, if we are initialising a new inode and we are not + * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new + * inode core with a random generation number. If we are keeping inodes around, + * we need to read the inode cluster to get the existing generation number off + * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode + * format) then log recovery is dependent on the di_flushiter field being + * initialised from the current on-disk value and hence we must also read the + * inode off disk. + */ +int +xfs_iread( + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_inode_t *ip, + uint iget_flags) +{ + xfs_buf_t *bp; + xfs_dinode_t *dip; + int error; + + /* + * Fill in the location information in the in-core inode. + */ + error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); + if (error) + return error; + + /* shortcut IO on inode allocation if possible */ + if ((iget_flags & XFS_IGET_CREATE) && + xfs_sb_version_hascrc(&mp->m_sb) && + !(mp->m_flags & XFS_MOUNT_IKEEP)) { + /* initialise the on-disk inode core */ + memset(&ip->i_d, 0, sizeof(ip->i_d)); + ip->i_d.di_magic = XFS_DINODE_MAGIC; + ip->i_d.di_gen = prandom_u32(); + if (xfs_sb_version_hascrc(&mp->m_sb)) { + ip->i_d.di_version = 3; + ip->i_d.di_ino = ip->i_ino; + uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); + } else + ip->i_d.di_version = 2; + return 0; + } + + /* + * Get pointers to the on-disk inode and the buffer containing it. + */ + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); + if (error) + return error; + + /* even unallocated inodes are verified */ + if (!xfs_dinode_verify(mp, ip, dip)) { + xfs_alert(mp, "%s: validation failed for inode %lld failed", + __func__, ip->i_ino); + + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); + error = EFSCORRUPTED; + goto out_brelse; + } + + /* + * If the on-disk inode is already linked to a directory + * entry, copy all of the inode into the in-core inode. + * xfs_iformat_fork() handles copying in the inode format + * specific information. + * Otherwise, just get the truly permanent information. + */ + if (dip->di_mode) { + xfs_dinode_from_disk(&ip->i_d, dip); + error = xfs_iformat_fork(ip, dip); + if (error) { +#ifdef DEBUG + xfs_alert(mp, "%s: xfs_iformat() returned error %d", + __func__, error); +#endif /* DEBUG */ + goto out_brelse; + } + } else { + /* + * Partial initialisation of the in-core inode. Just the bits + * that xfs_ialloc won't overwrite or relies on being correct. + */ + ip->i_d.di_magic = be16_to_cpu(dip->di_magic); + ip->i_d.di_version = dip->di_version; + ip->i_d.di_gen = be32_to_cpu(dip->di_gen); + ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); + + if (dip->di_version == 3) { + ip->i_d.di_ino = be64_to_cpu(dip->di_ino); + uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid); + } + + /* + * Make sure to pull in the mode here as well in + * case the inode is released without being used. + * This ensures that xfs_inactive() will see that + * the inode is already free and not try to mess + * with the uninitialized part of it. + */ + ip->i_d.di_mode = 0; + } + + /* + * Automatically convert version 1 inode formats in memory to version 2 + * inode format. If the inode is modified, it will get logged and + * rewritten as a version 2 inode. We can do this because we set the + * superblock feature bit for v2 inodes unconditionally during mount + * and it means the reast of the code can assume the inode version is 2 + * or higher. + */ + if (ip->i_d.di_version == 1) { + ip->i_d.di_version = 2; + memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); + ip->i_d.di_nlink = ip->i_d.di_onlink; + ip->i_d.di_onlink = 0; + xfs_set_projid(ip, 0); + } + + ip->i_delayed_blks = 0; + + /* + * Mark the buffer containing the inode as something to keep + * around for a while. This helps to keep recently accessed + * meta-data in-core longer. + */ + xfs_buf_set_ref(bp, XFS_INO_REF); + + /* + * Use xfs_trans_brelse() to release the buffer containing the on-disk + * inode, because it was acquired with xfs_trans_read_buf() in + * xfs_imap_to_bp() above. If tp is NULL, this is just a normal + * brelse(). If we're within a transaction, then xfs_trans_brelse() + * will only release the buffer if it is not dirty within the + * transaction. It will be OK to release the buffer in this case, + * because inodes on disk are never destroyed and we will be locking the + * new in-core inode before putting it in the cache where other + * processes can find it. Thus we don't have to worry about the inode + * being changed just because we released the buffer. + */ + out_brelse: + xfs_trans_brelse(tp, bp); + return error; +} diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c new file mode 100644 index 000000000000..2a124e97f082 --- /dev/null +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -0,0 +1,1906 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include + +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_inum.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_bmap_btree.h" +#include "xfs_bmap.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" + +kmem_zone_t *xfs_ifork_zone; + +STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); +STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); +STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); + +#ifdef DEBUG +/* + * Make sure that the extents in the given memory buffer + * are valid. + */ +void +xfs_validate_extents( + xfs_ifork_t *ifp, + int nrecs, + xfs_exntfmt_t fmt) +{ + xfs_bmbt_irec_t irec; + xfs_bmbt_rec_host_t rec; + int i; + + for (i = 0; i < nrecs; i++) { + xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); + rec.l0 = get_unaligned(&ep->l0); + rec.l1 = get_unaligned(&ep->l1); + xfs_bmbt_get_all(&rec, &irec); + if (fmt == XFS_EXTFMT_NOSTATE) + ASSERT(irec.br_state == XFS_EXT_NORM); + } +} +#else /* DEBUG */ +#define xfs_validate_extents(ifp, nrecs, fmt) +#endif /* DEBUG */ + + +/* + * Move inode type and inode format specific information from the + * on-disk inode to the in-core inode. For fifos, devs, and sockets + * this means set if_rdev to the proper value. For files, directories, + * and symlinks this means to bring in the in-line data or extent + * pointers. For a file in B-tree format, only the root is immediately + * brought in-core. The rest will be in-lined in if_extents when it + * is first referenced (see xfs_iread_extents()). + */ +int +xfs_iformat_fork( + xfs_inode_t *ip, + xfs_dinode_t *dip) +{ + xfs_attr_shortform_t *atp; + int size; + int error = 0; + xfs_fsize_t di_size; + + if (unlikely(be32_to_cpu(dip->di_nextents) + + be16_to_cpu(dip->di_anextents) > + be64_to_cpu(dip->di_nblocks))) { + xfs_warn(ip->i_mount, + "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", + (unsigned long long)ip->i_ino, + (int)(be32_to_cpu(dip->di_nextents) + + be16_to_cpu(dip->di_anextents)), + (unsigned long long) + be64_to_cpu(dip->di_nblocks)); + XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return EFSCORRUPTED; + } + + if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { + xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", + (unsigned long long)ip->i_ino, + dip->di_forkoff); + XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return EFSCORRUPTED; + } + + if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && + !ip->i_mount->m_rtdev_targp)) { + xfs_warn(ip->i_mount, + "corrupt dinode %Lu, has realtime flag set.", + ip->i_ino); + XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", + XFS_ERRLEVEL_LOW, ip->i_mount, dip); + return EFSCORRUPTED; + } + + switch (ip->i_d.di_mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFSOCK: + if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { + XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return EFSCORRUPTED; + } + ip->i_d.di_size = 0; + ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); + break; + + case S_IFREG: + case S_IFLNK: + case S_IFDIR: + switch (dip->di_format) { + case XFS_DINODE_FMT_LOCAL: + /* + * no local regular files yet + */ + if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { + xfs_warn(ip->i_mount, + "corrupt inode %Lu (local format for regular file).", + (unsigned long long) ip->i_ino); + XFS_CORRUPTION_ERROR("xfs_iformat(4)", + XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return EFSCORRUPTED; + } + + di_size = be64_to_cpu(dip->di_size); + if (unlikely(di_size < 0 || + di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { + xfs_warn(ip->i_mount, + "corrupt inode %Lu (bad size %Ld for local inode).", + (unsigned long long) ip->i_ino, + (long long) di_size); + XFS_CORRUPTION_ERROR("xfs_iformat(5)", + XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return EFSCORRUPTED; + } + + size = (int)di_size; + error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); + break; + case XFS_DINODE_FMT_EXTENTS: + error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); + break; + case XFS_DINODE_FMT_BTREE: + error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); + break; + default: + XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, + ip->i_mount); + return EFSCORRUPTED; + } + break; + + default: + XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); + return EFSCORRUPTED; + } + if (error) { + return error; + } + if (!XFS_DFORK_Q(dip)) + return 0; + + ASSERT(ip->i_afp == NULL); + ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); + + switch (dip->di_aformat) { + case XFS_DINODE_FMT_LOCAL: + atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); + size = be16_to_cpu(atp->hdr.totsize); + + if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { + xfs_warn(ip->i_mount, + "corrupt inode %Lu (bad attr fork size %Ld).", + (unsigned long long) ip->i_ino, + (long long) size); + XFS_CORRUPTION_ERROR("xfs_iformat(8)", + XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return EFSCORRUPTED; + } + + error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); + break; + case XFS_DINODE_FMT_EXTENTS: + error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); + break; + case XFS_DINODE_FMT_BTREE: + error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); + break; + default: + error = EFSCORRUPTED; + break; + } + if (error) { + kmem_zone_free(xfs_ifork_zone, ip->i_afp); + ip->i_afp = NULL; + xfs_idestroy_fork(ip, XFS_DATA_FORK); + } + return error; +} + +/* + * The file is in-lined in the on-disk inode. + * If it fits into if_inline_data, then copy + * it there, otherwise allocate a buffer for it + * and copy the data there. Either way, set + * if_data to point at the data. + * If we allocate a buffer for the data, make + * sure that its size is a multiple of 4 and + * record the real size in i_real_bytes. + */ +STATIC int +xfs_iformat_local( + xfs_inode_t *ip, + xfs_dinode_t *dip, + int whichfork, + int size) +{ + xfs_ifork_t *ifp; + int real_size; + + /* + * If the size is unreasonable, then something + * is wrong and we just bail out rather than crash in + * kmem_alloc() or memcpy() below. + */ + if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { + xfs_warn(ip->i_mount, + "corrupt inode %Lu (bad size %d for local fork, size = %d).", + (unsigned long long) ip->i_ino, size, + XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); + XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return EFSCORRUPTED; + } + ifp = XFS_IFORK_PTR(ip, whichfork); + real_size = 0; + if (size == 0) + ifp->if_u1.if_data = NULL; + else if (size <= sizeof(ifp->if_u2.if_inline_data)) + ifp->if_u1.if_data = ifp->if_u2.if_inline_data; + else { + real_size = roundup(size, 4); + ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); + } + ifp->if_bytes = size; + ifp->if_real_bytes = real_size; + if (size) + memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); + ifp->if_flags &= ~XFS_IFEXTENTS; + ifp->if_flags |= XFS_IFINLINE; + return 0; +} + +/* + * The file consists of a set of extents all + * of which fit into the on-disk inode. + * If there are few enough extents to fit into + * the if_inline_ext, then copy them there. + * Otherwise allocate a buffer for them and copy + * them into it. Either way, set if_extents + * to point at the extents. + */ +STATIC int +xfs_iformat_extents( + xfs_inode_t *ip, + xfs_dinode_t *dip, + int whichfork) +{ + xfs_bmbt_rec_t *dp; + xfs_ifork_t *ifp; + int nex; + int size; + int i; + + ifp = XFS_IFORK_PTR(ip, whichfork); + nex = XFS_DFORK_NEXTENTS(dip, whichfork); + size = nex * (uint)sizeof(xfs_bmbt_rec_t); + + /* + * If the number of extents is unreasonable, then something + * is wrong and we just bail out rather than crash in + * kmem_alloc() or memcpy() below. + */ + if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { + xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", + (unsigned long long) ip->i_ino, nex); + XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return EFSCORRUPTED; + } + + ifp->if_real_bytes = 0; + if (nex == 0) + ifp->if_u1.if_extents = NULL; + else if (nex <= XFS_INLINE_EXTS) + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + else + xfs_iext_add(ifp, 0, nex); + + ifp->if_bytes = size; + if (size) { + dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); + xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); + for (i = 0; i < nex; i++, dp++) { + xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); + ep->l0 = get_unaligned_be64(&dp->l0); + ep->l1 = get_unaligned_be64(&dp->l1); + } + XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); + if (whichfork != XFS_DATA_FORK || + XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) + if (unlikely(xfs_check_nostate_extents( + ifp, 0, nex))) { + XFS_ERROR_REPORT("xfs_iformat_extents(2)", + XFS_ERRLEVEL_LOW, + ip->i_mount); + return EFSCORRUPTED; + } + } + ifp->if_flags |= XFS_IFEXTENTS; + return 0; +} + +/* + * The file has too many extents to fit into + * the inode, so they are in B-tree format. + * Allocate a buffer for the root of the B-tree + * and copy the root into it. The i_extents + * field will remain NULL until all of the + * extents are read in (when they are needed). + */ +STATIC int +xfs_iformat_btree( + xfs_inode_t *ip, + xfs_dinode_t *dip, + int whichfork) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_bmdr_block_t *dfp; + xfs_ifork_t *ifp; + /* REFERENCED */ + int nrecs; + int size; + + ifp = XFS_IFORK_PTR(ip, whichfork); + dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); + size = XFS_BMAP_BROOT_SPACE(mp, dfp); + nrecs = be16_to_cpu(dfp->bb_numrecs); + + /* + * blow out if -- fork has less extents than can fit in + * fork (fork shouldn't be a btree format), root btree + * block has more records than can fit into the fork, + * or the number of extents is greater than the number of + * blocks. + */ + if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= + XFS_IFORK_MAXEXT(ip, whichfork) || + XFS_BMDR_SPACE_CALC(nrecs) > + XFS_DFORK_SIZE(dip, mp, whichfork) || + XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { + xfs_warn(mp, "corrupt inode %Lu (btree).", + (unsigned long long) ip->i_ino); + XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, + mp, dip); + return EFSCORRUPTED; + } + + ifp->if_broot_bytes = size; + ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS); + ASSERT(ifp->if_broot != NULL); + /* + * Copy and convert from the on-disk structure + * to the in-memory structure. + */ + xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), + ifp->if_broot, size); + ifp->if_flags &= ~XFS_IFEXTENTS; + ifp->if_flags |= XFS_IFBROOT; + + return 0; +} + +/* + * Read in extents from a btree-format inode. + * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. + */ +int +xfs_iread_extents( + xfs_trans_t *tp, + xfs_inode_t *ip, + int whichfork) +{ + int error; + xfs_ifork_t *ifp; + xfs_extnum_t nextents; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { + XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, + ip->i_mount); + return EFSCORRUPTED; + } + nextents = XFS_IFORK_NEXTENTS(ip, whichfork); + ifp = XFS_IFORK_PTR(ip, whichfork); + + /* + * We know that the size is valid (it's checked in iformat_btree) + */ + ifp->if_bytes = ifp->if_real_bytes = 0; + ifp->if_flags |= XFS_IFEXTENTS; + xfs_iext_add(ifp, 0, nextents); + error = xfs_bmap_read_extents(tp, ip, whichfork); + if (error) { + xfs_iext_destroy(ifp); + ifp->if_flags &= ~XFS_IFEXTENTS; + return error; + } + xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); + return 0; +} +/* + * Reallocate the space for if_broot based on the number of records + * being added or deleted as indicated in rec_diff. Move the records + * and pointers in if_broot to fit the new size. When shrinking this + * will eliminate holes between the records and pointers created by + * the caller. When growing this will create holes to be filled in + * by the caller. + * + * The caller must not request to add more records than would fit in + * the on-disk inode root. If the if_broot is currently NULL, then + * if we are adding records, one will be allocated. The caller must also + * not request that the number of records go below zero, although + * it can go to zero. + * + * ip -- the inode whose if_broot area is changing + * ext_diff -- the change in the number of records, positive or negative, + * requested for the if_broot array. + */ +void +xfs_iroot_realloc( + xfs_inode_t *ip, + int rec_diff, + int whichfork) +{ + struct xfs_mount *mp = ip->i_mount; + int cur_max; + xfs_ifork_t *ifp; + struct xfs_btree_block *new_broot; + int new_max; + size_t new_size; + char *np; + char *op; + + /* + * Handle the degenerate case quietly. + */ + if (rec_diff == 0) { + return; + } + + ifp = XFS_IFORK_PTR(ip, whichfork); + if (rec_diff > 0) { + /* + * If there wasn't any memory allocated before, just + * allocate it now and get out. + */ + if (ifp->if_broot_bytes == 0) { + new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff); + ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); + ifp->if_broot_bytes = (int)new_size; + return; + } + + /* + * If there is already an existing if_broot, then we need + * to realloc() it and shift the pointers to their new + * location. The records don't change location because + * they are kept butted up against the btree block header. + */ + cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); + new_max = cur_max + rec_diff; + new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); + ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, + XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max), + KM_SLEEP | KM_NOFS); + op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + ifp->if_broot_bytes); + np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + (int)new_size); + ifp->if_broot_bytes = (int)new_size; + ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + XFS_IFORK_SIZE(ip, whichfork)); + memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); + return; + } + + /* + * rec_diff is less than 0. In this case, we are shrinking the + * if_broot buffer. It must already exist. If we go to zero + * records, just get rid of the root and clear the status bit. + */ + ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); + cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); + new_max = cur_max + rec_diff; + ASSERT(new_max >= 0); + if (new_max > 0) + new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); + else + new_size = 0; + if (new_size > 0) { + new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); + /* + * First copy over the btree block header. + */ + memcpy(new_broot, ifp->if_broot, + XFS_BMBT_BLOCK_LEN(ip->i_mount)); + } else { + new_broot = NULL; + ifp->if_flags &= ~XFS_IFBROOT; + } + + /* + * Only copy the records and pointers if there are any. + */ + if (new_max > 0) { + /* + * First copy the records. + */ + op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); + np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); + memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); + + /* + * Then copy the pointers. + */ + op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + ifp->if_broot_bytes); + np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, + (int)new_size); + memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); + } + kmem_free(ifp->if_broot); + ifp->if_broot = new_broot; + ifp->if_broot_bytes = (int)new_size; + if (ifp->if_broot) + ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + XFS_IFORK_SIZE(ip, whichfork)); + return; +} + + +/* + * This is called when the amount of space needed for if_data + * is increased or decreased. The change in size is indicated by + * the number of bytes that need to be added or deleted in the + * byte_diff parameter. + * + * If the amount of space needed has decreased below the size of the + * inline buffer, then switch to using the inline buffer. Otherwise, + * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer + * to what is needed. + * + * ip -- the inode whose if_data area is changing + * byte_diff -- the change in the number of bytes, positive or negative, + * requested for the if_data array. + */ +void +xfs_idata_realloc( + xfs_inode_t *ip, + int byte_diff, + int whichfork) +{ + xfs_ifork_t *ifp; + int new_size; + int real_size; + + if (byte_diff == 0) { + return; + } + + ifp = XFS_IFORK_PTR(ip, whichfork); + new_size = (int)ifp->if_bytes + byte_diff; + ASSERT(new_size >= 0); + + if (new_size == 0) { + if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { + kmem_free(ifp->if_u1.if_data); + } + ifp->if_u1.if_data = NULL; + real_size = 0; + } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { + /* + * If the valid extents/data can fit in if_inline_ext/data, + * copy them from the malloc'd vector and free it. + */ + if (ifp->if_u1.if_data == NULL) { + ifp->if_u1.if_data = ifp->if_u2.if_inline_data; + } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { + ASSERT(ifp->if_real_bytes != 0); + memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, + new_size); + kmem_free(ifp->if_u1.if_data); + ifp->if_u1.if_data = ifp->if_u2.if_inline_data; + } + real_size = 0; + } else { + /* + * Stuck with malloc/realloc. + * For inline data, the underlying buffer must be + * a multiple of 4 bytes in size so that it can be + * logged and stay on word boundaries. We enforce + * that here. + */ + real_size = roundup(new_size, 4); + if (ifp->if_u1.if_data == NULL) { + ASSERT(ifp->if_real_bytes == 0); + ifp->if_u1.if_data = kmem_alloc(real_size, + KM_SLEEP | KM_NOFS); + } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { + /* + * Only do the realloc if the underlying size + * is really changing. + */ + if (ifp->if_real_bytes != real_size) { + ifp->if_u1.if_data = + kmem_realloc(ifp->if_u1.if_data, + real_size, + ifp->if_real_bytes, + KM_SLEEP | KM_NOFS); + } + } else { + ASSERT(ifp->if_real_bytes == 0); + ifp->if_u1.if_data = kmem_alloc(real_size, + KM_SLEEP | KM_NOFS); + memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, + ifp->if_bytes); + } + } + ifp->if_real_bytes = real_size; + ifp->if_bytes = new_size; + ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); +} + +void +xfs_idestroy_fork( + xfs_inode_t *ip, + int whichfork) +{ + xfs_ifork_t *ifp; + + ifp = XFS_IFORK_PTR(ip, whichfork); + if (ifp->if_broot != NULL) { + kmem_free(ifp->if_broot); + ifp->if_broot = NULL; + } + + /* + * If the format is local, then we can't have an extents + * array so just look for an inline data array. If we're + * not local then we may or may not have an extents list, + * so check and free it up if we do. + */ + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && + (ifp->if_u1.if_data != NULL)) { + ASSERT(ifp->if_real_bytes != 0); + kmem_free(ifp->if_u1.if_data); + ifp->if_u1.if_data = NULL; + ifp->if_real_bytes = 0; + } + } else if ((ifp->if_flags & XFS_IFEXTENTS) && + ((ifp->if_flags & XFS_IFEXTIREC) || + ((ifp->if_u1.if_extents != NULL) && + (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { + ASSERT(ifp->if_real_bytes != 0); + xfs_iext_destroy(ifp); + } + ASSERT(ifp->if_u1.if_extents == NULL || + ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); + ASSERT(ifp->if_real_bytes == 0); + if (whichfork == XFS_ATTR_FORK) { + kmem_zone_free(xfs_ifork_zone, ip->i_afp); + ip->i_afp = NULL; + } +} + +/* + * Convert in-core extents to on-disk form + * + * For either the data or attr fork in extent format, we need to endian convert + * the in-core extent as we place them into the on-disk inode. + * + * In the case of the data fork, the in-core and on-disk fork sizes can be + * different due to delayed allocation extents. We only copy on-disk extents + * here, so callers must always use the physical fork size to determine the + * size of the buffer passed to this routine. We will return the size actually + * used. + */ +int +xfs_iextents_copy( + xfs_inode_t *ip, + xfs_bmbt_rec_t *dp, + int whichfork) +{ + int copied; + int i; + xfs_ifork_t *ifp; + int nrecs; + xfs_fsblock_t start_block; + + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + ASSERT(ifp->if_bytes > 0); + + nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); + ASSERT(nrecs > 0); + + /* + * There are some delayed allocation extents in the + * inode, so copy the extents one at a time and skip + * the delayed ones. There must be at least one + * non-delayed extent. + */ + copied = 0; + for (i = 0; i < nrecs; i++) { + xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); + start_block = xfs_bmbt_get_startblock(ep); + if (isnullstartblock(start_block)) { + /* + * It's a delayed allocation extent, so skip it. + */ + continue; + } + + /* Translate to on disk format */ + put_unaligned_be64(ep->l0, &dp->l0); + put_unaligned_be64(ep->l1, &dp->l1); + dp++; + copied++; + } + ASSERT(copied != 0); + xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); + + return (copied * (uint)sizeof(xfs_bmbt_rec_t)); +} + +/* + * Each of the following cases stores data into the same region + * of the on-disk inode, so only one of them can be valid at + * any given time. While it is possible to have conflicting formats + * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is + * in EXTENTS format, this can only happen when the fork has + * changed formats after being modified but before being flushed. + * In these cases, the format always takes precedence, because the + * format indicates the current state of the fork. + */ +void +xfs_iflush_fork( + xfs_inode_t *ip, + xfs_dinode_t *dip, + xfs_inode_log_item_t *iip, + int whichfork) +{ + char *cp; + xfs_ifork_t *ifp; + xfs_mount_t *mp; + static const short brootflag[2] = + { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; + static const short dataflag[2] = + { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; + static const short extflag[2] = + { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; + + if (!iip) + return; + ifp = XFS_IFORK_PTR(ip, whichfork); + /* + * This can happen if we gave up in iformat in an error path, + * for the attribute fork. + */ + if (!ifp) { + ASSERT(whichfork == XFS_ATTR_FORK); + return; + } + cp = XFS_DFORK_PTR(dip, whichfork); + mp = ip->i_mount; + switch (XFS_IFORK_FORMAT(ip, whichfork)) { + case XFS_DINODE_FMT_LOCAL: + if ((iip->ili_fields & dataflag[whichfork]) && + (ifp->if_bytes > 0)) { + ASSERT(ifp->if_u1.if_data != NULL); + ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); + memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); + } + break; + + case XFS_DINODE_FMT_EXTENTS: + ASSERT((ifp->if_flags & XFS_IFEXTENTS) || + !(iip->ili_fields & extflag[whichfork])); + if ((iip->ili_fields & extflag[whichfork]) && + (ifp->if_bytes > 0)) { + ASSERT(xfs_iext_get_ext(ifp, 0)); + ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); + (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, + whichfork); + } + break; + + case XFS_DINODE_FMT_BTREE: + if ((iip->ili_fields & brootflag[whichfork]) && + (ifp->if_broot_bytes > 0)) { + ASSERT(ifp->if_broot != NULL); + ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + XFS_IFORK_SIZE(ip, whichfork)); + xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, + (xfs_bmdr_block_t *)cp, + XFS_DFORK_SIZE(dip, mp, whichfork)); + } + break; + + case XFS_DINODE_FMT_DEV: + if (iip->ili_fields & XFS_ILOG_DEV) { + ASSERT(whichfork == XFS_DATA_FORK); + xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); + } + break; + + case XFS_DINODE_FMT_UUID: + if (iip->ili_fields & XFS_ILOG_UUID) { + ASSERT(whichfork == XFS_DATA_FORK); + memcpy(XFS_DFORK_DPTR(dip), + &ip->i_df.if_u2.if_uuid, + sizeof(uuid_t)); + } + break; + + default: + ASSERT(0); + break; + } +} + +/* + * Return a pointer to the extent record at file index idx. + */ +xfs_bmbt_rec_host_t * +xfs_iext_get_ext( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx) /* index of target extent */ +{ + ASSERT(idx >= 0); + ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + + if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { + return ifp->if_u1.if_ext_irec->er_extbuf; + } else if (ifp->if_flags & XFS_IFEXTIREC) { + xfs_ext_irec_t *erp; /* irec pointer */ + int erp_idx = 0; /* irec index */ + xfs_extnum_t page_idx = idx; /* ext index in target list */ + + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); + return &erp->er_extbuf[page_idx]; + } else if (ifp->if_bytes) { + return &ifp->if_u1.if_extents[idx]; + } else { + return NULL; + } +} + +/* + * Insert new item(s) into the extent records for incore inode + * fork 'ifp'. 'count' new items are inserted at index 'idx'. + */ +void +xfs_iext_insert( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_extnum_t idx, /* starting index of new items */ + xfs_extnum_t count, /* number of inserted items */ + xfs_bmbt_irec_t *new, /* items to insert */ + int state) /* type of extent conversion */ +{ + xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; + xfs_extnum_t i; /* extent record index */ + + trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_); + + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + xfs_iext_add(ifp, idx, count); + for (i = idx; i < idx + count; i++, new++) + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); +} + +/* + * This is called when the amount of space required for incore file + * extents needs to be increased. The ext_diff parameter stores the + * number of new extents being added and the idx parameter contains + * the extent index where the new extents will be added. If the new + * extents are being appended, then we just need to (re)allocate and + * initialize the space. Otherwise, if the new extents are being + * inserted into the middle of the existing entries, a bit more work + * is required to make room for the new extents to be inserted. The + * caller is responsible for filling in the new extent entries upon + * return. + */ +void +xfs_iext_add( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin adding exts */ + int ext_diff) /* number of extents to add */ +{ + int byte_diff; /* new bytes being added */ + int new_size; /* size of extents after adding */ + xfs_extnum_t nextents; /* number of extents in file */ + + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT((idx >= 0) && (idx <= nextents)); + byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); + new_size = ifp->if_bytes + byte_diff; + /* + * If the new number of extents (nextents + ext_diff) + * fits inside the inode, then continue to use the inline + * extent buffer. + */ + if (nextents + ext_diff <= XFS_INLINE_EXTS) { + if (idx < nextents) { + memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], + &ifp->if_u2.if_inline_ext[idx], + (nextents - idx) * sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); + } + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + ifp->if_real_bytes = 0; + } + /* + * Otherwise use a linear (direct) extent list. + * If the extents are currently inside the inode, + * xfs_iext_realloc_direct will switch us from + * inline to direct extent allocation mode. + */ + else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { + xfs_iext_realloc_direct(ifp, new_size); + if (idx < nextents) { + memmove(&ifp->if_u1.if_extents[idx + ext_diff], + &ifp->if_u1.if_extents[idx], + (nextents - idx) * sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); + } + } + /* Indirection array */ + else { + xfs_ext_irec_t *erp; + int erp_idx = 0; + int page_idx = idx; + + ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); + if (ifp->if_flags & XFS_IFEXTIREC) { + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); + } else { + xfs_iext_irec_init(ifp); + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = ifp->if_u1.if_ext_irec; + } + /* Extents fit in target extent page */ + if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { + if (page_idx < erp->er_extcount) { + memmove(&erp->er_extbuf[page_idx + ext_diff], + &erp->er_extbuf[page_idx], + (erp->er_extcount - page_idx) * + sizeof(xfs_bmbt_rec_t)); + memset(&erp->er_extbuf[page_idx], 0, byte_diff); + } + erp->er_extcount += ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + } + /* Insert a new extent page */ + else if (erp) { + xfs_iext_add_indirect_multi(ifp, + erp_idx, page_idx, ext_diff); + } + /* + * If extent(s) are being appended to the last page in + * the indirection array and the new extent(s) don't fit + * in the page, then erp is NULL and erp_idx is set to + * the next index needed in the indirection array. + */ + else { + uint count = ext_diff; + + while (count) { + erp = xfs_iext_irec_new(ifp, erp_idx); + erp->er_extcount = min(count, XFS_LINEAR_EXTS); + count -= erp->er_extcount; + if (count) + erp_idx++; + } + } + } + ifp->if_bytes = new_size; +} + +/* + * This is called when incore extents are being added to the indirection + * array and the new extents do not fit in the target extent list. The + * erp_idx parameter contains the irec index for the target extent list + * in the indirection array, and the idx parameter contains the extent + * index within the list. The number of extents being added is stored + * in the count parameter. + * + * |-------| |-------| + * | | | | idx - number of extents before idx + * | idx | | count | + * | | | | count - number of extents being inserted at idx + * |-------| |-------| + * | count | | nex2 | nex2 - number of extents after idx + count + * |-------| |-------| + */ +void +xfs_iext_add_indirect_multi( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx, /* target extent irec index */ + xfs_extnum_t idx, /* index within target list */ + int count) /* new extents being added */ +{ + int byte_diff; /* new bytes being added */ + xfs_ext_irec_t *erp; /* pointer to irec entry */ + xfs_extnum_t ext_diff; /* number of extents to add */ + xfs_extnum_t ext_cnt; /* new extents still needed */ + xfs_extnum_t nex2; /* extents after idx + count */ + xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ + int nlists; /* number of irec's (lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nex2 = erp->er_extcount - idx; + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + + /* + * Save second part of target extent list + * (all extents past */ + if (nex2) { + byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); + nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); + memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); + erp->er_extcount -= nex2; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); + memset(&erp->er_extbuf[idx], 0, byte_diff); + } + + /* + * Add the new extents to the end of the target + * list, then allocate new irec record(s) and + * extent buffer(s) as needed to store the rest + * of the new extents. + */ + ext_cnt = count; + ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); + if (ext_diff) { + erp->er_extcount += ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + ext_cnt -= ext_diff; + } + while (ext_cnt) { + erp_idx++; + erp = xfs_iext_irec_new(ifp, erp_idx); + ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); + erp->er_extcount = ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + ext_cnt -= ext_diff; + } + + /* Add nex2 extents back to indirection array */ + if (nex2) { + xfs_extnum_t ext_avail; + int i; + + byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); + ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; + i = 0; + /* + * If nex2 extents fit in the current page, append + * nex2_ep after the new extents. + */ + if (nex2 <= ext_avail) { + i = erp->er_extcount; + } + /* + * Otherwise, check if space is available in the + * next page. + */ + else if ((erp_idx < nlists - 1) && + (nex2 <= (ext_avail = XFS_LINEAR_EXTS - + ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { + erp_idx++; + erp++; + /* Create a hole for nex2 extents */ + memmove(&erp->er_extbuf[nex2], erp->er_extbuf, + erp->er_extcount * sizeof(xfs_bmbt_rec_t)); + } + /* + * Final choice, create a new extent page for + * nex2 extents. + */ + else { + erp_idx++; + erp = xfs_iext_irec_new(ifp, erp_idx); + } + memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); + kmem_free(nex2_ep); + erp->er_extcount += nex2; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); + } +} + +/* + * This is called when the amount of space required for incore file + * extents needs to be decreased. The ext_diff parameter stores the + * number of extents to be removed and the idx parameter contains + * the extent index where the extents will be removed from. + * + * If the amount of space needed has decreased below the linear + * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous + * extent array. Otherwise, use kmem_realloc() to adjust the + * size to what is needed. + */ +void +xfs_iext_remove( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff, /* number of extents to remove */ + int state) /* type of extent conversion */ +{ + xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; + xfs_extnum_t nextents; /* number of extents in file */ + int new_size; /* size of extents after removal */ + + trace_xfs_iext_remove(ip, idx, state, _RET_IP_); + + ASSERT(ext_diff > 0); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); + + if (new_size == 0) { + xfs_iext_destroy(ifp); + } else if (ifp->if_flags & XFS_IFEXTIREC) { + xfs_iext_remove_indirect(ifp, idx, ext_diff); + } else if (ifp->if_real_bytes) { + xfs_iext_remove_direct(ifp, idx, ext_diff); + } else { + xfs_iext_remove_inline(ifp, idx, ext_diff); + } + ifp->if_bytes = new_size; +} + +/* + * This removes ext_diff extents from the inline buffer, beginning + * at extent index idx. + */ +void +xfs_iext_remove_inline( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff) /* number of extents to remove */ +{ + int nextents; /* number of extents in file */ + + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); + ASSERT(idx < XFS_INLINE_EXTS); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(((nextents - ext_diff) > 0) && + (nextents - ext_diff) < XFS_INLINE_EXTS); + + if (idx + ext_diff < nextents) { + memmove(&ifp->if_u2.if_inline_ext[idx], + &ifp->if_u2.if_inline_ext[idx + ext_diff], + (nextents - (idx + ext_diff)) * + sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], + 0, ext_diff * sizeof(xfs_bmbt_rec_t)); + } else { + memset(&ifp->if_u2.if_inline_ext[idx], 0, + ext_diff * sizeof(xfs_bmbt_rec_t)); + } +} + +/* + * This removes ext_diff extents from a linear (direct) extent list, + * beginning at extent index idx. If the extents are being removed + * from the end of the list (ie. truncate) then we just need to re- + * allocate the list to remove the extra space. Otherwise, if the + * extents are being removed from the middle of the existing extent + * entries, then we first need to move the extent records beginning + * at idx + ext_diff up in the list to overwrite the records being + * removed, then remove the extra space via kmem_realloc. + */ +void +xfs_iext_remove_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff) /* number of extents to remove */ +{ + xfs_extnum_t nextents; /* number of extents in file */ + int new_size; /* size of extents after removal */ + + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); + new_size = ifp->if_bytes - + (ext_diff * sizeof(xfs_bmbt_rec_t)); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + + if (new_size == 0) { + xfs_iext_destroy(ifp); + return; + } + /* Move extents up in the list (if needed) */ + if (idx + ext_diff < nextents) { + memmove(&ifp->if_u1.if_extents[idx], + &ifp->if_u1.if_extents[idx + ext_diff], + (nextents - (idx + ext_diff)) * + sizeof(xfs_bmbt_rec_t)); + } + memset(&ifp->if_u1.if_extents[nextents - ext_diff], + 0, ext_diff * sizeof(xfs_bmbt_rec_t)); + /* + * Reallocate the direct extent list. If the extents + * will fit inside the inode then xfs_iext_realloc_direct + * will switch from direct to inline extent allocation + * mode for us. + */ + xfs_iext_realloc_direct(ifp, new_size); + ifp->if_bytes = new_size; +} + +/* + * This is called when incore extents are being removed from the + * indirection array and the extents being removed span multiple extent + * buffers. The idx parameter contains the file extent index where we + * want to begin removing extents, and the count parameter contains + * how many extents need to be removed. + * + * |-------| |-------| + * | nex1 | | | nex1 - number of extents before idx + * |-------| | count | + * | | | | count - number of extents being removed at idx + * | count | |-------| + * | | | nex2 | nex2 - number of extents after idx + count + * |-------| |-------| + */ +void +xfs_iext_remove_indirect( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing extents */ + int count) /* number of extents to remove */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int erp_idx = 0; /* indirection array index */ + xfs_extnum_t ext_cnt; /* extents left to remove */ + xfs_extnum_t ext_diff; /* extents to remove in current list */ + xfs_extnum_t nex1; /* number of extents before idx */ + xfs_extnum_t nex2; /* extents after idx + count */ + int page_idx = idx; /* index in target extent list */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); + ASSERT(erp != NULL); + nex1 = page_idx; + ext_cnt = count; + while (ext_cnt) { + nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); + ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); + /* + * Check for deletion of entire list; + * xfs_iext_irec_remove() updates extent offsets. + */ + if (ext_diff == erp->er_extcount) { + xfs_iext_irec_remove(ifp, erp_idx); + ext_cnt -= ext_diff; + nex1 = 0; + if (ext_cnt) { + ASSERT(erp_idx < ifp->if_real_bytes / + XFS_IEXT_BUFSZ); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nex1 = 0; + continue; + } else { + break; + } + } + /* Move extents up (if needed) */ + if (nex2) { + memmove(&erp->er_extbuf[nex1], + &erp->er_extbuf[nex1 + ext_diff], + nex2 * sizeof(xfs_bmbt_rec_t)); + } + /* Zero out rest of page */ + memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - + ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); + /* Update remaining counters */ + erp->er_extcount -= ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); + ext_cnt -= ext_diff; + nex1 = 0; + erp_idx++; + erp++; + } + ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); + xfs_iext_irec_compact(ifp); +} + +/* + * Create, destroy, or resize a linear (direct) block of extents. + */ +void +xfs_iext_realloc_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* new size of extents after adding */ +{ + int rnew_size; /* real new size of extents */ + + rnew_size = new_size; + + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || + ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && + (new_size != ifp->if_real_bytes))); + + /* Free extent records */ + if (new_size == 0) { + xfs_iext_destroy(ifp); + } + /* Resize direct extent list and zero any new bytes */ + else if (ifp->if_real_bytes) { + /* Check if extents will fit inside the inode */ + if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { + xfs_iext_direct_to_inline(ifp, new_size / + (uint)sizeof(xfs_bmbt_rec_t)); + ifp->if_bytes = new_size; + return; + } + if (!is_power_of_2(new_size)){ + rnew_size = roundup_pow_of_two(new_size); + } + if (rnew_size != ifp->if_real_bytes) { + ifp->if_u1.if_extents = + kmem_realloc(ifp->if_u1.if_extents, + rnew_size, + ifp->if_real_bytes, KM_NOFS); + } + if (rnew_size > ifp->if_real_bytes) { + memset(&ifp->if_u1.if_extents[ifp->if_bytes / + (uint)sizeof(xfs_bmbt_rec_t)], 0, + rnew_size - ifp->if_real_bytes); + } + } + /* Switch from the inline extent buffer to a direct extent list */ + else { + if (!is_power_of_2(new_size)) { + rnew_size = roundup_pow_of_two(new_size); + } + xfs_iext_inline_to_direct(ifp, rnew_size); + } + ifp->if_real_bytes = rnew_size; + ifp->if_bytes = new_size; +} + +/* + * Switch from linear (direct) extent records to inline buffer. + */ +void +xfs_iext_direct_to_inline( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t nextents) /* number of extents in file */ +{ + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + ASSERT(nextents <= XFS_INLINE_EXTS); + /* + * The inline buffer was zeroed when we switched + * from inline to direct extent allocation mode, + * so we don't need to clear it here. + */ + memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, + nextents * sizeof(xfs_bmbt_rec_t)); + kmem_free(ifp->if_u1.if_extents); + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + ifp->if_real_bytes = 0; +} + +/* + * Switch from inline buffer to linear (direct) extent records. + * new_size should already be rounded up to the next power of 2 + * by the caller (when appropriate), so use new_size as it is. + * However, since new_size may be rounded up, we can't update + * if_bytes here. It is the caller's responsibility to update + * if_bytes upon return. + */ +void +xfs_iext_inline_to_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* number of extents in file */ +{ + ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); + memset(ifp->if_u1.if_extents, 0, new_size); + if (ifp->if_bytes) { + memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, + ifp->if_bytes); + memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * + sizeof(xfs_bmbt_rec_t)); + } + ifp->if_real_bytes = new_size; +} + +/* + * Resize an extent indirection array to new_size bytes. + */ +STATIC void +xfs_iext_realloc_indirect( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* new indirection array size */ +{ + int nlists; /* number of irec's (ex lists) */ + int size; /* current indirection array size */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + size = nlists * sizeof(xfs_ext_irec_t); + ASSERT(ifp->if_real_bytes); + ASSERT((new_size >= 0) && (new_size != size)); + if (new_size == 0) { + xfs_iext_destroy(ifp); + } else { + ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) + kmem_realloc(ifp->if_u1.if_ext_irec, + new_size, size, KM_NOFS); + } +} + +/* + * Switch from indirection array to linear (direct) extent allocations. + */ +STATIC void +xfs_iext_indirect_to_direct( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_bmbt_rec_host_t *ep; /* extent record pointer */ + xfs_extnum_t nextents; /* number of extents in file */ + int size; /* size of file extents */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(nextents <= XFS_LINEAR_EXTS); + size = nextents * sizeof(xfs_bmbt_rec_t); + + xfs_iext_irec_compact_pages(ifp); + ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); + + ep = ifp->if_u1.if_ext_irec->er_extbuf; + kmem_free(ifp->if_u1.if_ext_irec); + ifp->if_flags &= ~XFS_IFEXTIREC; + ifp->if_u1.if_extents = ep; + ifp->if_bytes = size; + if (nextents < XFS_LINEAR_EXTS) { + xfs_iext_realloc_direct(ifp, size); + } +} + +/* + * Free incore file extents. + */ +void +xfs_iext_destroy( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + if (ifp->if_flags & XFS_IFEXTIREC) { + int erp_idx; + int nlists; + + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { + xfs_iext_irec_remove(ifp, erp_idx); + } + ifp->if_flags &= ~XFS_IFEXTIREC; + } else if (ifp->if_real_bytes) { + kmem_free(ifp->if_u1.if_extents); + } else if (ifp->if_bytes) { + memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * + sizeof(xfs_bmbt_rec_t)); + } + ifp->if_u1.if_extents = NULL; + ifp->if_real_bytes = 0; + ifp->if_bytes = 0; +} + +/* + * Return a pointer to the extent record for file system block bno. + */ +xfs_bmbt_rec_host_t * /* pointer to found extent record */ +xfs_iext_bno_to_ext( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fileoff_t bno, /* block number to search for */ + xfs_extnum_t *idxp) /* index of target extent */ +{ + xfs_bmbt_rec_host_t *base; /* pointer to first extent */ + xfs_filblks_t blockcount = 0; /* number of blocks in extent */ + xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ + xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ + int high; /* upper boundary in search */ + xfs_extnum_t idx = 0; /* index of target extent */ + int low; /* lower boundary in search */ + xfs_extnum_t nextents; /* number of file extents */ + xfs_fileoff_t startoff = 0; /* start offset of extent */ + + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents == 0) { + *idxp = 0; + return NULL; + } + low = 0; + if (ifp->if_flags & XFS_IFEXTIREC) { + /* Find target extent list */ + int erp_idx = 0; + erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); + base = erp->er_extbuf; + high = erp->er_extcount - 1; + } else { + base = ifp->if_u1.if_extents; + high = nextents - 1; + } + /* Binary search extent records */ + while (low <= high) { + idx = (low + high) >> 1; + ep = base + idx; + startoff = xfs_bmbt_get_startoff(ep); + blockcount = xfs_bmbt_get_blockcount(ep); + if (bno < startoff) { + high = idx - 1; + } else if (bno >= startoff + blockcount) { + low = idx + 1; + } else { + /* Convert back to file-based extent index */ + if (ifp->if_flags & XFS_IFEXTIREC) { + idx += erp->er_extoff; + } + *idxp = idx; + return ep; + } + } + /* Convert back to file-based extent index */ + if (ifp->if_flags & XFS_IFEXTIREC) { + idx += erp->er_extoff; + } + if (bno >= startoff + blockcount) { + if (++idx == nextents) { + ep = NULL; + } else { + ep = xfs_iext_get_ext(ifp, idx); + } + } + *idxp = idx; + return ep; +} + +/* + * Return a pointer to the indirection array entry containing the + * extent record for filesystem block bno. Store the index of the + * target irec in *erp_idxp. + */ +xfs_ext_irec_t * /* pointer to found extent record */ +xfs_iext_bno_to_irec( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fileoff_t bno, /* block number to search for */ + int *erp_idxp) /* irec index of target ext list */ +{ + xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ + xfs_ext_irec_t *erp_next; /* next indirection array entry */ + int erp_idx; /* indirection array index */ + int nlists; /* number of extent irec's (lists) */ + int high; /* binary search upper limit */ + int low; /* binary search lower limit */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp_idx = 0; + low = 0; + high = nlists - 1; + while (low <= high) { + erp_idx = (low + high) >> 1; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; + if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { + high = erp_idx - 1; + } else if (erp_next && bno >= + xfs_bmbt_get_startoff(erp_next->er_extbuf)) { + low = erp_idx + 1; + } else { + break; + } + } + *erp_idxp = erp_idx; + return erp; +} + +/* + * Return a pointer to the indirection array entry containing the + * extent record at file extent index *idxp. Store the index of the + * target irec in *erp_idxp and store the page index of the target + * extent record in *idxp. + */ +xfs_ext_irec_t * +xfs_iext_idx_to_irec( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t *idxp, /* extent index (file -> page) */ + int *erp_idxp, /* pointer to target irec */ + int realloc) /* new bytes were just added */ +{ + xfs_ext_irec_t *prev; /* pointer to previous irec */ + xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ + int erp_idx; /* indirection array index */ + int nlists; /* number of irec's (ex lists) */ + int high; /* binary search upper limit */ + int low; /* binary search lower limit */ + xfs_extnum_t page_idx = *idxp; /* extent index in target list */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + ASSERT(page_idx >= 0); + ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); + + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp_idx = 0; + low = 0; + high = nlists - 1; + + /* Binary search extent irec's */ + while (low <= high) { + erp_idx = (low + high) >> 1; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + prev = erp_idx > 0 ? erp - 1 : NULL; + if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && + realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { + high = erp_idx - 1; + } else if (page_idx > erp->er_extoff + erp->er_extcount || + (page_idx == erp->er_extoff + erp->er_extcount && + !realloc)) { + low = erp_idx + 1; + } else if (page_idx == erp->er_extoff + erp->er_extcount && + erp->er_extcount == XFS_LINEAR_EXTS) { + ASSERT(realloc); + page_idx = 0; + erp_idx++; + erp = erp_idx < nlists ? erp + 1 : NULL; + break; + } else { + page_idx -= erp->er_extoff; + break; + } + } + *idxp = page_idx; + *erp_idxp = erp_idx; + return erp; +} + +/* + * Allocate and initialize an indirection array once the space needed + * for incore extents increases above XFS_IEXT_BUFSZ. + */ +void +xfs_iext_irec_init( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + xfs_extnum_t nextents; /* number of extents in file */ + + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(nextents <= XFS_LINEAR_EXTS); + + erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); + + if (nextents == 0) { + ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); + } else if (!ifp->if_real_bytes) { + xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); + } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { + xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); + } + erp->er_extbuf = ifp->if_u1.if_extents; + erp->er_extcount = nextents; + erp->er_extoff = 0; + + ifp->if_flags |= XFS_IFEXTIREC; + ifp->if_real_bytes = XFS_IEXT_BUFSZ; + ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); + ifp->if_u1.if_ext_irec = erp; + + return; +} + +/* + * Allocate and initialize a new entry in the indirection array. + */ +xfs_ext_irec_t * +xfs_iext_irec_new( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx) /* index for new irec */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + + /* Resize indirection array */ + xfs_iext_realloc_indirect(ifp, ++nlists * + sizeof(xfs_ext_irec_t)); + /* + * Move records down in the array so the + * new page can use erp_idx. + */ + erp = ifp->if_u1.if_ext_irec; + for (i = nlists - 1; i > erp_idx; i--) { + memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); + } + ASSERT(i == erp_idx); + + /* Initialize new extent record */ + erp = ifp->if_u1.if_ext_irec; + erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); + ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; + memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); + erp[erp_idx].er_extcount = 0; + erp[erp_idx].er_extoff = erp_idx > 0 ? + erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; + return (&erp[erp_idx]); +} + +/* + * Remove a record from the indirection array. + */ +void +xfs_iext_irec_remove( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx) /* irec index to remove */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + if (erp->er_extbuf) { + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, + -erp->er_extcount); + kmem_free(erp->er_extbuf); + } + /* Compact extent records */ + erp = ifp->if_u1.if_ext_irec; + for (i = erp_idx; i < nlists - 1; i++) { + memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); + } + /* + * Manually free the last extent record from the indirection + * array. A call to xfs_iext_realloc_indirect() with a size + * of zero would result in a call to xfs_iext_destroy() which + * would in turn call this function again, creating a nasty + * infinite loop. + */ + if (--nlists) { + xfs_iext_realloc_indirect(ifp, + nlists * sizeof(xfs_ext_irec_t)); + } else { + kmem_free(ifp->if_u1.if_ext_irec); + } + ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; +} + +/* + * This is called to clean up large amounts of unused memory allocated + * by the indirection array. Before compacting anything though, verify + * that the indirection array is still needed and switch back to the + * linear extent list (or even the inline buffer) if possible. The + * compaction policy is as follows: + * + * Full Compaction: Extents fit into a single page (or inline buffer) + * Partial Compaction: Extents occupy less than 50% of allocated space + * No Compaction: Extents occupy at least 50% of allocated space + */ +void +xfs_iext_irec_compact( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_extnum_t nextents; /* number of extents in file */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + + if (nextents == 0) { + xfs_iext_destroy(ifp); + } else if (nextents <= XFS_INLINE_EXTS) { + xfs_iext_indirect_to_direct(ifp); + xfs_iext_direct_to_inline(ifp, nextents); + } else if (nextents <= XFS_LINEAR_EXTS) { + xfs_iext_indirect_to_direct(ifp); + } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { + xfs_iext_irec_compact_pages(ifp); + } +} + +/* + * Combine extents from neighboring extent pages. + */ +void +xfs_iext_irec_compact_pages( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ + int erp_idx = 0; /* indirection array index */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + while (erp_idx < nlists - 1) { + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + erp_next = erp + 1; + if (erp_next->er_extcount <= + (XFS_LINEAR_EXTS - erp->er_extcount)) { + memcpy(&erp->er_extbuf[erp->er_extcount], + erp_next->er_extbuf, erp_next->er_extcount * + sizeof(xfs_bmbt_rec_t)); + erp->er_extcount += erp_next->er_extcount; + /* + * Free page before removing extent record + * so er_extoffs don't get modified in + * xfs_iext_irec_remove. + */ + kmem_free(erp_next->er_extbuf); + erp_next->er_extbuf = NULL; + xfs_iext_irec_remove(ifp, erp_idx + 1); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + } else { + erp_idx++; + } + } +} + +/* + * This is called to update the er_extoff field in the indirection + * array when extents have been added or removed from one of the + * extent lists. erp_idx contains the irec index to begin updating + * at and ext_diff contains the number of extents that were added + * or removed. + */ +void +xfs_iext_irec_update_extoffs( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx, /* irec index to update */ + int ext_diff) /* number of new extents */ +{ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + for (i = erp_idx; i < nlists; i++) { + ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; + } +} diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c new file mode 100644 index 000000000000..ee7e0e80246b --- /dev/null +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2013 Jie Liu. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_ag.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_trans_space.h" +#include "xfs_inode.h" +#include "xfs_da_btree.h" +#include "xfs_attr_leaf.h" +#include "xfs_bmap_btree.h" + +/* + * Calculate the maximum length in bytes that would be required for a local + * attribute value as large attributes out of line are not logged. + */ +STATIC int +xfs_log_calc_max_attrsetm_res( + struct xfs_mount *mp) +{ + int size; + int nblks; + + size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) - + MAXNAMELEN - 1; + nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); + nblks += XFS_B_TO_FSB(mp, size); + nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK); + + return M_RES(mp)->tr_attrsetm.tr_logres + + M_RES(mp)->tr_attrsetrt.tr_logres * nblks; +} + +/* + * Iterate over the log space reservation table to figure out and return + * the maximum one in terms of the pre-calculated values which were done + * at mount time. + */ +STATIC void +xfs_log_get_max_trans_res( + struct xfs_mount *mp, + struct xfs_trans_res *max_resp) +{ + struct xfs_trans_res *resp; + struct xfs_trans_res *end_resp; + int log_space = 0; + int attr_space; + + attr_space = xfs_log_calc_max_attrsetm_res(mp); + + resp = (struct xfs_trans_res *)M_RES(mp); + end_resp = (struct xfs_trans_res *)(M_RES(mp) + 1); + for (; resp < end_resp; resp++) { + int tmp = resp->tr_logcount > 1 ? + resp->tr_logres * resp->tr_logcount : + resp->tr_logres; + if (log_space < tmp) { + log_space = tmp; + *max_resp = *resp; /* struct copy */ + } + } + + if (attr_space > log_space) { + *max_resp = M_RES(mp)->tr_attrsetm; /* struct copy */ + max_resp->tr_logres = attr_space; + } +} + +/* + * Calculate the minimum valid log size for the given superblock configuration. + * Used to calculate the minimum log size at mkfs time, and to determine if + * the log is large enough or not at mount time. Returns the minimum size in + * filesystem block size units. + */ +int +xfs_log_calc_minimum_size( + struct xfs_mount *mp) +{ + struct xfs_trans_res tres = {0}; + int max_logres; + int min_logblks = 0; + int lsunit = 0; + + xfs_log_get_max_trans_res(mp, &tres); + + max_logres = xfs_log_calc_unit_res(mp, tres.tr_logres); + if (tres.tr_logcount > 1) + max_logres *= tres.tr_logcount; + + if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) + lsunit = BTOBB(mp->m_sb.sb_logsunit); + + /* + * Two factors should be taken into account for calculating the minimum + * log space. + * 1) The fundamental limitation is that no single transaction can be + * larger than half size of the log. + * + * From mkfs.xfs, this is considered by the XFS_MIN_LOG_FACTOR + * define, which is set to 3. That means we can definitely fit + * maximally sized 2 transactions in the log. We'll use this same + * value here. + * + * 2) If the lsunit option is specified, a transaction requires 2 LSU + * for the reservation because there are two log writes that can + * require padding - the transaction data and the commit record which + * are written separately and both can require padding to the LSU. + * Consider that we can have an active CIL reservation holding 2*LSU, + * but the CIL is not over a push threshold, in this case, if we + * don't have enough log space for at one new transaction, which + * includes another 2*LSU in the reservation, we will run into dead + * loop situation in log space grant procedure. i.e. + * xlog_grant_head_wait(). + * + * Hence the log size needs to be able to contain two maximally sized + * and padded transactions, which is (2 * (2 * LSU + maxlres)). + * + * Also, the log size should be a multiple of the log stripe unit, round + * it up to lsunit boundary if lsunit is specified. + */ + if (lsunit) { + min_logblks = roundup_64(BTOBB(max_logres), lsunit) + + 2 * lsunit; + } else + min_logblks = BTOBB(max_logres) + 2 * BBSIZE; + min_logblks *= XFS_MIN_LOG_FACTOR; + + return XFS_BB_TO_FSB(mp, min_logblks); +} diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c new file mode 100644 index 000000000000..f4dd697cac08 --- /dev/null +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -0,0 +1,973 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc.h" +#include "xfs_error.h" +#include "xfs_trans.h" +#include "xfs_trans_space.h" +#include "xfs_trace.h" +#include "xfs_buf.h" +#include "xfs_icache.h" +#include "xfs_dinode.h" +#include "xfs_rtalloc.h" + + +/* + * Realtime allocator bitmap functions shared with userspace. + */ + +/* + * Get a buffer for the bitmap or summary file block specified. + * The buffer is returned read and locked. + */ +int +xfs_rtbuf_get( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t block, /* block number in bitmap or summary */ + int issum, /* is summary not bitmap */ + xfs_buf_t **bpp) /* output: buffer for the block */ +{ + xfs_buf_t *bp; /* block buffer, result */ + xfs_inode_t *ip; /* bitmap or summary inode */ + xfs_bmbt_irec_t map; + int nmap = 1; + int error; /* error value */ + + ip = issum ? mp->m_rsumip : mp->m_rbmip; + + error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); + if (error) + return error; + + ASSERT(map.br_startblock != NULLFSBLOCK); + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, map.br_startblock), + mp->m_bsize, 0, &bp, NULL); + if (error) + return error; + *bpp = bp; + return 0; +} + +/* + * Searching backward from start to limit, find the first block whose + * allocated/free state is different from start's. + */ +int +xfs_rtfind_back( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to look at */ + xfs_rtblock_t limit, /* last block to look at */ + xfs_rtblock_t *rtblock) /* out: start block found */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtblock_t firstbit; /* first useful bit in the word */ + xfs_rtblock_t i; /* current bit number rel. to start */ + xfs_rtblock_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + int word; /* word number in the buffer */ + + /* + * Compute and read in starting bitmap block for starting block. + */ + block = XFS_BITTOBLOCK(mp, start); + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Get the first word's index & point to it. + */ + word = XFS_BITTOWORD(mp, start); + b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + len = start - limit + 1; + /* + * Compute match value, based on the bit at start: if 1 (free) + * then all-ones, else all-zeroes. + */ + want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + /* + * If the starting position is not word-aligned, deal with the + * partial word. + */ + if (bit < XFS_NBWORD - 1) { + /* + * Calculate first (leftmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0); + mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) << + firstbit; + /* + * Calculate the difference between the value there + * and what we're looking for. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different. Mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i = bit - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; + } + i = bit - firstbit + 1; + /* + * Go on to previous block if that's where the previous word is + * and we need the previous word. + */ + if (--word == -1 && i < len) { + /* + * If done with this block, get the previous one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + word = XFS_BLOCKWMASK(mp); + b = &bufp[word]; + } else { + /* + * Go on to the previous word in the buffer. + */ + b--; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the previous one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = *b ^ want)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; + } + i += XFS_NBWORD; + /* + * Go on to previous block if that's where the previous word is + * and we need the previous word. + */ + if (--word == -1 && i < len) { + /* + * If done with this block, get the previous one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + word = XFS_BLOCKWMASK(mp); + b = &bufp[word]; + } else { + /* + * Go on to the previous word in the buffer. + */ + b--; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if (len - i) { + /* + * Calculate first (leftmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + firstbit = XFS_NBWORD - (len - i); + mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit; + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; + } else + i = len; + } + /* + * No match, return that we scanned the whole area. + */ + xfs_trans_brelse(tp, bp); + *rtblock = start - i + 1; + return 0; +} + +/* + * Searching forward from start to limit, find the first block whose + * allocated/free state is different from start's. + */ +int +xfs_rtfind_forw( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to look at */ + xfs_rtblock_t limit, /* last block to look at */ + xfs_rtblock_t *rtblock) /* out: start block found */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtblock_t i; /* current bit number rel. to start */ + xfs_rtblock_t lastbit; /* last useful bit in the word */ + xfs_rtblock_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + int word; /* word number in the buffer */ + + /* + * Compute and read in starting bitmap block for starting block. + */ + block = XFS_BITTOBLOCK(mp, start); + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Get the first word's index & point to it. + */ + word = XFS_BITTOWORD(mp, start); + b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + len = limit - start + 1; + /* + * Compute match value, based on the bit at start: if 1 (free) + * then all-ones, else all-zeroes. + */ + want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + /* + * If the starting position is not word-aligned, deal with the + * partial word. + */ + if (bit) { + /* + * Calculate last (rightmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; + /* + * Calculate the difference between the value there + * and what we're looking for. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different. Mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i = XFS_RTLOBIT(wdiff) - bit; + *rtblock = start + i - 1; + return 0; + } + i = lastbit - bit; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the previous one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the previous word in the buffer. + */ + b++; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the next one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = *b ^ want)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *rtblock = start + i - 1; + return 0; + } + i += XFS_NBWORD; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the next one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer. + */ + b++; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if ((lastbit = len - i)) { + /* + * Calculate mask for all the relevant bits in this word. + */ + mask = ((xfs_rtword_t)1 << lastbit) - 1; + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *rtblock = start + i - 1; + return 0; + } else + i = len; + } + /* + * No match, return that we scanned the whole area. + */ + xfs_trans_brelse(tp, bp); + *rtblock = start + i - 1; + return 0; +} + +/* + * Read and modify the summary information for a given extent size, + * bitmap block combination. + * Keeps track of a current summary block, so we don't keep reading + * it from the buffer cache. + */ +int +xfs_rtmodify_summary( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + int log, /* log2 of extent size */ + xfs_rtblock_t bbno, /* bitmap block number */ + int delta, /* change to make to summary info */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb) /* in/out: summary block number */ +{ + xfs_buf_t *bp; /* buffer for the summary block */ + int error; /* error value */ + xfs_fsblock_t sb; /* summary fsblock */ + int so; /* index into the summary file */ + xfs_suminfo_t *sp; /* pointer to returned data */ + + /* + * Compute entry number in the summary file. + */ + so = XFS_SUMOFFS(mp, log, bbno); + /* + * Compute the block number in the summary file. + */ + sb = XFS_SUMOFFSTOBLOCK(mp, so); + /* + * If we have an old buffer, and the block number matches, use that. + */ + if (rbpp && *rbpp && *rsb == sb) + bp = *rbpp; + /* + * Otherwise we have to get the buffer. + */ + else { + /* + * If there was an old one, get rid of it first. + */ + if (rbpp && *rbpp) + xfs_trans_brelse(tp, *rbpp); + error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); + if (error) { + return error; + } + /* + * Remember this buffer and block for the next call. + */ + if (rbpp) { + *rbpp = bp; + *rsb = sb; + } + } + /* + * Point to the summary information, modify and log it. + */ + sp = XFS_SUMPTR(mp, bp, so); + *sp += delta; + xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), + (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); + return 0; +} + +/* + * Set the given range of bitmap bits to the given value. + * Do whatever I/O and logging is required. + */ +int +xfs_rtmodify_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to modify */ + xfs_extlen_t len, /* length of extent to modify */ + int val) /* 1 for free, 0 for allocated */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtword_t *first; /* first used word in the buffer */ + int i; /* current bit number rel. to start */ + int lastbit; /* last useful bit in word */ + xfs_rtword_t mask; /* mask o frelevant bits for value */ + int word; /* word number in the buffer */ + + /* + * Compute starting bitmap block number. + */ + block = XFS_BITTOBLOCK(mp, start); + /* + * Read the bitmap block, and point to its data. + */ + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Compute the starting word's address, and starting bit. + */ + word = XFS_BITTOWORD(mp, start); + first = b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + /* + * 0 (allocated) => all zeroes; 1 (free) => all ones. + */ + val = -val; + /* + * If not starting on a word boundary, deal with the first + * (partial) word. + */ + if (bit) { + /* + * Compute first bit not changed and mask of relevant bits. + */ + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; + /* + * Set/clear the active bits. + */ + if (val) + *b |= mask; + else + *b &= ~mask; + i = lastbit - bit; + /* + * Go on to the next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * Log the changed part of this block. + * Get the next one. + */ + xfs_trans_log_buf(tp, bp, + (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp)); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + first = b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer + */ + b++; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the next one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Set the word value correctly. + */ + *b = val; + i += XFS_NBWORD; + /* + * Go on to the next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * Log the changed part of this block. + * Get the next one. + */ + xfs_trans_log_buf(tp, bp, + (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp)); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + first = b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer + */ + b++; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if ((lastbit = len - i)) { + /* + * Compute a mask of relevant bits. + */ + bit = 0; + mask = ((xfs_rtword_t)1 << lastbit) - 1; + /* + * Set/clear the active bits. + */ + if (val) + *b |= mask; + else + *b &= ~mask; + b++; + } + /* + * Log any remaining changed bytes. + */ + if (b > first) + xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp - 1)); + return 0; +} + +/* + * Mark an extent specified by start and len freed. + * Updates all the summary information as well as the bitmap. + */ +int +xfs_rtfree_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to free */ + xfs_extlen_t len, /* length to free */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb) /* in/out: summary block number */ +{ + xfs_rtblock_t end; /* end of the freed extent */ + int error; /* error value */ + xfs_rtblock_t postblock; /* first block freed > end */ + xfs_rtblock_t preblock; /* first block freed < start */ + + end = start + len - 1; + /* + * Modify the bitmap to mark this extent freed. + */ + error = xfs_rtmodify_range(mp, tp, start, len, 1); + if (error) { + return error; + } + /* + * Assume we're freeing out of the middle of an allocated extent. + * We need to find the beginning and end of the extent so we can + * properly update the summary. + */ + error = xfs_rtfind_back(mp, tp, start, 0, &preblock); + if (error) { + return error; + } + /* + * Find the next allocated block (end of allocated extent). + */ + error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, + &postblock); + if (error) + return error; + /* + * If there are blocks not being freed at the front of the + * old extent, add summary data for them to be allocated. + */ + if (preblock < start) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(start - preblock), + XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * If there are blocks not being freed at the end of the + * old extent, add summary data for them to be allocated. + */ + if (postblock > end) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock - end), + XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * Increment the summary information corresponding to the entire + * (new) free extent. + */ + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock + 1 - preblock), + XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); + return error; +} + +/* + * Check that the given range is either all allocated (val = 0) or + * all free (val = 1). + */ +int +xfs_rtcheck_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block number of extent */ + xfs_extlen_t len, /* length of extent */ + int val, /* 1 for free, 0 for allocated */ + xfs_rtblock_t *new, /* out: first block not matching */ + int *stat) /* out: 1 for matches, 0 for not */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtblock_t i; /* current bit number rel. to start */ + xfs_rtblock_t lastbit; /* last useful bit in word */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t wdiff; /* difference from wanted value */ + int word; /* word number in the buffer */ + + /* + * Compute starting bitmap block number + */ + block = XFS_BITTOBLOCK(mp, start); + /* + * Read the bitmap block. + */ + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Compute the starting word's address, and starting bit. + */ + word = XFS_BITTOWORD(mp, start); + b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + /* + * 0 (allocated) => all zero's; 1 (free) => all one's. + */ + val = -val; + /* + * If not starting on a word boundary, deal with the first + * (partial) word. + */ + if (bit) { + /* + * Compute first bit not examined. + */ + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + /* + * Mask of relevant bits. + */ + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = (*b ^ val) & mask)) { + /* + * Different, compute first wrong bit and return. + */ + xfs_trans_brelse(tp, bp); + i = XFS_RTLOBIT(wdiff) - bit; + *new = start + i; + *stat = 0; + return 0; + } + i = lastbit - bit; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the next one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer. + */ + b++; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the next one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = *b ^ val)) { + /* + * Different, compute first wrong bit and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *new = start + i; + *stat = 0; + return 0; + } + i += XFS_NBWORD; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the next one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer. + */ + b++; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if ((lastbit = len - i)) { + /* + * Mask of relevant bits. + */ + mask = ((xfs_rtword_t)1 << lastbit) - 1; + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = (*b ^ val) & mask)) { + /* + * Different, compute first wrong bit and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *new = start + i; + *stat = 0; + return 0; + } else + i = len; + } + /* + * Successful, return. + */ + xfs_trans_brelse(tp, bp); + *new = start + i; + *stat = 1; + return 0; +} + +#ifdef DEBUG +/* + * Check that the given extent (block range) is allocated already. + */ +STATIC int /* error */ +xfs_rtcheck_alloc_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t bno, /* starting block number of extent */ + xfs_extlen_t len) /* length of extent */ +{ + xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */ + int stat; + int error; + + error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat); + if (error) + return error; + ASSERT(stat); + return 0; +} +#else +#define xfs_rtcheck_alloc_range(m,t,b,l) (0) +#endif +/* + * Free an extent in the realtime subvolume. Length is expressed in + * realtime extents, as is the block number. + */ +int /* error */ +xfs_rtfree_extent( + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t bno, /* starting block number to free */ + xfs_extlen_t len) /* length of extent freed */ +{ + int error; /* error value */ + xfs_mount_t *mp; /* file system mount structure */ + xfs_fsblock_t sb; /* summary file block number */ + xfs_buf_t *sumbp = NULL; /* summary file block buffer */ + + mp = tp->t_mountp; + + ASSERT(mp->m_rbmip->i_itemp != NULL); + ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); + + error = xfs_rtcheck_alloc_range(mp, tp, bno, len); + if (error) + return error; + + /* + * Free the range of realtime blocks. + */ + error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); + if (error) { + return error; + } + /* + * Mark more blocks free in the superblock. + */ + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len); + /* + * If we've now freed all the blocks, reset the file sequence + * number to 0. + */ + if (tp->t_frextents_delta + mp->m_sb.sb_frextents == + mp->m_sb.sb_rextents) { + if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) + mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; + *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; + xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); + } + return 0; +} + diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c new file mode 100644 index 000000000000..23c2f2577c8d --- /dev/null +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * Copyright (c) 2012-2013 Red Hat, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_ag.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_symlink.h" +#include "xfs_cksum.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" + + +/* + * Each contiguous block has a header, so it is not just a simple pathlen + * to FSB conversion. + */ +int +xfs_symlink_blocks( + struct xfs_mount *mp, + int pathlen) +{ + int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize); + + return (pathlen + buflen - 1) / buflen; +} + +int +xfs_symlink_hdr_set( + struct xfs_mount *mp, + xfs_ino_t ino, + uint32_t offset, + uint32_t size, + struct xfs_buf *bp) +{ + struct xfs_dsymlink_hdr *dsl = bp->b_addr; + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return 0; + + dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC); + dsl->sl_offset = cpu_to_be32(offset); + dsl->sl_bytes = cpu_to_be32(size); + uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid); + dsl->sl_owner = cpu_to_be64(ino); + dsl->sl_blkno = cpu_to_be64(bp->b_bn); + bp->b_ops = &xfs_symlink_buf_ops; + + return sizeof(struct xfs_dsymlink_hdr); +} + +/* + * Checking of the symlink header is split into two parts. the verifier does + * CRC, location and bounds checking, the unpacking function checks the path + * parameters and owner. + */ +bool +xfs_symlink_hdr_ok( + xfs_ino_t ino, + uint32_t offset, + uint32_t size, + struct xfs_buf *bp) +{ + struct xfs_dsymlink_hdr *dsl = bp->b_addr; + + if (offset != be32_to_cpu(dsl->sl_offset)) + return false; + if (size != be32_to_cpu(dsl->sl_bytes)) + return false; + if (ino != be64_to_cpu(dsl->sl_owner)) + return false; + + /* ok */ + return true; +} + +static bool +xfs_symlink_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dsymlink_hdr *dsl = bp->b_addr; + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC)) + return false; + if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid)) + return false; + if (bp->b_bn != be64_to_cpu(dsl->sl_blkno)) + return false; + if (be32_to_cpu(dsl->sl_offset) + + be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN) + return false; + if (dsl->sl_owner == 0) + return false; + + return true; +} + +static void +xfs_symlink_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + /* no verification of non-crc buffers */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_symlink_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +static void +xfs_symlink_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + + /* no verification of non-crc buffers */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (!xfs_symlink_verify(bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (bip) { + struct xfs_dsymlink_hdr *dsl = bp->b_addr; + dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); + } + xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF); +} + +const struct xfs_buf_ops xfs_symlink_buf_ops = { + .verify_read = xfs_symlink_read_verify, + .verify_write = xfs_symlink_write_verify, +}; + +void +xfs_symlink_local_to_remote( + struct xfs_trans *tp, + struct xfs_buf *bp, + struct xfs_inode *ip, + struct xfs_ifork *ifp) +{ + struct xfs_mount *mp = ip->i_mount; + char *buf; + + if (!xfs_sb_version_hascrc(&mp->m_sb)) { + bp->b_ops = NULL; + memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); + return; + } + + /* + * As this symlink fits in an inode literal area, it must also fit in + * the smallest buffer the filesystem supports. + */ + ASSERT(BBTOB(bp->b_length) >= + ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr)); + + bp->b_ops = &xfs_symlink_buf_ops; + + buf = bp->b_addr; + buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp); + memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes); +} diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c new file mode 100644 index 000000000000..f2bda7c76b8a --- /dev/null +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -0,0 +1,894 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * Copyright (C) 2010 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc.h" +#include "xfs_quota.h" +#include "xfs_trans.h" +#include "xfs_qm.h" +#include "xfs_trans_space.h" +#include "xfs_trace.h" + +/* + * A buffer has a format structure overhead in the log in addition + * to the data, so we need to take this into account when reserving + * space in a transaction for a buffer. Round the space required up + * to a multiple of 128 bytes so that we don't change the historical + * reservation that has been used for this overhead. + */ +STATIC uint +xfs_buf_log_overhead(void) +{ + return round_up(sizeof(struct xlog_op_header) + + sizeof(struct xfs_buf_log_format), 128); +} + +/* + * Calculate out transaction log reservation per item in bytes. + * + * The nbufs argument is used to indicate the number of items that + * will be changed in a transaction. size is used to tell how many + * bytes should be reserved per item. + */ +STATIC uint +xfs_calc_buf_res( + uint nbufs, + uint size) +{ + return nbufs * (size + xfs_buf_log_overhead()); +} + +/* + * Logging inodes is really tricksy. They are logged in memory format, + * which means that what we write into the log doesn't directly translate into + * the amount of space they use on disk. + * + * Case in point - btree format forks in memory format use more space than the + * on-disk format. In memory, the buffer contains a normal btree block header so + * the btree code can treat it as though it is just another generic buffer. + * However, when we write it to the inode fork, we don't write all of this + * header as it isn't needed. e.g. the root is only ever in the inode, so + * there's no need for sibling pointers which would waste 16 bytes of space. + * + * Hence when we have an inode with a maximally sized btree format fork, then + * amount of information we actually log is greater than the size of the inode + * on disk. Hence we need an inode reservation function that calculates all this + * correctly. So, we log: + * + * - 4 log op headers for object + * - for the ilf, the inode core and 2 forks + * - inode log format object + * - the inode core + * - two inode forks containing bmap btree root blocks. + * - the btree data contained by both forks will fit into the inode size, + * hence when combined with the inode core above, we have a total of the + * actual inode size. + * - the BMBT headers need to be accounted separately, as they are + * additional to the records and pointers that fit inside the inode + * forks. + */ +STATIC uint +xfs_calc_inode_res( + struct xfs_mount *mp, + uint ninodes) +{ + return ninodes * + (4 * sizeof(struct xlog_op_header) + + sizeof(struct xfs_inode_log_format) + + mp->m_sb.sb_inodesize + + 2 * XFS_BMBT_BLOCK_LEN(mp)); +} + +/* + * The free inode btree is a conditional feature and the log reservation + * requirements differ slightly from that of the traditional inode allocation + * btree. The finobt tracks records for inode chunks with at least one free + * inode. A record can be removed from the tree for an inode allocation + * or free and thus the finobt reservation is unconditional across: + * + * - inode allocation + * - inode free + * - inode chunk allocation + * + * The 'modify' param indicates to include the record modification scenario. The + * 'alloc' param indicates to include the reservation for free space btree + * modifications on behalf of finobt modifications. This is required only for + * transactions that do not already account for free space btree modifications. + * + * the free inode btree: max depth * block size + * the allocation btrees: 2 trees * (max depth - 1) * block size + * the free inode btree entry: block size + */ +STATIC uint +xfs_calc_finobt_res( + struct xfs_mount *mp, + int alloc, + int modify) +{ + uint res; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return 0; + + res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); + if (alloc) + res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); + if (modify) + res += (uint)XFS_FSB_TO_B(mp, 1); + + return res; +} + +/* + * Various log reservation values. + * + * These are based on the size of the file system block because that is what + * most transactions manipulate. Each adds in an additional 128 bytes per + * item logged to try to account for the overhead of the transaction mechanism. + * + * Note: Most of the reservations underestimate the number of allocation + * groups into which they could free extents in the xfs_bmap_finish() call. + * This is because the number in the worst case is quite high and quite + * unusual. In order to fix this we need to change xfs_bmap_finish() to free + * extents in only a single AG at a time. This will require changes to the + * EFI code as well, however, so that the EFI for the extents not freed is + * logged again in each transaction. See SGI PV #261917. + * + * Reservation functions here avoid a huge stack in xfs_trans_init due to + * register overflow from temporaries in the calculations. + */ + + +/* + * In a write transaction we can allocate a maximum of 2 + * extents. This gives: + * the inode getting the new extents: inode size + * the inode's bmap btree: max depth * block size + * the agfs of the ags from which the extents are allocated: 2 * sector + * the superblock free block counter: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + * And the bmap_finish transaction can free bmap blocks in a join: + * the agfs of the ags containing the blocks: 2 * sector size + * the agfls of the ags containing the blocks: 2 * sector size + * the super block free block counter: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + */ +STATIC uint +xfs_calc_write_reservation( + struct xfs_mount *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + MAX((xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), + XFS_FSB_TO_B(mp, 1)))); +} + +/* + * In truncating a file we free up to two extents at once. We can modify: + * the inode being truncated: inode size + * the inode's bmap btree: (max depth + 1) * block size + * And the bmap_finish transaction can free the blocks and bmap blocks: + * the agf for each of the ags: 4 * sector size + * the agfl for each of the ags: 4 * sector size + * the super block to reflect the freed blocks: sector size + * worst case split in allocation btrees per extent assuming 4 extents: + * 4 exts * 2 trees * (2 * max depth - 1) * block size + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size + */ +STATIC uint +xfs_calc_itruncate_reservation( + struct xfs_mount *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + MAX((xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(5, 0) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(2 + mp->m_ialloc_blks + + mp->m_in_maxlevels, 0))); +} + +/* + * In renaming a files we can modify: + * the four inodes involved: 4 * inode size + * the two directory btrees: 2 * (max depth + v2) * dir block size + * the two directory bmap btrees: 2 * max depth * block size + * And the bmap_finish transaction can free dir and bmap blocks (two sets + * of bmap blocks) giving: + * the agf for the ags in which the blocks live: 3 * sector size + * the agfl for the ags in which the blocks live: 3 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size + */ +STATIC uint +xfs_calc_rename_reservation( + struct xfs_mount *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + MAX((xfs_calc_inode_res(mp, 4) + + xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3), + XFS_FSB_TO_B(mp, 1)))); +} + +/* + * For removing an inode from unlinked list at first, we can modify: + * the agi hash list and counters: sector size + * the on disk inode before ours in the agi hash list: inode cluster size + */ +STATIC uint +xfs_calc_iunlink_remove_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); +} + +/* + * For creating a link to an inode: + * the parent directory inode: inode size + * the linked inode: inode size + * the directory btree could split: (max depth + v2) * dir block size + * the directory bmap btree could join or split: (max depth + v2) * blocksize + * And the bmap_finish transaction can free some bmap blocks giving: + * the agf for the ag in which the blocks live: sector size + * the agfl for the ag in which the blocks live: sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 trees * (2 * max depth - 1) * block size + */ +STATIC uint +xfs_calc_link_reservation( + struct xfs_mount *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + xfs_calc_iunlink_remove_reservation(mp) + + MAX((xfs_calc_inode_res(mp, 2) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)))); +} + +/* + * For adding an inode to unlinked list we can modify: + * the agi hash list: sector size + * the unlinked inode: inode size + */ +STATIC uint +xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + xfs_calc_inode_res(mp, 1); +} + +/* + * For removing a directory entry we can modify: + * the parent directory inode: inode size + * the removed inode: inode size + * the directory btree could join: (max depth + v2) * dir block size + * the directory bmap btree could join or split: (max depth + v2) * blocksize + * And the bmap_finish transaction can free the dir and bmap blocks giving: + * the agf for the ag in which the blocks live: 2 * sector size + * the agfl for the ag in which the blocks live: 2 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + */ +STATIC uint +xfs_calc_remove_reservation( + struct xfs_mount *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + xfs_calc_iunlink_add_reservation(mp) + + MAX((xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), + XFS_FSB_TO_B(mp, 1)))); +} + +/* + * For create, break it in to the two cases that the transaction + * covers. We start with the modify case - allocation done by modification + * of the state of existing inodes - and the allocation case. + */ + +/* + * For create we can modify: + * the parent directory inode: inode size + * the new inode: inode size + * the inode btree entry: block size + * the superblock for the nlink flag: sector size + * the directory btree: (max depth + v2) * dir block size + * the directory inode's bmap btree: (max depth + v2) * block size + * the finobt (record modification and allocation btrees) + */ +STATIC uint +xfs_calc_create_resv_modify( + struct xfs_mount *mp) +{ + return xfs_calc_inode_res(mp, 2) + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + (uint)XFS_FSB_TO_B(mp, 1) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 1, 1); +} + +/* + * For create we can allocate some inodes giving: + * the agi and agf of the ag getting the new inodes: 2 * sectorsize + * the superblock for the nlink flag: sector size + * the inode blocks allocated: mp->m_ialloc_blks * blocksize + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size + */ +STATIC uint +xfs_calc_create_resv_alloc( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + mp->m_sb.sb_sectsize + + xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); +} + +STATIC uint +__xfs_calc_create_reservation( + struct xfs_mount *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + MAX(xfs_calc_create_resv_alloc(mp), + xfs_calc_create_resv_modify(mp)); +} + +/* + * For icreate we can allocate some inodes giving: + * the agi and agf of the ag getting the new inodes: 2 * sectorsize + * the superblock for the nlink flag: sector size + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size + * the finobt (record insertion) + */ +STATIC uint +xfs_calc_icreate_resv_alloc( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + mp->m_sb.sb_sectsize + + xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 0, 0); +} + +STATIC uint +xfs_calc_icreate_reservation(xfs_mount_t *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + MAX(xfs_calc_icreate_resv_alloc(mp), + xfs_calc_create_resv_modify(mp)); +} + +STATIC uint +xfs_calc_create_reservation( + struct xfs_mount *mp) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) + return xfs_calc_icreate_reservation(mp); + return __xfs_calc_create_reservation(mp); + +} + +STATIC uint +xfs_calc_create_tmpfile_reservation( + struct xfs_mount *mp) +{ + uint res = XFS_DQUOT_LOGRES(mp); + + if (xfs_sb_version_hascrc(&mp->m_sb)) + res += xfs_calc_icreate_resv_alloc(mp); + else + res += xfs_calc_create_resv_alloc(mp); + + return res + xfs_calc_iunlink_add_reservation(mp); +} + +/* + * Making a new directory is the same as creating a new file. + */ +STATIC uint +xfs_calc_mkdir_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_create_reservation(mp); +} + + +/* + * Making a new symplink is the same as creating a new file, but + * with the added blocks for remote symlink data which can be up to 1kB in + * length (MAXPATHLEN). + */ +STATIC uint +xfs_calc_symlink_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_create_reservation(mp) + + xfs_calc_buf_res(1, MAXPATHLEN); +} + +/* + * In freeing an inode we can modify: + * the inode being freed: inode size + * the super block free inode counter: sector size + * the agi hash list and counters: sector size + * the inode btree entry: block size + * the on disk inode before ours in the agi hash list: inode cluster size + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size + * the finobt (record insertion, removal or modification) + */ +STATIC uint +xfs_calc_ifree_reservation( + struct xfs_mount *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_iunlink_remove_reservation(mp) + + xfs_calc_buf_res(1, 0) + + xfs_calc_buf_res(2 + mp->m_ialloc_blks + + mp->m_in_maxlevels, 0) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 0, 1); +} + +/* + * When only changing the inode we log the inode and possibly the superblock + * We also add a bit of slop for the transaction stuff. + */ +STATIC uint +xfs_calc_ichange_reservation( + struct xfs_mount *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); + +} + +/* + * Growing the data section of the filesystem. + * superblock + * agi and agf + * allocation btrees + */ +STATIC uint +xfs_calc_growdata_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); +} + +/* + * Growing the rt section of the filesystem. + * In the first set of transactions (ALLOC) we allocate space to the + * bitmap or summary files. + * superblock: sector size + * agf of the ag from which the extent is allocated: sector size + * bmap btree for bitmap/summary inode: max depth * blocksize + * bitmap/summary inode: inode size + * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize + */ +STATIC uint +xfs_calc_growrtalloc_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); +} + +/* + * Growing the rt section of the filesystem. + * In the second set of transactions (ZERO) we zero the new metadata blocks. + * one bitmap/summary block: blocksize + */ +STATIC uint +xfs_calc_growrtzero_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize); +} + +/* + * Growing the rt section of the filesystem. + * In the third set of transactions (FREE) we update metadata without + * allocating any new blocks. + * superblock: sector size + * bitmap inode: inode size + * summary inode: inode size + * one bitmap block: blocksize + * summary blocks: new summary size + */ +STATIC uint +xfs_calc_growrtfree_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + xfs_calc_inode_res(mp, 2) + + xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) + + xfs_calc_buf_res(1, mp->m_rsumsize); +} + +/* + * Logging the inode modification timestamp on a synchronous write. + * inode + */ +STATIC uint +xfs_calc_swrite_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_inode_res(mp, 1); +} + +/* + * Logging the inode mode bits when writing a setuid/setgid file + * inode + */ +STATIC uint +xfs_calc_writeid_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_inode_res(mp, 1); +} + +/* + * Converting the inode from non-attributed to attributed. + * the inode being converted: inode size + * agf block and superblock (for block allocation) + * the new block (directory sized) + * bmap blocks for the new directory block + * allocation btrees + */ +STATIC uint +xfs_calc_addafork_reservation( + struct xfs_mount *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + + xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1, + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); +} + +/* + * Removing the attribute fork of a file + * the inode being truncated: inode size + * the inode's bmap btree: max depth * block size + * And the bmap_finish transaction can free the blocks and bmap blocks: + * the agf for each of the ags: 4 * sector size + * the agfl for each of the ags: 4 * sector size + * the super block to reflect the freed blocks: sector size + * worst case split in allocation btrees per extent assuming 4 extents: + * 4 exts * 2 trees * (2 * max depth - 1) * block size + */ +STATIC uint +xfs_calc_attrinval_reservation( + struct xfs_mount *mp) +{ + return MAX((xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), + XFS_FSB_TO_B(mp, 1)))); +} + +/* + * Setting an attribute at mount time. + * the inode getting the attribute + * the superblock for allocations + * the agfs extents are allocated from + * the attribute btree * max depth + * the inode allocation btree + * Since attribute transaction space is dependent on the size of the attribute, + * the calculation is done partially at mount time and partially at runtime(see + * below). + */ +STATIC uint +xfs_calc_attrsetm_reservation( + struct xfs_mount *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); +} + +/* + * Setting an attribute at runtime, transaction space unit per block. + * the superblock for allocations: sector size + * the inode bmap btree could join or split: max depth * block size + * Since the runtime attribute transaction space is dependent on the total + * blocks needed for the 1st bmap, here we calculate out the space unit for + * one block so that the caller could figure out the total space according + * to the attibute extent length in blocks by: + * ext * M_RES(mp)->tr_attrsetrt.tr_logres + */ +STATIC uint +xfs_calc_attrsetrt_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), + XFS_FSB_TO_B(mp, 1)); +} + +/* + * Removing an attribute. + * the inode: inode size + * the attribute btree could join: max depth * block size + * the inode bmap btree could join or split: max depth * block size + * And the bmap_finish transaction can free the attr blocks freed giving: + * the agf for the ag in which the blocks live: 2 * sector size + * the agfl for the ag in which the blocks live: 2 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + */ +STATIC uint +xfs_calc_attrrm_reservation( + struct xfs_mount *mp) +{ + return XFS_DQUOT_LOGRES(mp) + + MAX((xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, + XFS_FSB_TO_B(mp, 1)) + + (uint)XFS_FSB_TO_B(mp, + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)), + (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), + XFS_FSB_TO_B(mp, 1)))); +} + +/* + * Clearing a bad agino number in an agi hash bucket. + */ +STATIC uint +xfs_calc_clear_agi_bucket_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); +} + +/* + * Clearing the quotaflags in the superblock. + * the super block for changing quota flags: sector size + */ +STATIC uint +xfs_calc_qm_sbchange_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); +} + +/* + * Adjusting quota limits. + * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) + */ +STATIC uint +xfs_calc_qm_setqlim_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot)); +} + +/* + * Allocating quota on disk if needed. + * the write transaction log space for quota file extent allocation + * the unit of quota allocation: one system block size + */ +STATIC uint +xfs_calc_qm_dqalloc_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_write_reservation(mp) + + xfs_calc_buf_res(1, + XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); +} + +/* + * Turning off quotas. + * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 + * the superblock for the quota flags: sector size + */ +STATIC uint +xfs_calc_qm_quotaoff_reservation( + struct xfs_mount *mp) +{ + return sizeof(struct xfs_qoff_logitem) * 2 + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); +} + +/* + * End of turning off quotas. + * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 + */ +STATIC uint +xfs_calc_qm_quotaoff_end_reservation( + struct xfs_mount *mp) +{ + return sizeof(struct xfs_qoff_logitem) * 2; +} + +/* + * Syncing the incore super block changes to disk. + * the super block to reflect the changes: sector size + */ +STATIC uint +xfs_calc_sb_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); +} + +void +xfs_trans_resv_calc( + struct xfs_mount *mp, + struct xfs_trans_resv *resp) +{ + /* + * The following transactions are logged in physical format and + * require a permanent reservation on space. + */ + resp->tr_write.tr_logres = xfs_calc_write_reservation(mp); + resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT; + resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp); + resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT; + resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp); + resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT; + resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_link.tr_logres = xfs_calc_link_reservation(mp); + resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT; + resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp); + resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT; + resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp); + resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; + resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_create.tr_logres = xfs_calc_create_reservation(mp); + resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; + resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_create_tmpfile.tr_logres = + xfs_calc_create_tmpfile_reservation(mp); + resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT; + resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp); + resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT; + resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp); + resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT; + resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp); + resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT; + resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp); + resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT; + resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp); + resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT; + resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp); + resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT; + resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp); + resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT; + resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp); + resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; + resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + /* + * The following transactions are logged in logical format with + * a default log count. + */ + resp->tr_qm_sbchange.tr_logres = xfs_calc_qm_sbchange_reservation(mp); + resp->tr_qm_sbchange.tr_logcount = XFS_DEFAULT_LOG_COUNT; + + resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp); + resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT; + + resp->tr_qm_quotaoff.tr_logres = xfs_calc_qm_quotaoff_reservation(mp); + resp->tr_qm_quotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT; + + resp->tr_qm_equotaoff.tr_logres = + xfs_calc_qm_quotaoff_end_reservation(mp); + resp->tr_qm_equotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT; + + resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp); + resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT; + + /* The following transaction are logged in logical format */ + resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp); + resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp); + resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp); + resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp); + resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp); + resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp); + resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp); + resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp); +} diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c deleted file mode 100644 index d43813267a80..000000000000 --- a/fs/xfs/xfs_alloc.c +++ /dev/null @@ -1,2630 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_shared.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_alloc.h" -#include "xfs_extent_busy.h" -#include "xfs_error.h" -#include "xfs_cksum.h" -#include "xfs_trace.h" -#include "xfs_trans.h" -#include "xfs_buf_item.h" -#include "xfs_log.h" - -struct workqueue_struct *xfs_alloc_wq; - -#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) - -#define XFSA_FIXUP_BNO_OK 1 -#define XFSA_FIXUP_CNT_OK 2 - -STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); -STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); -STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); -STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, - xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); - -/* - * Lookup the record equal to [bno, len] in the btree given by cur. - */ -STATIC int /* error */ -xfs_alloc_lookup_eq( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len, /* length of extent */ - int *stat) /* success/failure */ -{ - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); -} - -/* - * Lookup the first record greater than or equal to [bno, len] - * in the btree given by cur. - */ -int /* error */ -xfs_alloc_lookup_ge( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len, /* length of extent */ - int *stat) /* success/failure */ -{ - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); -} - -/* - * Lookup the first record less than or equal to [bno, len] - * in the btree given by cur. - */ -int /* error */ -xfs_alloc_lookup_le( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len, /* length of extent */ - int *stat) /* success/failure */ -{ - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); -} - -/* - * Update the record referred to by cur to the value given - * by [bno, len]. - * This either works (return 0) or gets an EFSCORRUPTED error. - */ -STATIC int /* error */ -xfs_alloc_update( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len) /* length of extent */ -{ - union xfs_btree_rec rec; - - rec.alloc.ar_startblock = cpu_to_be32(bno); - rec.alloc.ar_blockcount = cpu_to_be32(len); - return xfs_btree_update(cur, &rec); -} - -/* - * Get the data from the pointed-to record. - */ -int /* error */ -xfs_alloc_get_rec( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agblock_t *bno, /* output: starting block of extent */ - xfs_extlen_t *len, /* output: length of extent */ - int *stat) /* output: success/failure */ -{ - union xfs_btree_rec *rec; - int error; - - error = xfs_btree_get_rec(cur, &rec, stat); - if (!error && *stat == 1) { - *bno = be32_to_cpu(rec->alloc.ar_startblock); - *len = be32_to_cpu(rec->alloc.ar_blockcount); - } - return error; -} - -/* - * Compute aligned version of the found extent. - * Takes alignment and min length into account. - */ -STATIC void -xfs_alloc_compute_aligned( - xfs_alloc_arg_t *args, /* allocation argument structure */ - xfs_agblock_t foundbno, /* starting block in found extent */ - xfs_extlen_t foundlen, /* length in found extent */ - xfs_agblock_t *resbno, /* result block number */ - xfs_extlen_t *reslen) /* result length */ -{ - xfs_agblock_t bno; - xfs_extlen_t len; - - /* Trim busy sections out of found extent */ - xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); - - if (args->alignment > 1 && len >= args->minlen) { - xfs_agblock_t aligned_bno = roundup(bno, args->alignment); - xfs_extlen_t diff = aligned_bno - bno; - - *resbno = aligned_bno; - *reslen = diff >= len ? 0 : len - diff; - } else { - *resbno = bno; - *reslen = len; - } -} - -/* - * Compute best start block and diff for "near" allocations. - * freelen >= wantlen already checked by caller. - */ -STATIC xfs_extlen_t /* difference value (absolute) */ -xfs_alloc_compute_diff( - xfs_agblock_t wantbno, /* target starting block */ - xfs_extlen_t wantlen, /* target length */ - xfs_extlen_t alignment, /* target alignment */ - char userdata, /* are we allocating data? */ - xfs_agblock_t freebno, /* freespace's starting block */ - xfs_extlen_t freelen, /* freespace's length */ - xfs_agblock_t *newbnop) /* result: best start block from free */ -{ - xfs_agblock_t freeend; /* end of freespace extent */ - xfs_agblock_t newbno1; /* return block number */ - xfs_agblock_t newbno2; /* other new block number */ - xfs_extlen_t newlen1=0; /* length with newbno1 */ - xfs_extlen_t newlen2=0; /* length with newbno2 */ - xfs_agblock_t wantend; /* end of target extent */ - - ASSERT(freelen >= wantlen); - freeend = freebno + freelen; - wantend = wantbno + wantlen; - /* - * We want to allocate from the start of a free extent if it is past - * the desired block or if we are allocating user data and the free - * extent is before desired block. The second case is there to allow - * for contiguous allocation from the remaining free space if the file - * grows in the short term. - */ - if (freebno >= wantbno || (userdata && freeend < wantend)) { - if ((newbno1 = roundup(freebno, alignment)) >= freeend) - newbno1 = NULLAGBLOCK; - } else if (freeend >= wantend && alignment > 1) { - newbno1 = roundup(wantbno, alignment); - newbno2 = newbno1 - alignment; - if (newbno1 >= freeend) - newbno1 = NULLAGBLOCK; - else - newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1); - if (newbno2 < freebno) - newbno2 = NULLAGBLOCK; - else - newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2); - if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { - if (newlen1 < newlen2 || - (newlen1 == newlen2 && - XFS_ABSDIFF(newbno1, wantbno) > - XFS_ABSDIFF(newbno2, wantbno))) - newbno1 = newbno2; - } else if (newbno2 != NULLAGBLOCK) - newbno1 = newbno2; - } else if (freeend >= wantend) { - newbno1 = wantbno; - } else if (alignment > 1) { - newbno1 = roundup(freeend - wantlen, alignment); - if (newbno1 > freeend - wantlen && - newbno1 - alignment >= freebno) - newbno1 -= alignment; - else if (newbno1 >= freeend) - newbno1 = NULLAGBLOCK; - } else - newbno1 = freeend - wantlen; - *newbnop = newbno1; - return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); -} - -/* - * Fix up the length, based on mod and prod. - * len should be k * prod + mod for some k. - * If len is too small it is returned unchanged. - * If len hits maxlen it is left alone. - */ -STATIC void -xfs_alloc_fix_len( - xfs_alloc_arg_t *args) /* allocation argument structure */ -{ - xfs_extlen_t k; - xfs_extlen_t rlen; - - ASSERT(args->mod < args->prod); - rlen = args->len; - ASSERT(rlen >= args->minlen); - ASSERT(rlen <= args->maxlen); - if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen || - (args->mod == 0 && rlen < args->prod)) - return; - k = rlen % args->prod; - if (k == args->mod) - return; - if (k > args->mod) - rlen = rlen - (k - args->mod); - else - rlen = rlen - args->prod + (args->mod - k); - if ((int)rlen < (int)args->minlen) - return; - ASSERT(rlen >= args->minlen && rlen <= args->maxlen); - ASSERT(rlen % args->prod == args->mod); - args->len = rlen; -} - -/* - * Fix up length if there is too little space left in the a.g. - * Return 1 if ok, 0 if too little, should give up. - */ -STATIC int -xfs_alloc_fix_minleft( - xfs_alloc_arg_t *args) /* allocation argument structure */ -{ - xfs_agf_t *agf; /* a.g. freelist header */ - int diff; /* free space difference */ - - if (args->minleft == 0) - return 1; - agf = XFS_BUF_TO_AGF(args->agbp); - diff = be32_to_cpu(agf->agf_freeblks) - - args->len - args->minleft; - if (diff >= 0) - return 1; - args->len += diff; /* shrink the allocated space */ - if (args->len >= args->minlen) - return 1; - args->agbno = NULLAGBLOCK; - return 0; -} - -/* - * Update the two btrees, logically removing from freespace the extent - * starting at rbno, rlen blocks. The extent is contained within the - * actual (current) free extent fbno for flen blocks. - * Flags are passed in indicating whether the cursors are set to the - * relevant records. - */ -STATIC int /* error code */ -xfs_alloc_fixup_trees( - xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ - xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ - xfs_agblock_t fbno, /* starting block of free extent */ - xfs_extlen_t flen, /* length of free extent */ - xfs_agblock_t rbno, /* starting block of returned extent */ - xfs_extlen_t rlen, /* length of returned extent */ - int flags) /* flags, XFSA_FIXUP_... */ -{ - int error; /* error code */ - int i; /* operation results */ - xfs_agblock_t nfbno1; /* first new free startblock */ - xfs_agblock_t nfbno2; /* second new free startblock */ - xfs_extlen_t nflen1=0; /* first new free length */ - xfs_extlen_t nflen2=0; /* second new free length */ - - /* - * Look up the record in the by-size tree if necessary. - */ - if (flags & XFSA_FIXUP_CNT_OK) { -#ifdef DEBUG - if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN( - i == 1 && nfbno1 == fbno && nflen1 == flen); -#endif - } else { - if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - } - /* - * Look up the record in the by-block tree if necessary. - */ - if (flags & XFSA_FIXUP_BNO_OK) { -#ifdef DEBUG - if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN( - i == 1 && nfbno1 == fbno && nflen1 == flen); -#endif - } else { - if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - } - -#ifdef DEBUG - if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { - struct xfs_btree_block *bnoblock; - struct xfs_btree_block *cntblock; - - bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); - cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); - - XFS_WANT_CORRUPTED_RETURN( - bnoblock->bb_numrecs == cntblock->bb_numrecs); - } -#endif - - /* - * Deal with all four cases: the allocated record is contained - * within the freespace record, so we can have new freespace - * at either (or both) end, or no freespace remaining. - */ - if (rbno == fbno && rlen == flen) - nfbno1 = nfbno2 = NULLAGBLOCK; - else if (rbno == fbno) { - nfbno1 = rbno + rlen; - nflen1 = flen - rlen; - nfbno2 = NULLAGBLOCK; - } else if (rbno + rlen == fbno + flen) { - nfbno1 = fbno; - nflen1 = flen - rlen; - nfbno2 = NULLAGBLOCK; - } else { - nfbno1 = fbno; - nflen1 = rbno - fbno; - nfbno2 = rbno + rlen; - nflen2 = (fbno + flen) - nfbno2; - } - /* - * Delete the entry from the by-size btree. - */ - if ((error = xfs_btree_delete(cnt_cur, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - /* - * Add new by-size btree entry(s). - */ - if (nfbno1 != NULLAGBLOCK) { - if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_btree_insert(cnt_cur, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - } - if (nfbno2 != NULLAGBLOCK) { - if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_btree_insert(cnt_cur, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - } - /* - * Fix up the by-block btree entry(s). - */ - if (nfbno1 == NULLAGBLOCK) { - /* - * No remaining freespace, just delete the by-block tree entry. - */ - if ((error = xfs_btree_delete(bno_cur, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - } else { - /* - * Update the by-block entry to start later|be shorter. - */ - if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) - return error; - } - if (nfbno2 != NULLAGBLOCK) { - /* - * 2 resulting free entries, need to add one. - */ - if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_btree_insert(bno_cur, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - } - return 0; -} - -static bool -xfs_agfl_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); - int i; - - if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid)) - return false; - if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) - return false; - /* - * during growfs operations, the perag is not fully initialised, - * so we can't use it for any useful checking. growfs ensures we can't - * use it by using uncached buffers that don't have the perag attached - * so we can detect and avoid this problem. - */ - if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) - return false; - - for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { - if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && - be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) - return false; - } - return true; -} - -static void -xfs_agfl_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - /* - * There is no verification of non-crc AGFLs because mkfs does not - * initialise the AGFL to zero or NULL. Hence the only valid part of the - * AGFL is what the AGF says is active. We can't get to the AGF, so we - * can't verify just those entries are valid. - */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (!xfs_agfl_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); -} - -static void -xfs_agfl_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - - /* no verification of non-crc AGFLs */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (!xfs_agfl_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - - if (bip) - XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); - - xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); -} - -const struct xfs_buf_ops xfs_agfl_buf_ops = { - .verify_read = xfs_agfl_read_verify, - .verify_write = xfs_agfl_write_verify, -}; - -/* - * Read in the allocation group free block array. - */ -STATIC int /* error */ -xfs_alloc_read_agfl( - xfs_mount_t *mp, /* mount point structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_buf_t **bpp) /* buffer for the ag free block array */ -{ - xfs_buf_t *bp; /* return value */ - int error; - - ASSERT(agno != NULLAGNUMBER); - error = xfs_trans_read_buf( - mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); - if (error) - return error; - xfs_buf_set_ref(bp, XFS_AGFL_REF); - *bpp = bp; - return 0; -} - -STATIC int -xfs_alloc_update_counters( - struct xfs_trans *tp, - struct xfs_perag *pag, - struct xfs_buf *agbp, - long len) -{ - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); - - pag->pagf_freeblks += len; - be32_add_cpu(&agf->agf_freeblks, len); - - xfs_trans_agblocks_delta(tp, len); - if (unlikely(be32_to_cpu(agf->agf_freeblks) > - be32_to_cpu(agf->agf_length))) - return EFSCORRUPTED; - - xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); - return 0; -} - -/* - * Allocation group level functions. - */ - -/* - * Allocate a variable extent in the allocation group agno. - * Type and bno are used to determine where in the allocation group the - * extent will start. - * Extent's length (returned in *len) will be between minlen and maxlen, - * and of the form k * prod + mod unless there's nothing that large. - * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. - */ -STATIC int /* error */ -xfs_alloc_ag_vextent( - xfs_alloc_arg_t *args) /* argument structure for allocation */ -{ - int error=0; - - ASSERT(args->minlen > 0); - ASSERT(args->maxlen > 0); - ASSERT(args->minlen <= args->maxlen); - ASSERT(args->mod < args->prod); - ASSERT(args->alignment > 0); - /* - * Branch to correct routine based on the type. - */ - args->wasfromfl = 0; - switch (args->type) { - case XFS_ALLOCTYPE_THIS_AG: - error = xfs_alloc_ag_vextent_size(args); - break; - case XFS_ALLOCTYPE_NEAR_BNO: - error = xfs_alloc_ag_vextent_near(args); - break; - case XFS_ALLOCTYPE_THIS_BNO: - error = xfs_alloc_ag_vextent_exact(args); - break; - default: - ASSERT(0); - /* NOTREACHED */ - } - - if (error || args->agbno == NULLAGBLOCK) - return error; - - ASSERT(args->len >= args->minlen); - ASSERT(args->len <= args->maxlen); - ASSERT(!args->wasfromfl || !args->isfl); - ASSERT(args->agbno % args->alignment == 0); - - if (!args->wasfromfl) { - error = xfs_alloc_update_counters(args->tp, args->pag, - args->agbp, - -((long)(args->len))); - if (error) - return error; - - ASSERT(!xfs_extent_busy_search(args->mp, args->agno, - args->agbno, args->len)); - } - - if (!args->isfl) { - xfs_trans_mod_sb(args->tp, args->wasdel ? - XFS_TRANS_SB_RES_FDBLOCKS : - XFS_TRANS_SB_FDBLOCKS, - -((long)(args->len))); - } - - XFS_STATS_INC(xs_allocx); - XFS_STATS_ADD(xs_allocb, args->len); - return error; -} - -/* - * Allocate a variable extent at exactly agno/bno. - * Extent's length (returned in *len) will be between minlen and maxlen, - * and of the form k * prod + mod unless there's nothing that large. - * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it. - */ -STATIC int /* error */ -xfs_alloc_ag_vextent_exact( - xfs_alloc_arg_t *args) /* allocation argument structure */ -{ - xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ - xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ - int error; - xfs_agblock_t fbno; /* start block of found extent */ - xfs_extlen_t flen; /* length of found extent */ - xfs_agblock_t tbno; /* start block of trimmed extent */ - xfs_extlen_t tlen; /* length of trimmed extent */ - xfs_agblock_t tend; /* end block of trimmed extent */ - int i; /* success/failure of operation */ - - ASSERT(args->alignment == 1); - - /* - * Allocate/initialize a cursor for the by-number freespace btree. - */ - bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO); - - /* - * Lookup bno and minlen in the btree (minlen is irrelevant, really). - * Look for the closest free block <= bno, it must contain bno - * if any free block does. - */ - error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); - if (error) - goto error0; - if (!i) - goto not_found; - - /* - * Grab the freespace record. - */ - error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); - if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - ASSERT(fbno <= args->agbno); - - /* - * Check for overlapping busy extents. - */ - xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen); - - /* - * Give up if the start of the extent is busy, or the freespace isn't - * long enough for the minimum request. - */ - if (tbno > args->agbno) - goto not_found; - if (tlen < args->minlen) - goto not_found; - tend = tbno + tlen; - if (tend < args->agbno + args->minlen) - goto not_found; - - /* - * End of extent will be smaller of the freespace end and the - * maximal requested end. - * - * Fix the length according to mod and prod if given. - */ - args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) - - args->agbno; - xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto not_found; - - ASSERT(args->agbno + args->len <= tend); - - /* - * We are allocating agbno for args->len - * Allocate/initialize a cursor for the by-size btree. - */ - cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT); - ASSERT(args->agbno + args->len <= - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); - error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, - args->len, XFSA_FIXUP_BNO_OK); - if (error) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); - goto error0; - } - - xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - - args->wasfromfl = 0; - trace_xfs_alloc_exact_done(args); - return 0; - -not_found: - /* Didn't find it, return null. */ - xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); - args->agbno = NULLAGBLOCK; - trace_xfs_alloc_exact_notfound(args); - return 0; - -error0: - xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); - trace_xfs_alloc_exact_error(args); - return error; -} - -/* - * Search the btree in a given direction via the search cursor and compare - * the records found against the good extent we've already found. - */ -STATIC int -xfs_alloc_find_best_extent( - struct xfs_alloc_arg *args, /* allocation argument structure */ - struct xfs_btree_cur **gcur, /* good cursor */ - struct xfs_btree_cur **scur, /* searching cursor */ - xfs_agblock_t gdiff, /* difference for search comparison */ - xfs_agblock_t *sbno, /* extent found by search */ - xfs_extlen_t *slen, /* extent length */ - xfs_agblock_t *sbnoa, /* aligned extent found by search */ - xfs_extlen_t *slena, /* aligned extent length */ - int dir) /* 0 = search right, 1 = search left */ -{ - xfs_agblock_t new; - xfs_agblock_t sdiff; - int error; - int i; - - /* The good extent is perfect, no need to search. */ - if (!gdiff) - goto out_use_good; - - /* - * Look until we find a better one, run out of space or run off the end. - */ - do { - error = xfs_alloc_get_rec(*scur, sbno, slen, &i); - if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); - - /* - * The good extent is closer than this one. - */ - if (!dir) { - if (*sbnoa >= args->agbno + gdiff) - goto out_use_good; - } else { - if (*sbnoa <= args->agbno - gdiff) - goto out_use_good; - } - - /* - * Same distance, compare length and pick the best. - */ - if (*slena >= args->minlen) { - args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); - xfs_alloc_fix_len(args); - - sdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, - args->userdata, *sbnoa, - *slena, &new); - - /* - * Choose closer size and invalidate other cursor. - */ - if (sdiff < gdiff) - goto out_use_search; - goto out_use_good; - } - - if (!dir) - error = xfs_btree_increment(*scur, 0, &i); - else - error = xfs_btree_decrement(*scur, 0, &i); - if (error) - goto error0; - } while (i); - -out_use_good: - xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); - *scur = NULL; - return 0; - -out_use_search: - xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); - *gcur = NULL; - return 0; - -error0: - /* caller invalidates cursors */ - return error; -} - -/* - * Allocate a variable extent near bno in the allocation group agno. - * Extent's length (returned in len) will be between minlen and maxlen, - * and of the form k * prod + mod unless there's nothing that large. - * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. - */ -STATIC int /* error */ -xfs_alloc_ag_vextent_near( - xfs_alloc_arg_t *args) /* allocation argument structure */ -{ - xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ - xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ - xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ - xfs_agblock_t gtbno; /* start bno of right side entry */ - xfs_agblock_t gtbnoa; /* aligned ... */ - xfs_extlen_t gtdiff; /* difference to right side entry */ - xfs_extlen_t gtlen; /* length of right side entry */ - xfs_extlen_t gtlena; /* aligned ... */ - xfs_agblock_t gtnew; /* useful start bno of right side */ - int error; /* error code */ - int i; /* result code, temporary */ - int j; /* result code, temporary */ - xfs_agblock_t ltbno; /* start bno of left side entry */ - xfs_agblock_t ltbnoa; /* aligned ... */ - xfs_extlen_t ltdiff; /* difference to left side entry */ - xfs_extlen_t ltlen; /* length of left side entry */ - xfs_extlen_t ltlena; /* aligned ... */ - xfs_agblock_t ltnew; /* useful start bno of left side */ - xfs_extlen_t rlen; /* length of returned extent */ - int forced = 0; -#ifdef DEBUG - /* - * Randomly don't execute the first algorithm. - */ - int dofirst; /* set to do first algorithm */ - - dofirst = prandom_u32() & 1; -#endif - -restart: - bno_cur_lt = NULL; - bno_cur_gt = NULL; - ltlen = 0; - gtlena = 0; - ltlena = 0; - - /* - * Get a cursor for the by-size btree. - */ - cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT); - - /* - * See if there are any free extents as big as maxlen. - */ - if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))) - goto error0; - /* - * If none, then pick up the last entry in the tree unless the - * tree is empty. - */ - if (!i) { - if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, <bno, - <len, &i))) - goto error0; - if (i == 0 || ltlen == 0) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_near_noentry(args); - return 0; - } - ASSERT(i == 1); - } - args->wasfromfl = 0; - - /* - * First algorithm. - * If the requested extent is large wrt the freespaces available - * in this a.g., then the cursor will be pointing to a btree entry - * near the right edge of the tree. If it's in the last btree leaf - * block, then we just examine all the entries in that block - * that are big enough, and pick the best one. - * This is written as a while loop so we can break out of it, - * but we never loop back to the top. - */ - while (xfs_btree_islastblock(cnt_cur, 0)) { - xfs_extlen_t bdiff; - int besti=0; - xfs_extlen_t blen=0; - xfs_agblock_t bnew=0; - -#ifdef DEBUG - if (dofirst) - break; -#endif - /* - * Start from the entry that lookup found, sequence through - * all larger free blocks. If we're actually pointing at a - * record smaller than maxlen, go to the start of this block, - * and skip all those smaller than minlen. - */ - if (ltlen || args->alignment > 1) { - cnt_cur->bc_ptrs[0] = 1; - do { - if ((error = xfs_alloc_get_rec(cnt_cur, <bno, - <len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if (ltlen >= args->minlen) - break; - if ((error = xfs_btree_increment(cnt_cur, 0, &i))) - goto error0; - } while (i); - ASSERT(ltlen >= args->minlen); - if (!i) - break; - } - i = cnt_cur->bc_ptrs[0]; - for (j = 1, blen = 0, bdiff = 0; - !error && j && (blen < args->maxlen || bdiff > 0); - error = xfs_btree_increment(cnt_cur, 0, &j)) { - /* - * For each entry, decide if it's better than - * the previous best entry. - */ - if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(args, ltbno, ltlen, - <bnoa, <lena); - if (ltlena < args->minlen) - continue; - args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); - xfs_alloc_fix_len(args); - ASSERT(args->len >= args->minlen); - if (args->len < blen) - continue; - ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, args->userdata, ltbnoa, - ltlena, <new); - if (ltnew != NULLAGBLOCK && - (args->len > blen || ltdiff < bdiff)) { - bdiff = ltdiff; - bnew = ltnew; - blen = args->len; - besti = cnt_cur->bc_ptrs[0]; - } - } - /* - * It didn't work. We COULD be in a case where - * there's a good record somewhere, so try again. - */ - if (blen == 0) - break; - /* - * Point at the best entry, and retrieve it again. - */ - cnt_cur->bc_ptrs[0] = besti; - if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); - args->len = blen; - if (!xfs_alloc_fix_minleft(args)) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_near_nominleft(args); - return 0; - } - blen = args->len; - /* - * We are allocating starting at bnew for blen blocks. - */ - args->agbno = bnew; - ASSERT(bnew >= ltbno); - ASSERT(bnew + blen <= ltbno + ltlen); - /* - * Set up a cursor for the by-bno tree. - */ - bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, - args->agbp, args->agno, XFS_BTNUM_BNO); - /* - * Fix up the btree entries. - */ - if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, - ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) - goto error0; - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); - - trace_xfs_alloc_near_first(args); - return 0; - } - /* - * Second algorithm. - * Search in the by-bno tree to the left and to the right - * simultaneously, until in each case we find a space big enough, - * or run into the edge of the tree. When we run into the edge, - * we deallocate that cursor. - * If both searches succeed, we compare the two spaces and pick - * the better one. - * With alignment, it's possible for both to fail; the upper - * level algorithm that picks allocation groups for allocations - * is not supposed to do this. - */ - /* - * Allocate and initialize the cursor for the leftward search. - */ - bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO); - /* - * Lookup <= bno to find the leftward search's starting point. - */ - if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i))) - goto error0; - if (!i) { - /* - * Didn't find anything; use this cursor for the rightward - * search. - */ - bno_cur_gt = bno_cur_lt; - bno_cur_lt = NULL; - } - /* - * Found something. Duplicate the cursor for the rightward search. - */ - else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))) - goto error0; - /* - * Increment the cursor, so we will point at the entry just right - * of the leftward entry if any, or to the leftmost entry. - */ - if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) - goto error0; - if (!i) { - /* - * It failed, there are no rightward entries. - */ - xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); - bno_cur_gt = NULL; - } - /* - * Loop going left with the leftward cursor, right with the - * rightward cursor, until either both directions give up or - * we find an entry at least as big as minlen. - */ - do { - if (bno_cur_lt) { - if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(args, ltbno, ltlen, - <bnoa, <lena); - if (ltlena >= args->minlen) - break; - if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) - goto error0; - if (!i) { - xfs_btree_del_cursor(bno_cur_lt, - XFS_BTREE_NOERROR); - bno_cur_lt = NULL; - } - } - if (bno_cur_gt) { - if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(args, gtbno, gtlen, - >bnoa, >lena); - if (gtlena >= args->minlen) - break; - if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) - goto error0; - if (!i) { - xfs_btree_del_cursor(bno_cur_gt, - XFS_BTREE_NOERROR); - bno_cur_gt = NULL; - } - } - } while (bno_cur_lt || bno_cur_gt); - - /* - * Got both cursors still active, need to find better entry. - */ - if (bno_cur_lt && bno_cur_gt) { - if (ltlena >= args->minlen) { - /* - * Left side is good, look for a right side entry. - */ - args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); - xfs_alloc_fix_len(args); - ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, args->userdata, ltbnoa, - ltlena, <new); - - error = xfs_alloc_find_best_extent(args, - &bno_cur_lt, &bno_cur_gt, - ltdiff, >bno, >len, - >bnoa, >lena, - 0 /* search right */); - } else { - ASSERT(gtlena >= args->minlen); - - /* - * Right side is good, look for a left side entry. - */ - args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); - xfs_alloc_fix_len(args); - gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, args->userdata, gtbnoa, - gtlena, >new); - - error = xfs_alloc_find_best_extent(args, - &bno_cur_gt, &bno_cur_lt, - gtdiff, <bno, <len, - <bnoa, <lena, - 1 /* search left */); - } - - if (error) - goto error0; - } - - /* - * If we couldn't get anything, give up. - */ - if (bno_cur_lt == NULL && bno_cur_gt == NULL) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - - if (!forced++) { - trace_xfs_alloc_near_busy(args); - xfs_log_force(args->mp, XFS_LOG_SYNC); - goto restart; - } - trace_xfs_alloc_size_neither(args); - args->agbno = NULLAGBLOCK; - return 0; - } - - /* - * At this point we have selected a freespace entry, either to the - * left or to the right. If it's on the right, copy all the - * useful variables to the "left" set so we only have one - * copy of this code. - */ - if (bno_cur_gt) { - bno_cur_lt = bno_cur_gt; - bno_cur_gt = NULL; - ltbno = gtbno; - ltbnoa = gtbnoa; - ltlen = gtlen; - ltlena = gtlena; - j = 1; - } else - j = 0; - - /* - * Fix up the length and compute the useful address. - */ - args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); - xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) { - trace_xfs_alloc_near_nominleft(args); - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - return 0; - } - rlen = args->len; - (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, - args->userdata, ltbnoa, ltlena, <new); - ASSERT(ltnew >= ltbno); - ASSERT(ltnew + rlen <= ltbnoa + ltlena); - ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); - args->agbno = ltnew; - - if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, - ltnew, rlen, XFSA_FIXUP_BNO_OK))) - goto error0; - - if (j) - trace_xfs_alloc_near_greater(args); - else - trace_xfs_alloc_near_lesser(args); - - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); - return 0; - - error0: - trace_xfs_alloc_near_error(args); - if (cnt_cur != NULL) - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); - if (bno_cur_lt != NULL) - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR); - if (bno_cur_gt != NULL) - xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR); - return error; -} - -/* - * Allocate a variable extent anywhere in the allocation group agno. - * Extent's length (returned in len) will be between minlen and maxlen, - * and of the form k * prod + mod unless there's nothing that large. - * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. - */ -STATIC int /* error */ -xfs_alloc_ag_vextent_size( - xfs_alloc_arg_t *args) /* allocation argument structure */ -{ - xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ - xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ - int error; /* error result */ - xfs_agblock_t fbno; /* start of found freespace */ - xfs_extlen_t flen; /* length of found freespace */ - int i; /* temp status variable */ - xfs_agblock_t rbno; /* returned block number */ - xfs_extlen_t rlen; /* length of returned extent */ - int forced = 0; - -restart: - /* - * Allocate and initialize a cursor for the by-size btree. - */ - cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT); - bno_cur = NULL; - - /* - * Look for an entry >= maxlen+alignment-1 blocks. - */ - if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, - args->maxlen + args->alignment - 1, &i))) - goto error0; - - /* - * If none or we have busy extents that we cannot allocate from, then - * we have to settle for a smaller extent. In the case that there are - * no large extents, this will return the last entry in the tree unless - * the tree is empty. In the case that there are only busy large - * extents, this will return the largest small extent unless there - * are no smaller extents available. - */ - if (!i || forced > 1) { - error = xfs_alloc_ag_vextent_small(args, cnt_cur, - &fbno, &flen, &i); - if (error) - goto error0; - if (i == 0 || flen == 0) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_size_noentry(args); - return 0; - } - ASSERT(i == 1); - xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); - } else { - /* - * Search for a non-busy extent that is large enough. - * If we are at low space, don't check, or if we fall of - * the end of the btree, turn off the busy check and - * restart. - */ - for (;;) { - error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); - if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - - xfs_alloc_compute_aligned(args, fbno, flen, - &rbno, &rlen); - - if (rlen >= args->maxlen) - break; - - error = xfs_btree_increment(cnt_cur, 0, &i); - if (error) - goto error0; - if (i == 0) { - /* - * Our only valid extents must have been busy. - * Make it unbusy by forcing the log out and - * retrying. If we've been here before, forcing - * the log isn't making the extents available, - * which means they have probably been freed in - * this transaction. In that case, we have to - * give up on them and we'll attempt a minlen - * allocation the next time around. - */ - xfs_btree_del_cursor(cnt_cur, - XFS_BTREE_NOERROR); - trace_xfs_alloc_size_busy(args); - if (!forced++) - xfs_log_force(args->mp, XFS_LOG_SYNC); - goto restart; - } - } - } - - /* - * In the first case above, we got the last entry in the - * by-size btree. Now we check to see if the space hits maxlen - * once aligned; if not, we search left for something better. - * This can't happen in the second case above. - */ - rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); - XFS_WANT_CORRUPTED_GOTO(rlen == 0 || - (rlen <= flen && rbno + rlen <= fbno + flen), error0); - if (rlen < args->maxlen) { - xfs_agblock_t bestfbno; - xfs_extlen_t bestflen; - xfs_agblock_t bestrbno; - xfs_extlen_t bestrlen; - - bestrlen = rlen; - bestrbno = rbno; - bestflen = flen; - bestfbno = fbno; - for (;;) { - if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) - goto error0; - if (i == 0) - break; - if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, - &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if (flen < bestrlen) - break; - xfs_alloc_compute_aligned(args, fbno, flen, - &rbno, &rlen); - rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); - XFS_WANT_CORRUPTED_GOTO(rlen == 0 || - (rlen <= flen && rbno + rlen <= fbno + flen), - error0); - if (rlen > bestrlen) { - bestrlen = rlen; - bestrbno = rbno; - bestflen = flen; - bestfbno = fbno; - if (rlen == args->maxlen) - break; - } - } - if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, - &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - rlen = bestrlen; - rbno = bestrbno; - flen = bestflen; - fbno = bestfbno; - } - args->wasfromfl = 0; - /* - * Fix up the length. - */ - args->len = rlen; - if (rlen < args->minlen) { - if (!forced++) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_size_busy(args); - xfs_log_force(args->mp, XFS_LOG_SYNC); - goto restart; - } - goto out_nominleft; - } - xfs_alloc_fix_len(args); - - if (!xfs_alloc_fix_minleft(args)) - goto out_nominleft; - rlen = args->len; - XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); - /* - * Allocate and initialize a cursor for the by-block tree. - */ - bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO); - if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, - rbno, rlen, XFSA_FIXUP_CNT_OK))) - goto error0; - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); - cnt_cur = bno_cur = NULL; - args->len = rlen; - args->agbno = rbno; - XFS_WANT_CORRUPTED_GOTO( - args->agbno + args->len <= - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), - error0); - trace_xfs_alloc_size_done(args); - return 0; - -error0: - trace_xfs_alloc_size_error(args); - if (cnt_cur) - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); - if (bno_cur) - xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); - return error; - -out_nominleft: - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_size_nominleft(args); - args->agbno = NULLAGBLOCK; - return 0; -} - -/* - * Deal with the case where only small freespaces remain. - * Either return the contents of the last freespace record, - * or allocate space from the freelist if there is nothing in the tree. - */ -STATIC int /* error */ -xfs_alloc_ag_vextent_small( - xfs_alloc_arg_t *args, /* allocation argument structure */ - xfs_btree_cur_t *ccur, /* by-size cursor */ - xfs_agblock_t *fbnop, /* result block number */ - xfs_extlen_t *flenp, /* result length */ - int *stat) /* status: 0-freelist, 1-normal/none */ -{ - int error; - xfs_agblock_t fbno; - xfs_extlen_t flen; - int i; - - if ((error = xfs_btree_decrement(ccur, 0, &i))) - goto error0; - if (i) { - if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } - /* - * Nothing in the btree, try the freelist. Make sure - * to respect minleft even when pulling from the - * freelist. - */ - else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && - (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) - > args->minleft)) { - error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); - if (error) - goto error0; - if (fbno != NULLAGBLOCK) { - xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, - args->userdata); - - if (args->userdata) { - xfs_buf_t *bp; - - bp = xfs_btree_get_bufs(args->mp, args->tp, - args->agno, fbno, 0); - xfs_trans_binval(args->tp, bp); - } - args->len = 1; - args->agbno = fbno; - XFS_WANT_CORRUPTED_GOTO( - args->agbno + args->len <= - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), - error0); - args->wasfromfl = 1; - trace_xfs_alloc_small_freelist(args); - *stat = 0; - return 0; - } - /* - * Nothing in the freelist. - */ - else - flen = 0; - } - /* - * Can't allocate from the freelist for some reason. - */ - else { - fbno = NULLAGBLOCK; - flen = 0; - } - /* - * Can't do the allocation, give up. - */ - if (flen < args->minlen) { - args->agbno = NULLAGBLOCK; - trace_xfs_alloc_small_notenough(args); - flen = 0; - } - *fbnop = fbno; - *flenp = flen; - *stat = 1; - trace_xfs_alloc_small_done(args); - return 0; - -error0: - trace_xfs_alloc_small_error(args); - return error; -} - -/* - * Free the extent starting at agno/bno for length. - */ -STATIC int /* error */ -xfs_free_ag_extent( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer for a.g. freelist header */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t bno, /* starting block number */ - xfs_extlen_t len, /* length of extent */ - int isfl) /* set if is freelist blocks - no sb acctg */ -{ - xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ - xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ - int error; /* error return value */ - xfs_agblock_t gtbno; /* start of right neighbor block */ - xfs_extlen_t gtlen; /* length of right neighbor block */ - int haveleft; /* have a left neighbor block */ - int haveright; /* have a right neighbor block */ - int i; /* temp, result code */ - xfs_agblock_t ltbno; /* start of left neighbor block */ - xfs_extlen_t ltlen; /* length of left neighbor block */ - xfs_mount_t *mp; /* mount point struct for filesystem */ - xfs_agblock_t nbno; /* new starting block of freespace */ - xfs_extlen_t nlen; /* new length of freespace */ - xfs_perag_t *pag; /* per allocation group data */ - - mp = tp->t_mountp; - /* - * Allocate and initialize a cursor for the by-block btree. - */ - bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); - cnt_cur = NULL; - /* - * Look for a neighboring block on the left (lower block numbers) - * that is contiguous with this space. - */ - if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) - goto error0; - if (haveleft) { - /* - * There is a block to our left. - */ - if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * It's not contiguous, though. - */ - if (ltbno + ltlen < bno) - haveleft = 0; - else { - /* - * If this failure happens the request to free this - * space was invalid, it's (partly) already free. - * Very bad. - */ - XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0); - } - } - /* - * Look for a neighboring block on the right (higher block numbers) - * that is contiguous with this space. - */ - if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) - goto error0; - if (haveright) { - /* - * There is a block to our right. - */ - if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * It's not contiguous, though. - */ - if (bno + len < gtbno) - haveright = 0; - else { - /* - * If this failure happens the request to free this - * space was invalid, it's (partly) already free. - * Very bad. - */ - XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0); - } - } - /* - * Now allocate and initialize a cursor for the by-size tree. - */ - cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); - /* - * Have both left and right contiguous neighbors. - * Merge all three into a single free block. - */ - if (haveleft && haveright) { - /* - * Delete the old by-size entry on the left. - */ - if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_delete(cnt_cur, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * Delete the old by-size entry on the right. - */ - if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_delete(cnt_cur, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * Delete the old by-block entry for the right block. - */ - if ((error = xfs_btree_delete(bno_cur, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * Move the by-block cursor back to the left neighbor. - */ - if ((error = xfs_btree_decrement(bno_cur, 0, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); -#ifdef DEBUG - /* - * Check that this is the right record: delete didn't - * mangle the cursor. - */ - { - xfs_agblock_t xxbno; - xfs_extlen_t xxlen; - - if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, - &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO( - i == 1 && xxbno == ltbno && xxlen == ltlen, - error0); - } -#endif - /* - * Update remaining by-block entry to the new, joined block. - */ - nbno = ltbno; - nlen = len + ltlen + gtlen; - if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) - goto error0; - } - /* - * Have only a left contiguous neighbor. - * Merge it together with the new freespace. - */ - else if (haveleft) { - /* - * Delete the old by-size entry on the left. - */ - if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_delete(cnt_cur, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * Back up the by-block cursor to the left neighbor, and - * update its length. - */ - if ((error = xfs_btree_decrement(bno_cur, 0, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - nbno = ltbno; - nlen = len + ltlen; - if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) - goto error0; - } - /* - * Have only a right contiguous neighbor. - * Merge it together with the new freespace. - */ - else if (haveright) { - /* - * Delete the old by-size entry on the right. - */ - if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_delete(cnt_cur, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * Update the starting block and length of the right - * neighbor in the by-block tree. - */ - nbno = bno; - nlen = len + gtlen; - if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) - goto error0; - } - /* - * No contiguous neighbors. - * Insert the new freespace into the by-block tree. - */ - else { - nbno = bno; - nlen = len; - if ((error = xfs_btree_insert(bno_cur, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } - xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); - bno_cur = NULL; - /* - * In all cases we need to insert the new freespace in the by-size tree. - */ - if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 0, error0); - if ((error = xfs_btree_insert(cnt_cur, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - cnt_cur = NULL; - - /* - * Update the freespace totals in the ag and superblock. - */ - pag = xfs_perag_get(mp, agno); - error = xfs_alloc_update_counters(tp, pag, agbp, len); - xfs_perag_put(pag); - if (error) - goto error0; - - if (!isfl) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); - XFS_STATS_INC(xs_freex); - XFS_STATS_ADD(xs_freeb, len); - - trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); - - return 0; - - error0: - trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1); - if (bno_cur) - xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); - if (cnt_cur) - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); - return error; -} - -/* - * Visible (exported) allocation/free functions. - * Some of these are used just by xfs_alloc_btree.c and this file. - */ - -/* - * Compute and fill in value of m_ag_maxlevels. - */ -void -xfs_alloc_compute_maxlevels( - xfs_mount_t *mp) /* file system mount structure */ -{ - int level; - uint maxblocks; - uint maxleafents; - int minleafrecs; - int minnoderecs; - - maxleafents = (mp->m_sb.sb_agblocks + 1) / 2; - minleafrecs = mp->m_alloc_mnr[0]; - minnoderecs = mp->m_alloc_mnr[1]; - maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; - for (level = 1; maxblocks > 1; level++) - maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; - mp->m_ag_maxlevels = level; -} - -/* - * Find the length of the longest extent in an AG. - */ -xfs_extlen_t -xfs_alloc_longest_free_extent( - struct xfs_mount *mp, - struct xfs_perag *pag) -{ - xfs_extlen_t need, delta = 0; - - need = XFS_MIN_FREELIST_PAG(pag, mp); - if (need > pag->pagf_flcount) - delta = need - pag->pagf_flcount; - - if (pag->pagf_longest > delta) - return pag->pagf_longest - delta; - return pag->pagf_flcount > 0 || pag->pagf_longest > 0; -} - -/* - * Decide whether to use this allocation group for this allocation. - * If so, fix up the btree freelist's size. - */ -STATIC int /* error */ -xfs_alloc_fix_freelist( - xfs_alloc_arg_t *args, /* allocation argument structure */ - int flags) /* XFS_ALLOC_FLAG_... */ -{ - xfs_buf_t *agbp; /* agf buffer pointer */ - xfs_agf_t *agf; /* a.g. freespace structure pointer */ - xfs_buf_t *agflbp;/* agfl buffer pointer */ - xfs_agblock_t bno; /* freelist block */ - xfs_extlen_t delta; /* new blocks needed in freelist */ - int error; /* error result code */ - xfs_extlen_t longest;/* longest extent in allocation group */ - xfs_mount_t *mp; /* file system mount point structure */ - xfs_extlen_t need; /* total blocks needed in freelist */ - xfs_perag_t *pag; /* per-ag information structure */ - xfs_alloc_arg_t targs; /* local allocation arguments */ - xfs_trans_t *tp; /* transaction pointer */ - - mp = args->mp; - - pag = args->pag; - tp = args->tp; - if (!pag->pagf_init) { - if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, - &agbp))) - return error; - if (!pag->pagf_init) { - ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); - ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); - args->agbp = NULL; - return 0; - } - } else - agbp = NULL; - - /* - * If this is a metadata preferred pag and we are user data - * then try somewhere else if we are not being asked to - * try harder at this point - */ - if (pag->pagf_metadata && args->userdata && - (flags & XFS_ALLOC_FLAG_TRYLOCK)) { - ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); - args->agbp = NULL; - return 0; - } - - if (!(flags & XFS_ALLOC_FLAG_FREEING)) { - /* - * If it looks like there isn't a long enough extent, or enough - * total blocks, reject it. - */ - need = XFS_MIN_FREELIST_PAG(pag, mp); - longest = xfs_alloc_longest_free_extent(mp, pag); - if ((args->minlen + args->alignment + args->minalignslop - 1) > - longest || - ((int)(pag->pagf_freeblks + pag->pagf_flcount - - need - args->total) < (int)args->minleft)) { - if (agbp) - xfs_trans_brelse(tp, agbp); - args->agbp = NULL; - return 0; - } - } - - /* - * Get the a.g. freespace buffer. - * Can fail if we're not blocking on locks, and it's held. - */ - if (agbp == NULL) { - if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, - &agbp))) - return error; - if (agbp == NULL) { - ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); - ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); - args->agbp = NULL; - return 0; - } - } - /* - * Figure out how many blocks we should have in the freelist. - */ - agf = XFS_BUF_TO_AGF(agbp); - need = XFS_MIN_FREELIST(agf, mp); - /* - * If there isn't enough total or single-extent, reject it. - */ - if (!(flags & XFS_ALLOC_FLAG_FREEING)) { - delta = need > be32_to_cpu(agf->agf_flcount) ? - (need - be32_to_cpu(agf->agf_flcount)) : 0; - longest = be32_to_cpu(agf->agf_longest); - longest = (longest > delta) ? (longest - delta) : - (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); - if ((args->minlen + args->alignment + args->minalignslop - 1) > - longest || - ((int)(be32_to_cpu(agf->agf_freeblks) + - be32_to_cpu(agf->agf_flcount) - need - args->total) < - (int)args->minleft)) { - xfs_trans_brelse(tp, agbp); - args->agbp = NULL; - return 0; - } - } - /* - * Make the freelist shorter if it's too long. - */ - while (be32_to_cpu(agf->agf_flcount) > need) { - xfs_buf_t *bp; - - error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); - if (error) - return error; - if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) - return error; - bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); - xfs_trans_binval(tp, bp); - } - /* - * Initialize the args structure. - */ - memset(&targs, 0, sizeof(targs)); - targs.tp = tp; - targs.mp = mp; - targs.agbp = agbp; - targs.agno = args->agno; - targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; - targs.type = XFS_ALLOCTYPE_THIS_AG; - targs.pag = pag; - if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))) - return error; - /* - * Make the freelist longer if it's too short. - */ - while (be32_to_cpu(agf->agf_flcount) < need) { - targs.agbno = 0; - targs.maxlen = need - be32_to_cpu(agf->agf_flcount); - /* - * Allocate as many blocks as possible at once. - */ - if ((error = xfs_alloc_ag_vextent(&targs))) { - xfs_trans_brelse(tp, agflbp); - return error; - } - /* - * Stop if we run out. Won't happen if callers are obeying - * the restrictions correctly. Can happen for free calls - * on a completely full ag. - */ - if (targs.agbno == NULLAGBLOCK) { - if (flags & XFS_ALLOC_FLAG_FREEING) - break; - xfs_trans_brelse(tp, agflbp); - args->agbp = NULL; - return 0; - } - /* - * Put each allocated block on the list. - */ - for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { - error = xfs_alloc_put_freelist(tp, agbp, - agflbp, bno, 0); - if (error) - return error; - } - } - xfs_trans_brelse(tp, agflbp); - args->agbp = agbp; - return 0; -} - -/* - * Get a block from the freelist. - * Returns with the buffer for the block gotten. - */ -int /* error */ -xfs_alloc_get_freelist( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer containing the agf structure */ - xfs_agblock_t *bnop, /* block address retrieved from freelist */ - int btreeblk) /* destination is a AGF btree */ -{ - xfs_agf_t *agf; /* a.g. freespace structure */ - xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ - xfs_agblock_t bno; /* block number returned */ - __be32 *agfl_bno; - int error; - int logflags; - xfs_mount_t *mp = tp->t_mountp; - xfs_perag_t *pag; /* per allocation group data */ - - /* - * Freelist is empty, give up. - */ - agf = XFS_BUF_TO_AGF(agbp); - if (!agf->agf_flcount) { - *bnop = NULLAGBLOCK; - return 0; - } - /* - * Read the array of free blocks. - */ - error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), - &agflbp); - if (error) - return error; - - - /* - * Get the block number and update the data structures. - */ - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); - bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); - be32_add_cpu(&agf->agf_flfirst, 1); - xfs_trans_brelse(tp, agflbp); - if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) - agf->agf_flfirst = 0; - - pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); - be32_add_cpu(&agf->agf_flcount, -1); - xfs_trans_agflist_delta(tp, -1); - pag->pagf_flcount--; - xfs_perag_put(pag); - - logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; - if (btreeblk) { - be32_add_cpu(&agf->agf_btreeblks, 1); - pag->pagf_btreeblks++; - logflags |= XFS_AGF_BTREEBLKS; - } - - xfs_alloc_log_agf(tp, agbp, logflags); - *bnop = bno; - - return 0; -} - -/* - * Log the given fields from the agf structure. - */ -void -xfs_alloc_log_agf( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* buffer for a.g. freelist header */ - int fields) /* mask of fields to be logged (XFS_AGF_...) */ -{ - int first; /* first byte offset */ - int last; /* last byte offset */ - static const short offsets[] = { - offsetof(xfs_agf_t, agf_magicnum), - offsetof(xfs_agf_t, agf_versionnum), - offsetof(xfs_agf_t, agf_seqno), - offsetof(xfs_agf_t, agf_length), - offsetof(xfs_agf_t, agf_roots[0]), - offsetof(xfs_agf_t, agf_levels[0]), - offsetof(xfs_agf_t, agf_flfirst), - offsetof(xfs_agf_t, agf_fllast), - offsetof(xfs_agf_t, agf_flcount), - offsetof(xfs_agf_t, agf_freeblks), - offsetof(xfs_agf_t, agf_longest), - offsetof(xfs_agf_t, agf_btreeblks), - offsetof(xfs_agf_t, agf_uuid), - sizeof(xfs_agf_t) - }; - - trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); - - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF); - - xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); - xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); -} - -/* - * Interface for inode allocation to force the pag data to be initialized. - */ -int /* error */ -xfs_alloc_pagf_init( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - int flags) /* XFS_ALLOC_FLAGS_... */ -{ - xfs_buf_t *bp; - int error; - - if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp))) - return error; - if (bp) - xfs_trans_brelse(tp, bp); - return 0; -} - -/* - * Put the block on the freelist for the allocation group. - */ -int /* error */ -xfs_alloc_put_freelist( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer for a.g. freelist header */ - xfs_buf_t *agflbp,/* buffer for a.g. free block array */ - xfs_agblock_t bno, /* block being freed */ - int btreeblk) /* block came from a AGF btree */ -{ - xfs_agf_t *agf; /* a.g. freespace structure */ - __be32 *blockp;/* pointer to array entry */ - int error; - int logflags; - xfs_mount_t *mp; /* mount structure */ - xfs_perag_t *pag; /* per allocation group data */ - __be32 *agfl_bno; - int startoff; - - agf = XFS_BUF_TO_AGF(agbp); - mp = tp->t_mountp; - - if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, - be32_to_cpu(agf->agf_seqno), &agflbp))) - return error; - be32_add_cpu(&agf->agf_fllast, 1); - if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) - agf->agf_fllast = 0; - - pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); - be32_add_cpu(&agf->agf_flcount, 1); - xfs_trans_agflist_delta(tp, 1); - pag->pagf_flcount++; - - logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; - if (btreeblk) { - be32_add_cpu(&agf->agf_btreeblks, -1); - pag->pagf_btreeblks--; - logflags |= XFS_AGF_BTREEBLKS; - } - xfs_perag_put(pag); - - xfs_alloc_log_agf(tp, agbp, logflags); - - ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); - - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); - blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; - *blockp = cpu_to_be32(bno); - startoff = (char *)blockp - (char *)agflbp->b_addr; - - xfs_alloc_log_agf(tp, agbp, logflags); - - xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); - xfs_trans_log_buf(tp, agflbp, startoff, - startoff + sizeof(xfs_agblock_t) - 1); - return 0; -} - -static bool -xfs_agf_verify( - struct xfs_mount *mp, - struct xfs_buf *bp) - { - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); - - if (xfs_sb_version_hascrc(&mp->m_sb) && - !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid)) - return false; - - if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && - XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && - be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && - be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) - return false; - - /* - * during growfs operations, the perag is not fully initialised, - * so we can't use it for any useful checking. growfs ensures we can't - * use it by using uncached buffers that don't have the perag attached - * so we can detect and avoid this problem. - */ - if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) - return false; - - if (xfs_sb_version_haslazysbcount(&mp->m_sb) && - be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) - return false; - - return true;; - -} - -static void -xfs_agf_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, - XFS_ERRTAG_ALLOC_READ_AGF, - XFS_RANDOM_ALLOC_READ_AGF)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); -} - -static void -xfs_agf_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - - if (!xfs_agf_verify(mp, bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (bip) - XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); - - xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); -} - -const struct xfs_buf_ops xfs_agf_buf_ops = { - .verify_read = xfs_agf_read_verify, - .verify_write = xfs_agf_write_verify, -}; - -/* - * Read in the allocation group header (free/alloc section). - */ -int /* error */ -xfs_read_agf( - struct xfs_mount *mp, /* mount point structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - int flags, /* XFS_BUF_ */ - struct xfs_buf **bpp) /* buffer for the ag freelist header */ -{ - int error; - - trace_xfs_read_agf(mp, agno); - - ASSERT(agno != NULLAGNUMBER); - error = xfs_trans_read_buf( - mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); - if (error) - return error; - if (!*bpp) - return 0; - - ASSERT(!(*bpp)->b_error); - xfs_buf_set_ref(*bpp, XFS_AGF_REF); - return 0; -} - -/* - * Read in the allocation group header (free/alloc section). - */ -int /* error */ -xfs_alloc_read_agf( - struct xfs_mount *mp, /* mount point structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - int flags, /* XFS_ALLOC_FLAG_... */ - struct xfs_buf **bpp) /* buffer for the ag freelist header */ -{ - struct xfs_agf *agf; /* ag freelist header */ - struct xfs_perag *pag; /* per allocation group data */ - int error; - - trace_xfs_alloc_read_agf(mp, agno); - - ASSERT(agno != NULLAGNUMBER); - error = xfs_read_agf(mp, tp, agno, - (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, - bpp); - if (error) - return error; - if (!*bpp) - return 0; - ASSERT(!(*bpp)->b_error); - - agf = XFS_BUF_TO_AGF(*bpp); - pag = xfs_perag_get(mp, agno); - if (!pag->pagf_init) { - pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); - pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); - pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); - pag->pagf_longest = be32_to_cpu(agf->agf_longest); - pag->pagf_levels[XFS_BTNUM_BNOi] = - be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); - pag->pagf_levels[XFS_BTNUM_CNTi] = - be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); - spin_lock_init(&pag->pagb_lock); - pag->pagb_count = 0; - pag->pagb_tree = RB_ROOT; - pag->pagf_init = 1; - } -#ifdef DEBUG - else if (!XFS_FORCED_SHUTDOWN(mp)) { - ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); - ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); - ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); - ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); - ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == - be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi])); - ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] == - be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); - } -#endif - xfs_perag_put(pag); - return 0; -} - -/* - * Allocate an extent (variable-size). - * Depending on the allocation type, we either look in a single allocation - * group or loop over the allocation groups to find the result. - */ -int /* error */ -xfs_alloc_vextent( - xfs_alloc_arg_t *args) /* allocation argument structure */ -{ - xfs_agblock_t agsize; /* allocation group size */ - int error; - int flags; /* XFS_ALLOC_FLAG_... locking flags */ - xfs_extlen_t minleft;/* minimum left value, temp copy */ - xfs_mount_t *mp; /* mount structure pointer */ - xfs_agnumber_t sagno; /* starting allocation group number */ - xfs_alloctype_t type; /* input allocation type */ - int bump_rotor = 0; - int no_min = 0; - xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ - - mp = args->mp; - type = args->otype = args->type; - args->agbno = NULLAGBLOCK; - /* - * Just fix this up, for the case where the last a.g. is shorter - * (or there's only one a.g.) and the caller couldn't easily figure - * that out (xfs_bmap_alloc). - */ - agsize = mp->m_sb.sb_agblocks; - if (args->maxlen > agsize) - args->maxlen = agsize; - if (args->alignment == 0) - args->alignment = 1; - ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); - ASSERT(args->minlen <= args->maxlen); - ASSERT(args->minlen <= agsize); - ASSERT(args->mod < args->prod); - if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount || - XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || - args->minlen > args->maxlen || args->minlen > agsize || - args->mod >= args->prod) { - args->fsbno = NULLFSBLOCK; - trace_xfs_alloc_vextent_badargs(args); - return 0; - } - minleft = args->minleft; - - switch (type) { - case XFS_ALLOCTYPE_THIS_AG: - case XFS_ALLOCTYPE_NEAR_BNO: - case XFS_ALLOCTYPE_THIS_BNO: - /* - * These three force us into a single a.g. - */ - args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); - args->pag = xfs_perag_get(mp, args->agno); - args->minleft = 0; - error = xfs_alloc_fix_freelist(args, 0); - args->minleft = minleft; - if (error) { - trace_xfs_alloc_vextent_nofix(args); - goto error0; - } - if (!args->agbp) { - trace_xfs_alloc_vextent_noagbp(args); - break; - } - args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); - if ((error = xfs_alloc_ag_vextent(args))) - goto error0; - break; - case XFS_ALLOCTYPE_START_BNO: - /* - * Try near allocation first, then anywhere-in-ag after - * the first a.g. fails. - */ - if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) && - (mp->m_flags & XFS_MOUNT_32BITINODES)) { - args->fsbno = XFS_AGB_TO_FSB(mp, - ((mp->m_agfrotor / rotorstep) % - mp->m_sb.sb_agcount), 0); - bump_rotor = 1; - } - args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); - args->type = XFS_ALLOCTYPE_NEAR_BNO; - /* FALLTHROUGH */ - case XFS_ALLOCTYPE_ANY_AG: - case XFS_ALLOCTYPE_START_AG: - case XFS_ALLOCTYPE_FIRST_AG: - /* - * Rotate through the allocation groups looking for a winner. - */ - if (type == XFS_ALLOCTYPE_ANY_AG) { - /* - * Start with the last place we left off. - */ - args->agno = sagno = (mp->m_agfrotor / rotorstep) % - mp->m_sb.sb_agcount; - args->type = XFS_ALLOCTYPE_THIS_AG; - flags = XFS_ALLOC_FLAG_TRYLOCK; - } else if (type == XFS_ALLOCTYPE_FIRST_AG) { - /* - * Start with allocation group given by bno. - */ - args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); - args->type = XFS_ALLOCTYPE_THIS_AG; - sagno = 0; - flags = 0; - } else { - if (type == XFS_ALLOCTYPE_START_AG) - args->type = XFS_ALLOCTYPE_THIS_AG; - /* - * Start with the given allocation group. - */ - args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); - flags = XFS_ALLOC_FLAG_TRYLOCK; - } - /* - * Loop over allocation groups twice; first time with - * trylock set, second time without. - */ - for (;;) { - args->pag = xfs_perag_get(mp, args->agno); - if (no_min) args->minleft = 0; - error = xfs_alloc_fix_freelist(args, flags); - args->minleft = minleft; - if (error) { - trace_xfs_alloc_vextent_nofix(args); - goto error0; - } - /* - * If we get a buffer back then the allocation will fly. - */ - if (args->agbp) { - if ((error = xfs_alloc_ag_vextent(args))) - goto error0; - break; - } - - trace_xfs_alloc_vextent_loopfailed(args); - - /* - * Didn't work, figure out the next iteration. - */ - if (args->agno == sagno && - type == XFS_ALLOCTYPE_START_BNO) - args->type = XFS_ALLOCTYPE_THIS_AG; - /* - * For the first allocation, we can try any AG to get - * space. However, if we already have allocated a - * block, we don't want to try AGs whose number is below - * sagno. Otherwise, we may end up with out-of-order - * locking of AGF, which might cause deadlock. - */ - if (++(args->agno) == mp->m_sb.sb_agcount) { - if (args->firstblock != NULLFSBLOCK) - args->agno = sagno; - else - args->agno = 0; - } - /* - * Reached the starting a.g., must either be done - * or switch to non-trylock mode. - */ - if (args->agno == sagno) { - if (no_min == 1) { - args->agbno = NULLAGBLOCK; - trace_xfs_alloc_vextent_allfailed(args); - break; - } - if (flags == 0) { - no_min = 1; - } else { - flags = 0; - if (type == XFS_ALLOCTYPE_START_BNO) { - args->agbno = XFS_FSB_TO_AGBNO(mp, - args->fsbno); - args->type = XFS_ALLOCTYPE_NEAR_BNO; - } - } - } - xfs_perag_put(args->pag); - } - if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { - if (args->agno == sagno) - mp->m_agfrotor = (mp->m_agfrotor + 1) % - (mp->m_sb.sb_agcount * rotorstep); - else - mp->m_agfrotor = (args->agno * rotorstep + 1) % - (mp->m_sb.sb_agcount * rotorstep); - } - break; - default: - ASSERT(0); - /* NOTREACHED */ - } - if (args->agbno == NULLAGBLOCK) - args->fsbno = NULLFSBLOCK; - else { - args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); -#ifdef DEBUG - ASSERT(args->len >= args->minlen); - ASSERT(args->len <= args->maxlen); - ASSERT(args->agbno % args->alignment == 0); - XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), - args->len); -#endif - } - xfs_perag_put(args->pag); - return 0; -error0: - xfs_perag_put(args->pag); - return error; -} - -/* - * Free an extent. - * Just break up the extent address and hand off to xfs_free_ag_extent - * after fixing up the freelist. - */ -int /* error */ -xfs_free_extent( - xfs_trans_t *tp, /* transaction pointer */ - xfs_fsblock_t bno, /* starting block number of extent */ - xfs_extlen_t len) /* length of extent */ -{ - xfs_alloc_arg_t args; - int error; - - ASSERT(len != 0); - memset(&args, 0, sizeof(xfs_alloc_arg_t)); - args.tp = tp; - args.mp = tp->t_mountp; - - /* - * validate that the block number is legal - the enables us to detect - * and handle a silent filesystem corruption rather than crashing. - */ - args.agno = XFS_FSB_TO_AGNO(args.mp, bno); - if (args.agno >= args.mp->m_sb.sb_agcount) - return EFSCORRUPTED; - - args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); - if (args.agbno >= args.mp->m_sb.sb_agblocks) - return EFSCORRUPTED; - - args.pag = xfs_perag_get(args.mp, args.agno); - ASSERT(args.pag); - - error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); - if (error) - goto error0; - - /* validate the extent size is legal now we have the agf locked */ - if (args.agbno + len > - be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { - error = EFSCORRUPTED; - goto error0; - } - - error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); - if (!error) - xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0); -error0: - xfs_perag_put(args.pag); - return error; -} diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c deleted file mode 100644 index 8358f1ded94d..000000000000 --- a/fs/xfs/xfs_alloc_btree.c +++ /dev/null @@ -1,504 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_alloc.h" -#include "xfs_extent_busy.h" -#include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_cksum.h" -#include "xfs_trans.h" - - -STATIC struct xfs_btree_cur * -xfs_allocbt_dup_cursor( - struct xfs_btree_cur *cur) -{ - return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno, - cur->bc_btnum); -} - -STATIC void -xfs_allocbt_set_root( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr, - int inc) -{ - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); - xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); - int btnum = cur->bc_btnum; - struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); - - ASSERT(ptr->s != 0); - - agf->agf_roots[btnum] = ptr->s; - be32_add_cpu(&agf->agf_levels[btnum], inc); - pag->pagf_levels[btnum] += inc; - xfs_perag_put(pag); - - xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); -} - -STATIC int -xfs_allocbt_alloc_block( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *start, - union xfs_btree_ptr *new, - int *stat) -{ - int error; - xfs_agblock_t bno; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - - /* Allocate the new block from the freelist. If we can't, give up. */ - error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, - &bno, 1); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; - } - - if (bno == NULLAGBLOCK) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - } - - xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false); - - xfs_trans_agbtree_delta(cur->bc_tp, 1); - new->s = cpu_to_be32(bno); - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; -} - -STATIC int -xfs_allocbt_free_block( - struct xfs_btree_cur *cur, - struct xfs_buf *bp) -{ - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); - xfs_agblock_t bno; - int error; - - bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); - error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); - if (error) - return error; - - xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, - XFS_EXTENT_BUSY_SKIP_DISCARD); - xfs_trans_agbtree_delta(cur->bc_tp, -1); - - xfs_trans_binval(cur->bc_tp, bp); - return 0; -} - -/* - * Update the longest extent in the AGF - */ -STATIC void -xfs_allocbt_update_lastrec( - struct xfs_btree_cur *cur, - struct xfs_btree_block *block, - union xfs_btree_rec *rec, - int ptr, - int reason) -{ - struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); - struct xfs_perag *pag; - __be32 len; - int numrecs; - - ASSERT(cur->bc_btnum == XFS_BTNUM_CNT); - - switch (reason) { - case LASTREC_UPDATE: - /* - * If this is the last leaf block and it's the last record, - * then update the size of the longest extent in the AG. - */ - if (ptr != xfs_btree_get_numrecs(block)) - return; - len = rec->alloc.ar_blockcount; - break; - case LASTREC_INSREC: - if (be32_to_cpu(rec->alloc.ar_blockcount) <= - be32_to_cpu(agf->agf_longest)) - return; - len = rec->alloc.ar_blockcount; - break; - case LASTREC_DELREC: - numrecs = xfs_btree_get_numrecs(block); - if (ptr <= numrecs) - return; - ASSERT(ptr == numrecs + 1); - - if (numrecs) { - xfs_alloc_rec_t *rrp; - - rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs); - len = rrp->ar_blockcount; - } else { - len = 0; - } - - break; - default: - ASSERT(0); - return; - } - - agf->agf_longest = len; - pag = xfs_perag_get(cur->bc_mp, seqno); - pag->pagf_longest = be32_to_cpu(len); - xfs_perag_put(pag); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); -} - -STATIC int -xfs_allocbt_get_minrecs( - struct xfs_btree_cur *cur, - int level) -{ - return cur->bc_mp->m_alloc_mnr[level != 0]; -} - -STATIC int -xfs_allocbt_get_maxrecs( - struct xfs_btree_cur *cur, - int level) -{ - return cur->bc_mp->m_alloc_mxr[level != 0]; -} - -STATIC void -xfs_allocbt_init_key_from_rec( - union xfs_btree_key *key, - union xfs_btree_rec *rec) -{ - ASSERT(rec->alloc.ar_startblock != 0); - - key->alloc.ar_startblock = rec->alloc.ar_startblock; - key->alloc.ar_blockcount = rec->alloc.ar_blockcount; -} - -STATIC void -xfs_allocbt_init_rec_from_key( - union xfs_btree_key *key, - union xfs_btree_rec *rec) -{ - ASSERT(key->alloc.ar_startblock != 0); - - rec->alloc.ar_startblock = key->alloc.ar_startblock; - rec->alloc.ar_blockcount = key->alloc.ar_blockcount; -} - -STATIC void -xfs_allocbt_init_rec_from_cur( - struct xfs_btree_cur *cur, - union xfs_btree_rec *rec) -{ - ASSERT(cur->bc_rec.a.ar_startblock != 0); - - rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); - rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); -} - -STATIC void -xfs_allocbt_init_ptr_from_cur( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr) -{ - struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); - ASSERT(agf->agf_roots[cur->bc_btnum] != 0); - - ptr->s = agf->agf_roots[cur->bc_btnum]; -} - -STATIC __int64_t -xfs_allocbt_key_diff( - struct xfs_btree_cur *cur, - union xfs_btree_key *key) -{ - xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a; - xfs_alloc_key_t *kp = &key->alloc; - __int64_t diff; - - if (cur->bc_btnum == XFS_BTNUM_BNO) { - return (__int64_t)be32_to_cpu(kp->ar_startblock) - - rec->ar_startblock; - } - - diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount; - if (diff) - return diff; - - return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; -} - -static bool -xfs_allocbt_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_perag *pag = bp->b_pag; - unsigned int level; - - /* - * magic number and level verification - * - * During growfs operations, we can't verify the exact level or owner as - * the perag is not fully initialised and hence not attached to the - * buffer. In this case, check against the maximum tree depth. - * - * Similarly, during log recovery we will have a perag structure - * attached, but the agf information will not yet have been initialised - * from the on disk AGF. Again, we can only check against maximum limits - * in this case. - */ - level = be16_to_cpu(block->bb_level); - switch (block->bb_magic) { - case cpu_to_be32(XFS_ABTB_CRC_MAGIC): - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) - return false; - if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) - return false; - if (pag && - be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) - return false; - /* fall through */ - case cpu_to_be32(XFS_ABTB_MAGIC): - if (pag && pag->pagf_init) { - if (level >= pag->pagf_levels[XFS_BTNUM_BNOi]) - return false; - } else if (level >= mp->m_ag_maxlevels) - return false; - break; - case cpu_to_be32(XFS_ABTC_CRC_MAGIC): - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) - return false; - if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) - return false; - if (pag && - be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) - return false; - /* fall through */ - case cpu_to_be32(XFS_ABTC_MAGIC): - if (pag && pag->pagf_init) { - if (level >= pag->pagf_levels[XFS_BTNUM_CNTi]) - return false; - } else if (level >= mp->m_ag_maxlevels) - return false; - break; - default: - return false; - } - - /* numrecs verification */ - if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0]) - return false; - - /* sibling pointer verification */ - if (!block->bb_u.s.bb_leftsib || - (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) - return false; - if (!block->bb_u.s.bb_rightsib || - (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) - return false; - - return true; -} - -static void -xfs_allocbt_read_verify( - struct xfs_buf *bp) -{ - if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (!xfs_allocbt_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) { - trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } -} - -static void -xfs_allocbt_write_verify( - struct xfs_buf *bp) -{ - if (!xfs_allocbt_verify(bp)) { - trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - xfs_btree_sblock_calc_crc(bp); - -} - -const struct xfs_buf_ops xfs_allocbt_buf_ops = { - .verify_read = xfs_allocbt_read_verify, - .verify_write = xfs_allocbt_write_verify, -}; - - -#if defined(DEBUG) || defined(XFS_WARN) -STATIC int -xfs_allocbt_keys_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_key *k1, - union xfs_btree_key *k2) -{ - if (cur->bc_btnum == XFS_BTNUM_BNO) { - return be32_to_cpu(k1->alloc.ar_startblock) < - be32_to_cpu(k2->alloc.ar_startblock); - } else { - return be32_to_cpu(k1->alloc.ar_blockcount) < - be32_to_cpu(k2->alloc.ar_blockcount) || - (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount && - be32_to_cpu(k1->alloc.ar_startblock) < - be32_to_cpu(k2->alloc.ar_startblock)); - } -} - -STATIC int -xfs_allocbt_recs_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_rec *r1, - union xfs_btree_rec *r2) -{ - if (cur->bc_btnum == XFS_BTNUM_BNO) { - return be32_to_cpu(r1->alloc.ar_startblock) + - be32_to_cpu(r1->alloc.ar_blockcount) <= - be32_to_cpu(r2->alloc.ar_startblock); - } else { - return be32_to_cpu(r1->alloc.ar_blockcount) < - be32_to_cpu(r2->alloc.ar_blockcount) || - (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && - be32_to_cpu(r1->alloc.ar_startblock) < - be32_to_cpu(r2->alloc.ar_startblock)); - } -} -#endif /* DEBUG */ - -static const struct xfs_btree_ops xfs_allocbt_ops = { - .rec_len = sizeof(xfs_alloc_rec_t), - .key_len = sizeof(xfs_alloc_key_t), - - .dup_cursor = xfs_allocbt_dup_cursor, - .set_root = xfs_allocbt_set_root, - .alloc_block = xfs_allocbt_alloc_block, - .free_block = xfs_allocbt_free_block, - .update_lastrec = xfs_allocbt_update_lastrec, - .get_minrecs = xfs_allocbt_get_minrecs, - .get_maxrecs = xfs_allocbt_get_maxrecs, - .init_key_from_rec = xfs_allocbt_init_key_from_rec, - .init_rec_from_key = xfs_allocbt_init_rec_from_key, - .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, - .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, - .key_diff = xfs_allocbt_key_diff, - .buf_ops = &xfs_allocbt_buf_ops, -#if defined(DEBUG) || defined(XFS_WARN) - .keys_inorder = xfs_allocbt_keys_inorder, - .recs_inorder = xfs_allocbt_recs_inorder, -#endif -}; - -/* - * Allocate a new allocation btree cursor. - */ -struct xfs_btree_cur * /* new alloc btree cursor */ -xfs_allocbt_init_cursor( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* buffer for agf structure */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_btnum_t btnum) /* btree identifier */ -{ - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); - struct xfs_btree_cur *cur; - - ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); - - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); - - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_btnum = btnum; - cur->bc_blocklog = mp->m_sb.sb_blocklog; - cur->bc_ops = &xfs_allocbt_ops; - - if (btnum == XFS_BTNUM_CNT) { - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); - cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; - } else { - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); - } - - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; - - if (xfs_sb_version_hascrc(&mp->m_sb)) - cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - - return cur; -} - -/* - * Calculate number of records in an alloc btree block. - */ -int -xfs_allocbt_maxrecs( - struct xfs_mount *mp, - int blocklen, - int leaf) -{ - blocklen -= XFS_ALLOC_BLOCK_LEN(mp); - - if (leaf) - return blocklen / sizeof(xfs_alloc_rec_t); - return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); -} diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c deleted file mode 100644 index 7d95b16f0919..000000000000 --- a/fs/xfs/xfs_attr.c +++ /dev/null @@ -1,1459 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_inode.h" -#include "xfs_alloc.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_bmap.h" -#include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" -#include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_attr_remote.h" -#include "xfs_error.h" -#include "xfs_quota.h" -#include "xfs_trans_space.h" -#include "xfs_trace.h" -#include "xfs_dinode.h" - -/* - * xfs_attr.c - * - * Provide the external interfaces to manage attribute lists. - */ - -/*======================================================================== - * Function prototypes for the kernel. - *========================================================================*/ - -/* - * Internal routines when attribute list fits inside the inode. - */ -STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); - -/* - * Internal routines when attribute list is one block. - */ -STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); -STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args); -STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); - -/* - * Internal routines when attribute list is more than one block. - */ -STATIC int xfs_attr_node_get(xfs_da_args_t *args); -STATIC int xfs_attr_node_addname(xfs_da_args_t *args); -STATIC int xfs_attr_node_removename(xfs_da_args_t *args); -STATIC int xfs_attr_fillstate(xfs_da_state_t *state); -STATIC int xfs_attr_refillstate(xfs_da_state_t *state); - - -STATIC int -xfs_attr_args_init( - struct xfs_da_args *args, - struct xfs_inode *dp, - const unsigned char *name, - int flags) -{ - - if (!name) - return EINVAL; - - memset(args, 0, sizeof(*args)); - args->geo = dp->i_mount->m_attr_geo; - args->whichfork = XFS_ATTR_FORK; - args->dp = dp; - args->flags = flags; - args->name = name; - args->namelen = strlen((const char *)name); - if (args->namelen >= MAXNAMELEN) - return EFAULT; /* match IRIX behaviour */ - - args->hashval = xfs_da_hashname(args->name, args->namelen); - return 0; -} - -int -xfs_inode_hasattr( - struct xfs_inode *ip) -{ - if (!XFS_IFORK_Q(ip) || - (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_anextents == 0)) - return 0; - return 1; -} - -/*======================================================================== - * Overall external interface routines. - *========================================================================*/ - -int -xfs_attr_get( - struct xfs_inode *ip, - const unsigned char *name, - unsigned char *value, - int *valuelenp, - int flags) -{ - struct xfs_da_args args; - uint lock_mode; - int error; - - XFS_STATS_INC(xs_attr_get); - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return EIO; - - if (!xfs_inode_hasattr(ip)) - return ENOATTR; - - error = xfs_attr_args_init(&args, ip, name, flags); - if (error) - return error; - - args.value = value; - args.valuelen = *valuelenp; - - lock_mode = xfs_ilock_attr_map_shared(ip); - if (!xfs_inode_hasattr(ip)) - error = ENOATTR; - else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) - error = xfs_attr_shortform_getvalue(&args); - else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) - error = xfs_attr_leaf_get(&args); - else - error = xfs_attr_node_get(&args); - xfs_iunlock(ip, lock_mode); - - *valuelenp = args.valuelen; - return error == EEXIST ? 0 : error; -} - -/* - * Calculate how many blocks we need for the new attribute, - */ -STATIC int -xfs_attr_calc_size( - struct xfs_da_args *args, - int *local) -{ - struct xfs_mount *mp = args->dp->i_mount; - int size; - int nblks; - - /* - * Determine space new attribute will use, and if it would be - * "local" or "remote" (note: local != inline). - */ - size = xfs_attr_leaf_newentsize(args, local); - nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); - if (*local) { - if (size > (args->geo->blksize / 2)) { - /* Double split possible */ - nblks *= 2; - } - } else { - /* - * Out of line attribute, cannot double split, but - * make room for the attribute value itself. - */ - uint dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen); - nblks += dblocks; - nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); - } - - return nblks; -} - -int -xfs_attr_set( - struct xfs_inode *dp, - const unsigned char *name, - unsigned char *value, - int valuelen, - int flags) -{ - struct xfs_mount *mp = dp->i_mount; - struct xfs_da_args args; - struct xfs_bmap_free flist; - struct xfs_trans_res tres; - xfs_fsblock_t firstblock; - int rsvd = (flags & ATTR_ROOT) != 0; - int error, err2, committed, local; - - XFS_STATS_INC(xs_attr_set); - - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return EIO; - - error = xfs_attr_args_init(&args, dp, name, flags); - if (error) - return error; - - args.value = value; - args.valuelen = valuelen; - args.firstblock = &firstblock; - args.flist = &flist; - args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - args.total = xfs_attr_calc_size(&args, &local); - - error = xfs_qm_dqattach(dp, 0); - if (error) - return error; - - /* - * If the inode doesn't have an attribute fork, add one. - * (inode must not be locked when we call this routine) - */ - if (XFS_IFORK_Q(dp) == 0) { - int sf_size = sizeof(xfs_attr_sf_hdr_t) + - XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); - - error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); - if (error) - return error; - } - - /* - * Start our first transaction of the day. - * - * All future transactions during this code must be "chained" off - * this one via the trans_dup() call. All transactions will contain - * the inode, and the inode will always be marked with trans_ihold(). - * Since the inode will be locked in all transactions, we must log - * the inode in every transaction to let it float upward through - * the log. - */ - args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET); - - /* - * Root fork attributes can use reserved data blocks for this - * operation if necessary - */ - - if (rsvd) - args.trans->t_flags |= XFS_TRANS_RESERVE; - - tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + - M_RES(mp)->tr_attrsetrt.tr_logres * args.total; - tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; - tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; - error = xfs_trans_reserve(args.trans, &tres, args.total, 0); - if (error) { - xfs_trans_cancel(args.trans, 0); - return error; - } - xfs_ilock(dp, XFS_ILOCK_EXCL); - - error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, - rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : - XFS_QMOPT_RES_REGBLKS); - if (error) { - xfs_iunlock(dp, XFS_ILOCK_EXCL); - xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); - return error; - } - - xfs_trans_ijoin(args.trans, dp, 0); - - /* - * If the attribute list is non-existent or a shortform list, - * upgrade it to a single-leaf-block attribute list. - */ - if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || - (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - dp->i_d.di_anextents == 0)) { - - /* - * Build initial attribute list (if required). - */ - if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) - xfs_attr_shortform_create(&args); - - /* - * Try to add the attr to the attribute list in - * the inode. - */ - error = xfs_attr_shortform_addname(&args); - if (error != ENOSPC) { - /* - * Commit the shortform mods, and we're done. - * NOTE: this is also the error path (EEXIST, etc). - */ - ASSERT(args.trans != NULL); - - /* - * If this is a synchronous mount, make sure that - * the transaction goes to disk before returning - * to the user. - */ - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(args.trans); - - if (!error && (flags & ATTR_KERNOTIME) == 0) { - xfs_trans_ichgtime(args.trans, dp, - XFS_ICHGTIME_CHG); - } - err2 = xfs_trans_commit(args.trans, - XFS_TRANS_RELEASE_LOG_RES); - xfs_iunlock(dp, XFS_ILOCK_EXCL); - - return error ? error : err2; - } - - /* - * It won't fit in the shortform, transform to a leaf block. - * GROT: another possible req'mt for a double-split btree op. - */ - xfs_bmap_init(args.flist, args.firstblock); - error = xfs_attr_shortform_to_leaf(&args); - if (!error) { - error = xfs_bmap_finish(&args.trans, args.flist, - &committed); - } - if (error) { - ASSERT(committed); - args.trans = NULL; - xfs_bmap_cancel(&flist); - goto out; - } - - /* - * bmap_finish() may have committed the last trans and started - * a new one. We need the inode to be in all transactions. - */ - if (committed) - xfs_trans_ijoin(args.trans, dp, 0); - - /* - * Commit the leaf transformation. We'll need another (linked) - * transaction to add the new attribute to the leaf. - */ - - error = xfs_trans_roll(&args.trans, dp); - if (error) - goto out; - - } - - if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) - error = xfs_attr_leaf_addname(&args); - else - error = xfs_attr_node_addname(&args); - if (error) - goto out; - - /* - * If this is a synchronous mount, make sure that the - * transaction goes to disk before returning to the user. - */ - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(args.trans); - - if ((flags & ATTR_KERNOTIME) == 0) - xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); - - /* - * Commit the last in the sequence of transactions. - */ - xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); - error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); - xfs_iunlock(dp, XFS_ILOCK_EXCL); - - return error; - -out: - if (args.trans) { - xfs_trans_cancel(args.trans, - XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - } - xfs_iunlock(dp, XFS_ILOCK_EXCL); - return error; -} - -/* - * Generic handler routine to remove a name from an attribute list. - * Transitions attribute list from Btree to shortform as necessary. - */ -int -xfs_attr_remove( - struct xfs_inode *dp, - const unsigned char *name, - int flags) -{ - struct xfs_mount *mp = dp->i_mount; - struct xfs_da_args args; - struct xfs_bmap_free flist; - xfs_fsblock_t firstblock; - int error; - - XFS_STATS_INC(xs_attr_remove); - - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return EIO; - - if (!xfs_inode_hasattr(dp)) - return ENOATTR; - - error = xfs_attr_args_init(&args, dp, name, flags); - if (error) - return error; - - args.firstblock = &firstblock; - args.flist = &flist; - - /* - * we have no control over the attribute names that userspace passes us - * to remove, so we have to allow the name lookup prior to attribute - * removal to fail. - */ - args.op_flags = XFS_DA_OP_OKNOENT; - - error = xfs_qm_dqattach(dp, 0); - if (error) - return error; - - /* - * Start our first transaction of the day. - * - * All future transactions during this code must be "chained" off - * this one via the trans_dup() call. All transactions will contain - * the inode, and the inode will always be marked with trans_ihold(). - * Since the inode will be locked in all transactions, we must log - * the inode in every transaction to let it float upward through - * the log. - */ - args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM); - - /* - * Root fork attributes can use reserved data blocks for this - * operation if necessary - */ - - if (flags & ATTR_ROOT) - args.trans->t_flags |= XFS_TRANS_RESERVE; - - error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm, - XFS_ATTRRM_SPACE_RES(mp), 0); - if (error) { - xfs_trans_cancel(args.trans, 0); - return error; - } - - xfs_ilock(dp, XFS_ILOCK_EXCL); - /* - * No need to make quota reservations here. We expect to release some - * blocks not allocate in the common case. - */ - xfs_trans_ijoin(args.trans, dp, 0); - - if (!xfs_inode_hasattr(dp)) { - error = ENOATTR; - } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { - ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); - error = xfs_attr_shortform_remove(&args); - } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { - error = xfs_attr_leaf_removename(&args); - } else { - error = xfs_attr_node_removename(&args); - } - - if (error) - goto out; - - /* - * If this is a synchronous mount, make sure that the - * transaction goes to disk before returning to the user. - */ - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(args.trans); - - if ((flags & ATTR_KERNOTIME) == 0) - xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); - - /* - * Commit the last in the sequence of transactions. - */ - xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); - error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); - xfs_iunlock(dp, XFS_ILOCK_EXCL); - - return error; - -out: - if (args.trans) { - xfs_trans_cancel(args.trans, - XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - } - xfs_iunlock(dp, XFS_ILOCK_EXCL); - return error; -} - -/*======================================================================== - * External routines when attribute list is inside the inode - *========================================================================*/ - -/* - * Add a name to the shortform attribute list structure - * This is the external routine. - */ -STATIC int -xfs_attr_shortform_addname(xfs_da_args_t *args) -{ - int newsize, forkoff, retval; - - trace_xfs_attr_sf_addname(args); - - retval = xfs_attr_shortform_lookup(args); - if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { - return retval; - } else if (retval == EEXIST) { - if (args->flags & ATTR_CREATE) - return retval; - retval = xfs_attr_shortform_remove(args); - ASSERT(retval == 0); - } - - if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || - args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX) - return ENOSPC; - - newsize = XFS_ATTR_SF_TOTSIZE(args->dp); - newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); - - forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize); - if (!forkoff) - return ENOSPC; - - xfs_attr_shortform_add(args, forkoff); - return 0; -} - - -/*======================================================================== - * External routines when attribute list is one block - *========================================================================*/ - -/* - * Add a name to the leaf attribute list structure - * - * This leaf block cannot have a "remote" value, we only call this routine - * if bmap_one_block() says there is only one block (ie: no remote blks). - */ -STATIC int -xfs_attr_leaf_addname(xfs_da_args_t *args) -{ - xfs_inode_t *dp; - struct xfs_buf *bp; - int retval, error, committed, forkoff; - - trace_xfs_attr_leaf_addname(args); - - /* - * Read the (only) block in the attribute list in. - */ - dp = args->dp; - args->blkno = 0; - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); - if (error) - return error; - - /* - * Look up the given attribute in the leaf block. Figure out if - * the given flags produce an error or call for an atomic rename. - */ - retval = xfs_attr3_leaf_lookup_int(bp, args); - if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { - xfs_trans_brelse(args->trans, bp); - return retval; - } else if (retval == EEXIST) { - if (args->flags & ATTR_CREATE) { /* pure create op */ - xfs_trans_brelse(args->trans, bp); - return retval; - } - - trace_xfs_attr_leaf_replace(args); - - /* save the attribute state for later removal*/ - args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ - args->blkno2 = args->blkno; /* set 2nd entry info*/ - args->index2 = args->index; - args->rmtblkno2 = args->rmtblkno; - args->rmtblkcnt2 = args->rmtblkcnt; - args->rmtvaluelen2 = args->rmtvaluelen; - - /* - * clear the remote attr state now that it is saved so that the - * values reflect the state of the attribute we are about to - * add, not the attribute we just found and will remove later. - */ - args->rmtblkno = 0; - args->rmtblkcnt = 0; - args->rmtvaluelen = 0; - } - - /* - * Add the attribute to the leaf block, transitioning to a Btree - * if required. - */ - retval = xfs_attr3_leaf_add(bp, args); - if (retval == ENOSPC) { - /* - * Promote the attribute list to the Btree format, then - * Commit that transaction so that the node_addname() call - * can manage its own transactions. - */ - xfs_bmap_init(args->flist, args->firstblock); - error = xfs_attr3_leaf_to_node(args); - if (!error) { - error = xfs_bmap_finish(&args->trans, args->flist, - &committed); - } - if (error) { - ASSERT(committed); - args->trans = NULL; - xfs_bmap_cancel(args->flist); - return error; - } - - /* - * bmap_finish() may have committed the last trans and started - * a new one. We need the inode to be in all transactions. - */ - if (committed) - xfs_trans_ijoin(args->trans, dp, 0); - - /* - * Commit the current trans (including the inode) and start - * a new one. - */ - error = xfs_trans_roll(&args->trans, dp); - if (error) - return error; - - /* - * Fob the whole rest of the problem off on the Btree code. - */ - error = xfs_attr_node_addname(args); - return error; - } - - /* - * Commit the transaction that added the attr name so that - * later routines can manage their own transactions. - */ - error = xfs_trans_roll(&args->trans, dp); - if (error) - return error; - - /* - * If there was an out-of-line value, allocate the blocks we - * identified for its storage and copy the value. This is done - * after we create the attribute so that we don't overflow the - * maximum size of a transaction and/or hit a deadlock. - */ - if (args->rmtblkno > 0) { - error = xfs_attr_rmtval_set(args); - if (error) - return error; - } - - /* - * If this is an atomic rename operation, we must "flip" the - * incomplete flags on the "new" and "old" attribute/value pairs - * so that one disappears and one appears atomically. Then we - * must remove the "old" attribute/value pair. - */ - if (args->op_flags & XFS_DA_OP_RENAME) { - /* - * In a separate transaction, set the incomplete flag on the - * "old" attr and clear the incomplete flag on the "new" attr. - */ - error = xfs_attr3_leaf_flipflags(args); - if (error) - return error; - - /* - * Dismantle the "old" attribute/value pair by removing - * a "remote" value (if it exists). - */ - args->index = args->index2; - args->blkno = args->blkno2; - args->rmtblkno = args->rmtblkno2; - args->rmtblkcnt = args->rmtblkcnt2; - args->rmtvaluelen = args->rmtvaluelen2; - if (args->rmtblkno) { - error = xfs_attr_rmtval_remove(args); - if (error) - return error; - } - - /* - * Read in the block containing the "old" attr, then - * remove the "old" attr from that block (neat, huh!) - */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, - -1, &bp); - if (error) - return error; - - xfs_attr3_leaf_remove(bp, args); - - /* - * If the result is small enough, shrink it all into the inode. - */ - if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { - xfs_bmap_init(args->flist, args->firstblock); - error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); - /* bp is gone due to xfs_da_shrink_inode */ - if (!error) { - error = xfs_bmap_finish(&args->trans, - args->flist, - &committed); - } - if (error) { - ASSERT(committed); - args->trans = NULL; - xfs_bmap_cancel(args->flist); - return error; - } - - /* - * bmap_finish() may have committed the last trans - * and started a new one. We need the inode to be - * in all transactions. - */ - if (committed) - xfs_trans_ijoin(args->trans, dp, 0); - } - - /* - * Commit the remove and start the next trans in series. - */ - error = xfs_trans_roll(&args->trans, dp); - - } else if (args->rmtblkno > 0) { - /* - * Added a "remote" value, just clear the incomplete flag. - */ - error = xfs_attr3_leaf_clearflag(args); - } - return error; -} - -/* - * Remove a name from the leaf attribute list structure - * - * This leaf block cannot have a "remote" value, we only call this routine - * if bmap_one_block() says there is only one block (ie: no remote blks). - */ -STATIC int -xfs_attr_leaf_removename(xfs_da_args_t *args) -{ - xfs_inode_t *dp; - struct xfs_buf *bp; - int error, committed, forkoff; - - trace_xfs_attr_leaf_removename(args); - - /* - * Remove the attribute. - */ - dp = args->dp; - args->blkno = 0; - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); - if (error) - return error; - - error = xfs_attr3_leaf_lookup_int(bp, args); - if (error == ENOATTR) { - xfs_trans_brelse(args->trans, bp); - return error; - } - - xfs_attr3_leaf_remove(bp, args); - - /* - * If the result is small enough, shrink it all into the inode. - */ - if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { - xfs_bmap_init(args->flist, args->firstblock); - error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); - /* bp is gone due to xfs_da_shrink_inode */ - if (!error) { - error = xfs_bmap_finish(&args->trans, args->flist, - &committed); - } - if (error) { - ASSERT(committed); - args->trans = NULL; - xfs_bmap_cancel(args->flist); - return error; - } - - /* - * bmap_finish() may have committed the last trans and started - * a new one. We need the inode to be in all transactions. - */ - if (committed) - xfs_trans_ijoin(args->trans, dp, 0); - } - return 0; -} - -/* - * Look up a name in a leaf attribute list structure. - * - * This leaf block cannot have a "remote" value, we only call this routine - * if bmap_one_block() says there is only one block (ie: no remote blks). - */ -STATIC int -xfs_attr_leaf_get(xfs_da_args_t *args) -{ - struct xfs_buf *bp; - int error; - - trace_xfs_attr_leaf_get(args); - - args->blkno = 0; - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); - if (error) - return error; - - error = xfs_attr3_leaf_lookup_int(bp, args); - if (error != EEXIST) { - xfs_trans_brelse(args->trans, bp); - return error; - } - error = xfs_attr3_leaf_getvalue(bp, args); - xfs_trans_brelse(args->trans, bp); - if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) { - error = xfs_attr_rmtval_get(args); - } - return error; -} - -/*======================================================================== - * External routines when attribute list size > geo->blksize - *========================================================================*/ - -/* - * Add a name to a Btree-format attribute list. - * - * This will involve walking down the Btree, and may involve splitting - * leaf nodes and even splitting intermediate nodes up to and including - * the root node (a special case of an intermediate node). - * - * "Remote" attribute values confuse the issue and atomic rename operations - * add a whole extra layer of confusion on top of that. - */ -STATIC int -xfs_attr_node_addname(xfs_da_args_t *args) -{ - xfs_da_state_t *state; - xfs_da_state_blk_t *blk; - xfs_inode_t *dp; - xfs_mount_t *mp; - int committed, retval, error; - - trace_xfs_attr_node_addname(args); - - /* - * Fill in bucket of arguments/results/context to carry around. - */ - dp = args->dp; - mp = dp->i_mount; -restart: - state = xfs_da_state_alloc(); - state->args = args; - state->mp = mp; - - /* - * Search to see if name already exists, and get back a pointer - * to where it should go. - */ - error = xfs_da3_node_lookup_int(state, &retval); - if (error) - goto out; - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { - goto out; - } else if (retval == EEXIST) { - if (args->flags & ATTR_CREATE) - goto out; - - trace_xfs_attr_node_replace(args); - - /* save the attribute state for later removal*/ - args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ - args->blkno2 = args->blkno; /* set 2nd entry info*/ - args->index2 = args->index; - args->rmtblkno2 = args->rmtblkno; - args->rmtblkcnt2 = args->rmtblkcnt; - args->rmtvaluelen2 = args->rmtvaluelen; - - /* - * clear the remote attr state now that it is saved so that the - * values reflect the state of the attribute we are about to - * add, not the attribute we just found and will remove later. - */ - args->rmtblkno = 0; - args->rmtblkcnt = 0; - args->rmtvaluelen = 0; - } - - retval = xfs_attr3_leaf_add(blk->bp, state->args); - if (retval == ENOSPC) { - if (state->path.active == 1) { - /* - * Its really a single leaf node, but it had - * out-of-line values so it looked like it *might* - * have been a b-tree. - */ - xfs_da_state_free(state); - state = NULL; - xfs_bmap_init(args->flist, args->firstblock); - error = xfs_attr3_leaf_to_node(args); - if (!error) { - error = xfs_bmap_finish(&args->trans, - args->flist, - &committed); - } - if (error) { - ASSERT(committed); - args->trans = NULL; - xfs_bmap_cancel(args->flist); - goto out; - } - - /* - * bmap_finish() may have committed the last trans - * and started a new one. We need the inode to be - * in all transactions. - */ - if (committed) - xfs_trans_ijoin(args->trans, dp, 0); - - /* - * Commit the node conversion and start the next - * trans in the chain. - */ - error = xfs_trans_roll(&args->trans, dp); - if (error) - goto out; - - goto restart; - } - - /* - * Split as many Btree elements as required. - * This code tracks the new and old attr's location - * in the index/blkno/rmtblkno/rmtblkcnt fields and - * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields. - */ - xfs_bmap_init(args->flist, args->firstblock); - error = xfs_da3_split(state); - if (!error) { - error = xfs_bmap_finish(&args->trans, args->flist, - &committed); - } - if (error) { - ASSERT(committed); - args->trans = NULL; - xfs_bmap_cancel(args->flist); - goto out; - } - - /* - * bmap_finish() may have committed the last trans and started - * a new one. We need the inode to be in all transactions. - */ - if (committed) - xfs_trans_ijoin(args->trans, dp, 0); - } else { - /* - * Addition succeeded, update Btree hashvals. - */ - xfs_da3_fixhashpath(state, &state->path); - } - - /* - * Kill the state structure, we're done with it and need to - * allow the buffers to come back later. - */ - xfs_da_state_free(state); - state = NULL; - - /* - * Commit the leaf addition or btree split and start the next - * trans in the chain. - */ - error = xfs_trans_roll(&args->trans, dp); - if (error) - goto out; - - /* - * If there was an out-of-line value, allocate the blocks we - * identified for its storage and copy the value. This is done - * after we create the attribute so that we don't overflow the - * maximum size of a transaction and/or hit a deadlock. - */ - if (args->rmtblkno > 0) { - error = xfs_attr_rmtval_set(args); - if (error) - return error; - } - - /* - * If this is an atomic rename operation, we must "flip" the - * incomplete flags on the "new" and "old" attribute/value pairs - * so that one disappears and one appears atomically. Then we - * must remove the "old" attribute/value pair. - */ - if (args->op_flags & XFS_DA_OP_RENAME) { - /* - * In a separate transaction, set the incomplete flag on the - * "old" attr and clear the incomplete flag on the "new" attr. - */ - error = xfs_attr3_leaf_flipflags(args); - if (error) - goto out; - - /* - * Dismantle the "old" attribute/value pair by removing - * a "remote" value (if it exists). - */ - args->index = args->index2; - args->blkno = args->blkno2; - args->rmtblkno = args->rmtblkno2; - args->rmtblkcnt = args->rmtblkcnt2; - args->rmtvaluelen = args->rmtvaluelen2; - if (args->rmtblkno) { - error = xfs_attr_rmtval_remove(args); - if (error) - return error; - } - - /* - * Re-find the "old" attribute entry after any split ops. - * The INCOMPLETE flag means that we will find the "old" - * attr, not the "new" one. - */ - args->flags |= XFS_ATTR_INCOMPLETE; - state = xfs_da_state_alloc(); - state->args = args; - state->mp = mp; - state->inleaf = 0; - error = xfs_da3_node_lookup_int(state, &retval); - if (error) - goto out; - - /* - * Remove the name and update the hashvals in the tree. - */ - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - error = xfs_attr3_leaf_remove(blk->bp, args); - xfs_da3_fixhashpath(state, &state->path); - - /* - * Check to see if the tree needs to be collapsed. - */ - if (retval && (state->path.active > 1)) { - xfs_bmap_init(args->flist, args->firstblock); - error = xfs_da3_join(state); - if (!error) { - error = xfs_bmap_finish(&args->trans, - args->flist, - &committed); - } - if (error) { - ASSERT(committed); - args->trans = NULL; - xfs_bmap_cancel(args->flist); - goto out; - } - - /* - * bmap_finish() may have committed the last trans - * and started a new one. We need the inode to be - * in all transactions. - */ - if (committed) - xfs_trans_ijoin(args->trans, dp, 0); - } - - /* - * Commit and start the next trans in the chain. - */ - error = xfs_trans_roll(&args->trans, dp); - if (error) - goto out; - - } else if (args->rmtblkno > 0) { - /* - * Added a "remote" value, just clear the incomplete flag. - */ - error = xfs_attr3_leaf_clearflag(args); - if (error) - goto out; - } - retval = error = 0; - -out: - if (state) - xfs_da_state_free(state); - if (error) - return error; - return retval; -} - -/* - * Remove a name from a B-tree attribute list. - * - * This will involve walking down the Btree, and may involve joining - * leaf nodes and even joining intermediate nodes up to and including - * the root node (a special case of an intermediate node). - */ -STATIC int -xfs_attr_node_removename(xfs_da_args_t *args) -{ - xfs_da_state_t *state; - xfs_da_state_blk_t *blk; - xfs_inode_t *dp; - struct xfs_buf *bp; - int retval, error, committed, forkoff; - - trace_xfs_attr_node_removename(args); - - /* - * Tie a string around our finger to remind us where we are. - */ - dp = args->dp; - state = xfs_da_state_alloc(); - state->args = args; - state->mp = dp->i_mount; - - /* - * Search to see if name exists, and get back a pointer to it. - */ - error = xfs_da3_node_lookup_int(state, &retval); - if (error || (retval != EEXIST)) { - if (error == 0) - error = retval; - goto out; - } - - /* - * If there is an out-of-line value, de-allocate the blocks. - * This is done before we remove the attribute so that we don't - * overflow the maximum size of a transaction and/or hit a deadlock. - */ - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->bp != NULL); - ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - if (args->rmtblkno > 0) { - /* - * Fill in disk block numbers in the state structure - * so that we can get the buffers back after we commit - * several transactions in the following calls. - */ - error = xfs_attr_fillstate(state); - if (error) - goto out; - - /* - * Mark the attribute as INCOMPLETE, then bunmapi() the - * remote value. - */ - error = xfs_attr3_leaf_setflag(args); - if (error) - goto out; - error = xfs_attr_rmtval_remove(args); - if (error) - goto out; - - /* - * Refill the state structure with buffers, the prior calls - * released our buffers. - */ - error = xfs_attr_refillstate(state); - if (error) - goto out; - } - - /* - * Remove the name and update the hashvals in the tree. - */ - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - retval = xfs_attr3_leaf_remove(blk->bp, args); - xfs_da3_fixhashpath(state, &state->path); - - /* - * Check to see if the tree needs to be collapsed. - */ - if (retval && (state->path.active > 1)) { - xfs_bmap_init(args->flist, args->firstblock); - error = xfs_da3_join(state); - if (!error) { - error = xfs_bmap_finish(&args->trans, args->flist, - &committed); - } - if (error) { - ASSERT(committed); - args->trans = NULL; - xfs_bmap_cancel(args->flist); - goto out; - } - - /* - * bmap_finish() may have committed the last trans and started - * a new one. We need the inode to be in all transactions. - */ - if (committed) - xfs_trans_ijoin(args->trans, dp, 0); - - /* - * Commit the Btree join operation and start a new trans. - */ - error = xfs_trans_roll(&args->trans, dp); - if (error) - goto out; - } - - /* - * If the result is small enough, push it all into the inode. - */ - if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { - /* - * Have to get rid of the copy of this dabuf in the state. - */ - ASSERT(state->path.active == 1); - ASSERT(state->path.blk[0].bp); - state->path.blk[0].bp = NULL; - - error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp); - if (error) - goto out; - - if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { - xfs_bmap_init(args->flist, args->firstblock); - error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); - /* bp is gone due to xfs_da_shrink_inode */ - if (!error) { - error = xfs_bmap_finish(&args->trans, - args->flist, - &committed); - } - if (error) { - ASSERT(committed); - args->trans = NULL; - xfs_bmap_cancel(args->flist); - goto out; - } - - /* - * bmap_finish() may have committed the last trans - * and started a new one. We need the inode to be - * in all transactions. - */ - if (committed) - xfs_trans_ijoin(args->trans, dp, 0); - } else - xfs_trans_brelse(args->trans, bp); - } - error = 0; - -out: - xfs_da_state_free(state); - return error; -} - -/* - * Fill in the disk block numbers in the state structure for the buffers - * that are attached to the state structure. - * This is done so that we can quickly reattach ourselves to those buffers - * after some set of transaction commits have released these buffers. - */ -STATIC int -xfs_attr_fillstate(xfs_da_state_t *state) -{ - xfs_da_state_path_t *path; - xfs_da_state_blk_t *blk; - int level; - - trace_xfs_attr_fillstate(state->args); - - /* - * Roll down the "path" in the state structure, storing the on-disk - * block number for those buffers in the "path". - */ - path = &state->path; - ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); - for (blk = path->blk, level = 0; level < path->active; blk++, level++) { - if (blk->bp) { - blk->disk_blkno = XFS_BUF_ADDR(blk->bp); - blk->bp = NULL; - } else { - blk->disk_blkno = 0; - } - } - - /* - * Roll down the "altpath" in the state structure, storing the on-disk - * block number for those buffers in the "altpath". - */ - path = &state->altpath; - ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); - for (blk = path->blk, level = 0; level < path->active; blk++, level++) { - if (blk->bp) { - blk->disk_blkno = XFS_BUF_ADDR(blk->bp); - blk->bp = NULL; - } else { - blk->disk_blkno = 0; - } - } - - return 0; -} - -/* - * Reattach the buffers to the state structure based on the disk block - * numbers stored in the state structure. - * This is done after some set of transaction commits have released those - * buffers from our grip. - */ -STATIC int -xfs_attr_refillstate(xfs_da_state_t *state) -{ - xfs_da_state_path_t *path; - xfs_da_state_blk_t *blk; - int level, error; - - trace_xfs_attr_refillstate(state->args); - - /* - * Roll down the "path" in the state structure, storing the on-disk - * block number for those buffers in the "path". - */ - path = &state->path; - ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); - for (blk = path->blk, level = 0; level < path->active; blk++, level++) { - if (blk->disk_blkno) { - error = xfs_da3_node_read(state->args->trans, - state->args->dp, - blk->blkno, blk->disk_blkno, - &blk->bp, XFS_ATTR_FORK); - if (error) - return error; - } else { - blk->bp = NULL; - } - } - - /* - * Roll down the "altpath" in the state structure, storing the on-disk - * block number for those buffers in the "altpath". - */ - path = &state->altpath; - ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); - for (blk = path->blk, level = 0; level < path->active; blk++, level++) { - if (blk->disk_blkno) { - error = xfs_da3_node_read(state->args->trans, - state->args->dp, - blk->blkno, blk->disk_blkno, - &blk->bp, XFS_ATTR_FORK); - if (error) - return error; - } else { - blk->bp = NULL; - } - } - - return 0; -} - -/* - * Look up a filename in a node attribute list. - * - * This routine gets called for any attribute fork that has more than one - * block, ie: both true Btree attr lists and for single-leaf-blocks with - * "remote" values taking up more blocks. - */ -STATIC int -xfs_attr_node_get(xfs_da_args_t *args) -{ - xfs_da_state_t *state; - xfs_da_state_blk_t *blk; - int error, retval; - int i; - - trace_xfs_attr_node_get(args); - - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - - /* - * Search to see if name exists, and get back a pointer to it. - */ - error = xfs_da3_node_lookup_int(state, &retval); - if (error) { - retval = error; - } else if (retval == EEXIST) { - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->bp != NULL); - ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - - /* - * Get the value, local or "remote" - */ - retval = xfs_attr3_leaf_getvalue(blk->bp, args); - if (!retval && (args->rmtblkno > 0) - && !(args->flags & ATTR_KERNOVAL)) { - retval = xfs_attr_rmtval_get(args); - } - } - - /* - * If not in a transaction, we have to release all the buffers. - */ - for (i = 0; i < state->path.active; i++) { - xfs_trans_brelse(args->trans, state->path.blk[i].bp); - state->path.blk[i].bp = NULL; - } - - xfs_da_state_free(state); - return retval; -} diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c deleted file mode 100644 index 127d96aba845..000000000000 --- a/fs/xfs/xfs_attr_leaf.c +++ /dev/null @@ -1,2697 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_inode.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_bmap_btree.h" -#include "xfs_bmap.h" -#include "xfs_attr_sf.h" -#include "xfs_attr_remote.h" -#include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_buf_item.h" -#include "xfs_cksum.h" -#include "xfs_dinode.h" -#include "xfs_dir2.h" - - -/* - * xfs_attr_leaf.c - * - * Routines to implement leaf blocks of attributes as Btrees of hashed names. - */ - -/*======================================================================== - * Function prototypes for the kernel. - *========================================================================*/ - -/* - * Routines used for growing the Btree. - */ -STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args, - xfs_dablk_t which_block, struct xfs_buf **bpp); -STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer, - struct xfs_attr3_icleaf_hdr *ichdr, - struct xfs_da_args *args, int freemap_index); -STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args, - struct xfs_attr3_icleaf_hdr *ichdr, - struct xfs_buf *leaf_buffer); -STATIC void xfs_attr3_leaf_rebalance(xfs_da_state_t *state, - xfs_da_state_blk_t *blk1, - xfs_da_state_blk_t *blk2); -STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state, - xfs_da_state_blk_t *leaf_blk_1, - struct xfs_attr3_icleaf_hdr *ichdr1, - xfs_da_state_blk_t *leaf_blk_2, - struct xfs_attr3_icleaf_hdr *ichdr2, - int *number_entries_in_blk1, - int *number_usedbytes_in_blk1); - -/* - * Utility routines. - */ -STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args, - struct xfs_attr_leafblock *src_leaf, - struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start, - struct xfs_attr_leafblock *dst_leaf, - struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start, - int move_count); -STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); - -void -xfs_attr3_leaf_hdr_from_disk( - struct xfs_attr3_icleaf_hdr *to, - struct xfs_attr_leafblock *from) -{ - int i; - - ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || - from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); - - if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) { - struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)from; - - to->forw = be32_to_cpu(hdr3->info.hdr.forw); - to->back = be32_to_cpu(hdr3->info.hdr.back); - to->magic = be16_to_cpu(hdr3->info.hdr.magic); - to->count = be16_to_cpu(hdr3->count); - to->usedbytes = be16_to_cpu(hdr3->usedbytes); - to->firstused = be16_to_cpu(hdr3->firstused); - to->holes = hdr3->holes; - - for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - to->freemap[i].base = be16_to_cpu(hdr3->freemap[i].base); - to->freemap[i].size = be16_to_cpu(hdr3->freemap[i].size); - } - return; - } - to->forw = be32_to_cpu(from->hdr.info.forw); - to->back = be32_to_cpu(from->hdr.info.back); - to->magic = be16_to_cpu(from->hdr.info.magic); - to->count = be16_to_cpu(from->hdr.count); - to->usedbytes = be16_to_cpu(from->hdr.usedbytes); - to->firstused = be16_to_cpu(from->hdr.firstused); - to->holes = from->hdr.holes; - - for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - to->freemap[i].base = be16_to_cpu(from->hdr.freemap[i].base); - to->freemap[i].size = be16_to_cpu(from->hdr.freemap[i].size); - } -} - -void -xfs_attr3_leaf_hdr_to_disk( - struct xfs_attr_leafblock *to, - struct xfs_attr3_icleaf_hdr *from) -{ - int i; - - ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC || - from->magic == XFS_ATTR3_LEAF_MAGIC); - - if (from->magic == XFS_ATTR3_LEAF_MAGIC) { - struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)to; - - hdr3->info.hdr.forw = cpu_to_be32(from->forw); - hdr3->info.hdr.back = cpu_to_be32(from->back); - hdr3->info.hdr.magic = cpu_to_be16(from->magic); - hdr3->count = cpu_to_be16(from->count); - hdr3->usedbytes = cpu_to_be16(from->usedbytes); - hdr3->firstused = cpu_to_be16(from->firstused); - hdr3->holes = from->holes; - hdr3->pad1 = 0; - - for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base); - hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size); - } - return; - } - to->hdr.info.forw = cpu_to_be32(from->forw); - to->hdr.info.back = cpu_to_be32(from->back); - to->hdr.info.magic = cpu_to_be16(from->magic); - to->hdr.count = cpu_to_be16(from->count); - to->hdr.usedbytes = cpu_to_be16(from->usedbytes); - to->hdr.firstused = cpu_to_be16(from->firstused); - to->hdr.holes = from->holes; - to->hdr.pad1 = 0; - - for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base); - to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size); - } -} - -static bool -xfs_attr3_leaf_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_attr_leafblock *leaf = bp->b_addr; - struct xfs_attr3_icleaf_hdr ichdr; - - xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - struct xfs_da3_node_hdr *hdr3 = bp->b_addr; - - if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) - return false; - - if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid)) - return false; - if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) - return false; - } else { - if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) - return false; - } - if (ichdr.count == 0) - return false; - - /* XXX: need to range check rest of attr header values */ - /* XXX: hash order check? */ - - return true; -} - -static void -xfs_attr3_leaf_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; - - if (!xfs_attr3_leaf_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (bip) - hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - - xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF); -} - -/* - * leaf/node format detection on trees is sketchy, so a node read can be done on - * leaf level blocks when detection identifies the tree as a node format tree - * incorrectly. In this case, we need to swap the verifier to match the correct - * format of the block being read. - */ -static void -xfs_attr3_leaf_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (!xfs_attr3_leaf_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); -} - -const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { - .verify_read = xfs_attr3_leaf_read_verify, - .verify_write = xfs_attr3_leaf_write_verify, -}; - -int -xfs_attr3_leaf_read( - struct xfs_trans *tp, - struct xfs_inode *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - struct xfs_buf **bpp) -{ - int err; - - err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, - XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops); - if (!err && tp) - xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF); - return err; -} - -/*======================================================================== - * Namespace helper routines - *========================================================================*/ - -/* - * If namespace bits don't match return 0. - * If all match then return 1. - */ -STATIC int -xfs_attr_namesp_match(int arg_flags, int ondisk_flags) -{ - return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); -} - - -/*======================================================================== - * External routines when attribute fork size < XFS_LITINO(mp). - *========================================================================*/ - -/* - * Query whether the requested number of additional bytes of extended - * attribute space will be able to fit inline. - * - * Returns zero if not, else the di_forkoff fork offset to be used in the - * literal area for attribute data once the new bytes have been added. - * - * di_forkoff must be 8 byte aligned, hence is stored as a >>3 value; - * special case for dev/uuid inodes, they have fixed size data forks. - */ -int -xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) -{ - int offset; - int minforkoff; /* lower limit on valid forkoff locations */ - int maxforkoff; /* upper limit on valid forkoff locations */ - int dsize; - xfs_mount_t *mp = dp->i_mount; - - /* rounded down */ - offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3; - - switch (dp->i_d.di_format) { - case XFS_DINODE_FMT_DEV: - minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; - return (offset >= minforkoff) ? minforkoff : 0; - case XFS_DINODE_FMT_UUID: - minforkoff = roundup(sizeof(uuid_t), 8) >> 3; - return (offset >= minforkoff) ? minforkoff : 0; - } - - /* - * If the requested numbers of bytes is smaller or equal to the - * current attribute fork size we can always proceed. - * - * Note that if_bytes in the data fork might actually be larger than - * the current data fork size is due to delalloc extents. In that - * case either the extent count will go down when they are converted - * to real extents, or the delalloc conversion will take care of the - * literal area rebalancing. - */ - if (bytes <= XFS_IFORK_ASIZE(dp)) - return dp->i_d.di_forkoff; - - /* - * For attr2 we can try to move the forkoff if there is space in the - * literal area, but for the old format we are done if there is no - * space in the fixed attribute fork. - */ - if (!(mp->m_flags & XFS_MOUNT_ATTR2)) - return 0; - - dsize = dp->i_df.if_bytes; - - switch (dp->i_d.di_format) { - case XFS_DINODE_FMT_EXTENTS: - /* - * If there is no attr fork and the data fork is extents, - * determine if creating the default attr fork will result - * in the extents form migrating to btree. If so, the - * minimum offset only needs to be the space required for - * the btree root. - */ - if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > - xfs_default_attroffset(dp)) - dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); - break; - case XFS_DINODE_FMT_BTREE: - /* - * If we have a data btree then keep forkoff if we have one, - * otherwise we are adding a new attr, so then we set - * minforkoff to where the btree root can finish so we have - * plenty of room for attrs - */ - if (dp->i_d.di_forkoff) { - if (offset < dp->i_d.di_forkoff) - return 0; - return dp->i_d.di_forkoff; - } - dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot); - break; - } - - /* - * A data fork btree root must have space for at least - * MINDBTPTRS key/ptr pairs if the data fork is small or empty. - */ - minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); - minforkoff = roundup(minforkoff, 8) >> 3; - - /* attr fork btree root can have at least this many key/ptr pairs */ - maxforkoff = XFS_LITINO(mp, dp->i_d.di_version) - - XFS_BMDR_SPACE_CALC(MINABTPTRS); - maxforkoff = maxforkoff >> 3; /* rounded down */ - - if (offset >= maxforkoff) - return maxforkoff; - if (offset >= minforkoff) - return offset; - return 0; -} - -/* - * Switch on the ATTR2 superblock bit (implies also FEATURES2) - */ -STATIC void -xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp) -{ - if ((mp->m_flags & XFS_MOUNT_ATTR2) && - !(xfs_sb_version_hasattr2(&mp->m_sb))) { - spin_lock(&mp->m_sb_lock); - if (!xfs_sb_version_hasattr2(&mp->m_sb)) { - xfs_sb_version_addattr2(&mp->m_sb); - spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); - } else - spin_unlock(&mp->m_sb_lock); - } -} - -/* - * Create the initial contents of a shortform attribute list. - */ -void -xfs_attr_shortform_create(xfs_da_args_t *args) -{ - xfs_attr_sf_hdr_t *hdr; - xfs_inode_t *dp; - xfs_ifork_t *ifp; - - trace_xfs_attr_sf_create(args); - - dp = args->dp; - ASSERT(dp != NULL); - ifp = dp->i_afp; - ASSERT(ifp != NULL); - ASSERT(ifp->if_bytes == 0); - if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) { - ifp->if_flags &= ~XFS_IFEXTENTS; /* just in case */ - dp->i_d.di_aformat = XFS_DINODE_FMT_LOCAL; - ifp->if_flags |= XFS_IFINLINE; - } else { - ASSERT(ifp->if_flags & XFS_IFINLINE); - } - xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK); - hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data; - hdr->count = 0; - hdr->totsize = cpu_to_be16(sizeof(*hdr)); - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); -} - -/* - * Add a name/value pair to the shortform attribute list. - * Overflow from the inode has already been checked for. - */ -void -xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) -{ - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; - int i, offset, size; - xfs_mount_t *mp; - xfs_inode_t *dp; - xfs_ifork_t *ifp; - - trace_xfs_attr_sf_add(args); - - dp = args->dp; - mp = dp->i_mount; - dp->i_d.di_forkoff = forkoff; - - ifp = dp->i_afp; - ASSERT(ifp->if_flags & XFS_IFINLINE); - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; - sfe = &sf->list[0]; - for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { -#ifdef DEBUG - if (sfe->namelen != args->namelen) - continue; - if (memcmp(args->name, sfe->nameval, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; - ASSERT(0); -#endif - } - - offset = (char *)sfe - (char *)sf; - size = XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); - xfs_idata_realloc(dp, size, XFS_ATTR_FORK); - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; - sfe = (xfs_attr_sf_entry_t *)((char *)sf + offset); - - sfe->namelen = args->namelen; - sfe->valuelen = args->valuelen; - sfe->flags = XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); - memcpy(sfe->nameval, args->name, args->namelen); - memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); - sf->hdr.count++; - be16_add_cpu(&sf->hdr.totsize, size); - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); - - xfs_sbversion_add_attr2(mp, args->trans); -} - -/* - * After the last attribute is removed revert to original inode format, - * making all literal area available to the data fork once more. - */ -STATIC void -xfs_attr_fork_reset( - struct xfs_inode *ip, - struct xfs_trans *tp) -{ - xfs_idestroy_fork(ip, XFS_ATTR_FORK); - ip->i_d.di_forkoff = 0; - ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; - - ASSERT(ip->i_d.di_anextents == 0); - ASSERT(ip->i_afp == NULL); - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -} - -/* - * Remove an attribute from the shortform attribute list structure. - */ -int -xfs_attr_shortform_remove(xfs_da_args_t *args) -{ - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; - int base, size=0, end, totsize, i; - xfs_mount_t *mp; - xfs_inode_t *dp; - - trace_xfs_attr_sf_remove(args); - - dp = args->dp; - mp = dp->i_mount; - base = sizeof(xfs_attr_sf_hdr_t); - sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; - sfe = &sf->list[0]; - end = sf->hdr.count; - for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), - base += size, i++) { - size = XFS_ATTR_SF_ENTSIZE(sfe); - if (sfe->namelen != args->namelen) - continue; - if (memcmp(sfe->nameval, args->name, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; - break; - } - if (i == end) - return ENOATTR; - - /* - * Fix up the attribute fork data, covering the hole - */ - end = base + size; - totsize = be16_to_cpu(sf->hdr.totsize); - if (end != totsize) - memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end); - sf->hdr.count--; - be16_add_cpu(&sf->hdr.totsize, -size); - - /* - * Fix up the start offset of the attribute fork - */ - totsize -= size; - if (totsize == sizeof(xfs_attr_sf_hdr_t) && - (mp->m_flags & XFS_MOUNT_ATTR2) && - (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && - !(args->op_flags & XFS_DA_OP_ADDNAME)) { - xfs_attr_fork_reset(dp, args->trans); - } else { - xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); - dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); - ASSERT(dp->i_d.di_forkoff); - ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || - (args->op_flags & XFS_DA_OP_ADDNAME) || - !(mp->m_flags & XFS_MOUNT_ATTR2) || - dp->i_d.di_format == XFS_DINODE_FMT_BTREE); - xfs_trans_log_inode(args->trans, dp, - XFS_ILOG_CORE | XFS_ILOG_ADATA); - } - - xfs_sbversion_add_attr2(mp, args->trans); - - return 0; -} - -/* - * Look up a name in a shortform attribute list structure. - */ -/*ARGSUSED*/ -int -xfs_attr_shortform_lookup(xfs_da_args_t *args) -{ - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; - int i; - xfs_ifork_t *ifp; - - trace_xfs_attr_sf_lookup(args); - - ifp = args->dp->i_afp; - ASSERT(ifp->if_flags & XFS_IFINLINE); - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; - sfe = &sf->list[0]; - for (i = 0; i < sf->hdr.count; - sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { - if (sfe->namelen != args->namelen) - continue; - if (memcmp(args->name, sfe->nameval, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; - return EEXIST; - } - return ENOATTR; -} - -/* - * Look up a name in a shortform attribute list structure. - */ -/*ARGSUSED*/ -int -xfs_attr_shortform_getvalue(xfs_da_args_t *args) -{ - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; - int i; - - ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE); - sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data; - sfe = &sf->list[0]; - for (i = 0; i < sf->hdr.count; - sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { - if (sfe->namelen != args->namelen) - continue; - if (memcmp(args->name, sfe->nameval, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; - if (args->flags & ATTR_KERNOVAL) { - args->valuelen = sfe->valuelen; - return EEXIST; - } - if (args->valuelen < sfe->valuelen) { - args->valuelen = sfe->valuelen; - return ERANGE; - } - args->valuelen = sfe->valuelen; - memcpy(args->value, &sfe->nameval[args->namelen], - args->valuelen); - return EEXIST; - } - return ENOATTR; -} - -/* - * Convert from using the shortform to the leaf. - */ -int -xfs_attr_shortform_to_leaf(xfs_da_args_t *args) -{ - xfs_inode_t *dp; - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; - xfs_da_args_t nargs; - char *tmpbuffer; - int error, i, size; - xfs_dablk_t blkno; - struct xfs_buf *bp; - xfs_ifork_t *ifp; - - trace_xfs_attr_sf_to_leaf(args); - - dp = args->dp; - ifp = dp->i_afp; - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; - size = be16_to_cpu(sf->hdr.totsize); - tmpbuffer = kmem_alloc(size, KM_SLEEP); - ASSERT(tmpbuffer != NULL); - memcpy(tmpbuffer, ifp->if_u1.if_data, size); - sf = (xfs_attr_shortform_t *)tmpbuffer; - - xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); - xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK); - - bp = NULL; - error = xfs_da_grow_inode(args, &blkno); - if (error) { - /* - * If we hit an IO error middle of the transaction inside - * grow_inode(), we may have inconsistent data. Bail out. - */ - if (error == EIO) - goto out; - xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ - memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ - goto out; - } - - ASSERT(blkno == 0); - error = xfs_attr3_leaf_create(args, blkno, &bp); - if (error) { - error = xfs_da_shrink_inode(args, 0, bp); - bp = NULL; - if (error) - goto out; - xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ - memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ - goto out; - } - - memset((char *)&nargs, 0, sizeof(nargs)); - nargs.dp = dp; - nargs.geo = args->geo; - nargs.firstblock = args->firstblock; - nargs.flist = args->flist; - nargs.total = args->total; - nargs.whichfork = XFS_ATTR_FORK; - nargs.trans = args->trans; - nargs.op_flags = XFS_DA_OP_OKNOENT; - - sfe = &sf->list[0]; - for (i = 0; i < sf->hdr.count; i++) { - nargs.name = sfe->nameval; - nargs.namelen = sfe->namelen; - nargs.value = &sfe->nameval[nargs.namelen]; - nargs.valuelen = sfe->valuelen; - nargs.hashval = xfs_da_hashname(sfe->nameval, - sfe->namelen); - nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); - error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ - ASSERT(error == ENOATTR); - error = xfs_attr3_leaf_add(bp, &nargs); - ASSERT(error != ENOSPC); - if (error) - goto out; - sfe = XFS_ATTR_SF_NEXTENTRY(sfe); - } - error = 0; - -out: - kmem_free(tmpbuffer); - return error; -} - -/* - * Check a leaf attribute block to see if all the entries would fit into - * a shortform attribute list. - */ -int -xfs_attr_shortform_allfit( - struct xfs_buf *bp, - struct xfs_inode *dp) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_attr_leaf_entry *entry; - xfs_attr_leaf_name_local_t *name_loc; - struct xfs_attr3_icleaf_hdr leafhdr; - int bytes; - int i; - - leaf = bp->b_addr; - xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf); - entry = xfs_attr3_leaf_entryp(leaf); - - bytes = sizeof(struct xfs_attr_sf_hdr); - for (i = 0; i < leafhdr.count; entry++, i++) { - if (entry->flags & XFS_ATTR_INCOMPLETE) - continue; /* don't copy partial entries */ - if (!(entry->flags & XFS_ATTR_LOCAL)) - return 0; - name_loc = xfs_attr3_leaf_name_local(leaf, i); - if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX) - return 0; - if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) - return 0; - bytes += sizeof(struct xfs_attr_sf_entry) - 1 - + name_loc->namelen - + be16_to_cpu(name_loc->valuelen); - } - if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) && - (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && - (bytes == sizeof(struct xfs_attr_sf_hdr))) - return -1; - return xfs_attr_shortform_bytesfit(dp, bytes); -} - -/* - * Convert a leaf attribute list to shortform attribute list - */ -int -xfs_attr3_leaf_to_shortform( - struct xfs_buf *bp, - struct xfs_da_args *args, - int forkoff) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_attr3_icleaf_hdr ichdr; - struct xfs_attr_leaf_entry *entry; - struct xfs_attr_leaf_name_local *name_loc; - struct xfs_da_args nargs; - struct xfs_inode *dp = args->dp; - char *tmpbuffer; - int error; - int i; - - trace_xfs_attr_leaf_to_sf(args); - - tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); - if (!tmpbuffer) - return ENOMEM; - - memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); - - leaf = (xfs_attr_leafblock_t *)tmpbuffer; - xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - entry = xfs_attr3_leaf_entryp(leaf); - - /* XXX (dgc): buffer is about to be marked stale - why zero it? */ - memset(bp->b_addr, 0, args->geo->blksize); - - /* - * Clean out the prior contents of the attribute list. - */ - error = xfs_da_shrink_inode(args, 0, bp); - if (error) - goto out; - - if (forkoff == -1) { - ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); - ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE); - xfs_attr_fork_reset(dp, args->trans); - goto out; - } - - xfs_attr_shortform_create(args); - - /* - * Copy the attributes - */ - memset((char *)&nargs, 0, sizeof(nargs)); - nargs.geo = args->geo; - nargs.dp = dp; - nargs.firstblock = args->firstblock; - nargs.flist = args->flist; - nargs.total = args->total; - nargs.whichfork = XFS_ATTR_FORK; - nargs.trans = args->trans; - nargs.op_flags = XFS_DA_OP_OKNOENT; - - for (i = 0; i < ichdr.count; entry++, i++) { - if (entry->flags & XFS_ATTR_INCOMPLETE) - continue; /* don't copy partial entries */ - if (!entry->nameidx) - continue; - ASSERT(entry->flags & XFS_ATTR_LOCAL); - name_loc = xfs_attr3_leaf_name_local(leaf, i); - nargs.name = name_loc->nameval; - nargs.namelen = name_loc->namelen; - nargs.value = &name_loc->nameval[nargs.namelen]; - nargs.valuelen = be16_to_cpu(name_loc->valuelen); - nargs.hashval = be32_to_cpu(entry->hashval); - nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags); - xfs_attr_shortform_add(&nargs, forkoff); - } - error = 0; - -out: - kmem_free(tmpbuffer); - return error; -} - -/* - * Convert from using a single leaf to a root node and a leaf. - */ -int -xfs_attr3_leaf_to_node( - struct xfs_da_args *args) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_attr3_icleaf_hdr icleafhdr; - struct xfs_attr_leaf_entry *entries; - struct xfs_da_node_entry *btree; - struct xfs_da3_icnode_hdr icnodehdr; - struct xfs_da_intnode *node; - struct xfs_inode *dp = args->dp; - struct xfs_mount *mp = dp->i_mount; - struct xfs_buf *bp1 = NULL; - struct xfs_buf *bp2 = NULL; - xfs_dablk_t blkno; - int error; - - trace_xfs_attr_leaf_to_node(args); - - error = xfs_da_grow_inode(args, &blkno); - if (error) - goto out; - error = xfs_attr3_leaf_read(args->trans, dp, 0, -1, &bp1); - if (error) - goto out; - - error = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp2, XFS_ATTR_FORK); - if (error) - goto out; - - /* copy leaf to new buffer, update identifiers */ - xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF); - bp2->b_ops = bp1->b_ops; - memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize); - if (xfs_sb_version_hascrc(&mp->m_sb)) { - struct xfs_da3_blkinfo *hdr3 = bp2->b_addr; - hdr3->blkno = cpu_to_be64(bp2->b_bn); - } - xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1); - - /* - * Set up the new root node. - */ - error = xfs_da3_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK); - if (error) - goto out; - node = bp1->b_addr; - dp->d_ops->node_hdr_from_disk(&icnodehdr, node); - btree = dp->d_ops->node_tree_p(node); - - leaf = bp2->b_addr; - xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf); - entries = xfs_attr3_leaf_entryp(leaf); - - /* both on-disk, don't endian-flip twice */ - btree[0].hashval = entries[icleafhdr.count - 1].hashval; - btree[0].before = cpu_to_be32(blkno); - icnodehdr.count = 1; - dp->d_ops->node_hdr_to_disk(node, &icnodehdr); - xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1); - error = 0; -out: - return error; -} - -/*======================================================================== - * Routines used for growing the Btree. - *========================================================================*/ - -/* - * Create the initial contents of a leaf attribute list - * or a leaf in a node attribute list. - */ -STATIC int -xfs_attr3_leaf_create( - struct xfs_da_args *args, - xfs_dablk_t blkno, - struct xfs_buf **bpp) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_attr3_icleaf_hdr ichdr; - struct xfs_inode *dp = args->dp; - struct xfs_mount *mp = dp->i_mount; - struct xfs_buf *bp; - int error; - - trace_xfs_attr_leaf_create(args); - - error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, - XFS_ATTR_FORK); - if (error) - return error; - bp->b_ops = &xfs_attr3_leaf_buf_ops; - xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF); - leaf = bp->b_addr; - memset(leaf, 0, args->geo->blksize); - - memset(&ichdr, 0, sizeof(ichdr)); - ichdr.firstused = args->geo->blksize; - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - struct xfs_da3_blkinfo *hdr3 = bp->b_addr; - - ichdr.magic = XFS_ATTR3_LEAF_MAGIC; - - hdr3->blkno = cpu_to_be64(bp->b_bn); - hdr3->owner = cpu_to_be64(dp->i_ino); - uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); - - ichdr.freemap[0].base = sizeof(struct xfs_attr3_leaf_hdr); - } else { - ichdr.magic = XFS_ATTR_LEAF_MAGIC; - ichdr.freemap[0].base = sizeof(struct xfs_attr_leaf_hdr); - } - ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base; - - xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); - xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1); - - *bpp = bp; - return 0; -} - -/* - * Split the leaf node, rebalance, then add the new entry. - */ -int -xfs_attr3_leaf_split( - struct xfs_da_state *state, - struct xfs_da_state_blk *oldblk, - struct xfs_da_state_blk *newblk) -{ - xfs_dablk_t blkno; - int error; - - trace_xfs_attr_leaf_split(state->args); - - /* - * Allocate space for a new leaf node. - */ - ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC); - error = xfs_da_grow_inode(state->args, &blkno); - if (error) - return error; - error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp); - if (error) - return error; - newblk->blkno = blkno; - newblk->magic = XFS_ATTR_LEAF_MAGIC; - - /* - * Rebalance the entries across the two leaves. - * NOTE: rebalance() currently depends on the 2nd block being empty. - */ - xfs_attr3_leaf_rebalance(state, oldblk, newblk); - error = xfs_da3_blk_link(state, oldblk, newblk); - if (error) - return error; - - /* - * Save info on "old" attribute for "atomic rename" ops, leaf_add() - * modifies the index/blkno/rmtblk/rmtblkcnt fields to show the - * "new" attrs info. Will need the "old" info to remove it later. - * - * Insert the "new" entry in the correct block. - */ - if (state->inleaf) { - trace_xfs_attr_leaf_add_old(state->args); - error = xfs_attr3_leaf_add(oldblk->bp, state->args); - } else { - trace_xfs_attr_leaf_add_new(state->args); - error = xfs_attr3_leaf_add(newblk->bp, state->args); - } - - /* - * Update last hashval in each block since we added the name. - */ - oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL); - newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL); - return error; -} - -/* - * Add a name to the leaf attribute list structure. - */ -int -xfs_attr3_leaf_add( - struct xfs_buf *bp, - struct xfs_da_args *args) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_attr3_icleaf_hdr ichdr; - int tablesize; - int entsize; - int sum; - int tmp; - int i; - - trace_xfs_attr_leaf_add(args); - - leaf = bp->b_addr; - xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - ASSERT(args->index >= 0 && args->index <= ichdr.count); - entsize = xfs_attr_leaf_newentsize(args, NULL); - - /* - * Search through freemap for first-fit on new name length. - * (may need to figure in size of entry struct too) - */ - tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); - for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) { - if (tablesize > ichdr.firstused) { - sum += ichdr.freemap[i].size; - continue; - } - if (!ichdr.freemap[i].size) - continue; /* no space in this map */ - tmp = entsize; - if (ichdr.freemap[i].base < ichdr.firstused) - tmp += sizeof(xfs_attr_leaf_entry_t); - if (ichdr.freemap[i].size >= tmp) { - tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i); - goto out_log_hdr; - } - sum += ichdr.freemap[i].size; - } - - /* - * If there are no holes in the address space of the block, - * and we don't have enough freespace, then compaction will do us - * no good and we should just give up. - */ - if (!ichdr.holes && sum < entsize) - return ENOSPC; - - /* - * Compact the entries to coalesce free space. - * This may change the hdr->count via dropping INCOMPLETE entries. - */ - xfs_attr3_leaf_compact(args, &ichdr, bp); - - /* - * After compaction, the block is guaranteed to have only one - * free region, in freemap[0]. If it is not big enough, give up. - */ - if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) { - tmp = ENOSPC; - goto out_log_hdr; - } - - tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0); - -out_log_hdr: - xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); - xfs_trans_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, &leaf->hdr, - xfs_attr3_leaf_hdr_size(leaf))); - return tmp; -} - -/* - * Add a name to a leaf attribute list structure. - */ -STATIC int -xfs_attr3_leaf_add_work( - struct xfs_buf *bp, - struct xfs_attr3_icleaf_hdr *ichdr, - struct xfs_da_args *args, - int mapindex) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_attr_leaf_entry *entry; - struct xfs_attr_leaf_name_local *name_loc; - struct xfs_attr_leaf_name_remote *name_rmt; - struct xfs_mount *mp; - int tmp; - int i; - - trace_xfs_attr_leaf_add_work(args); - - leaf = bp->b_addr; - ASSERT(mapindex >= 0 && mapindex < XFS_ATTR_LEAF_MAPSIZE); - ASSERT(args->index >= 0 && args->index <= ichdr->count); - - /* - * Force open some space in the entry array and fill it in. - */ - entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; - if (args->index < ichdr->count) { - tmp = ichdr->count - args->index; - tmp *= sizeof(xfs_attr_leaf_entry_t); - memmove(entry + 1, entry, tmp); - xfs_trans_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); - } - ichdr->count++; - - /* - * Allocate space for the new string (at the end of the run). - */ - mp = args->trans->t_mountp; - ASSERT(ichdr->freemap[mapindex].base < args->geo->blksize); - ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0); - ASSERT(ichdr->freemap[mapindex].size >= - xfs_attr_leaf_newentsize(args, NULL)); - ASSERT(ichdr->freemap[mapindex].size < args->geo->blksize); - ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0); - - ichdr->freemap[mapindex].size -= xfs_attr_leaf_newentsize(args, &tmp); - - entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base + - ichdr->freemap[mapindex].size); - entry->hashval = cpu_to_be32(args->hashval); - entry->flags = tmp ? XFS_ATTR_LOCAL : 0; - entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); - if (args->op_flags & XFS_DA_OP_RENAME) { - entry->flags |= XFS_ATTR_INCOMPLETE; - if ((args->blkno2 == args->blkno) && - (args->index2 <= args->index)) { - args->index2++; - } - } - xfs_trans_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); - ASSERT((args->index == 0) || - (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval))); - ASSERT((args->index == ichdr->count - 1) || - (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval))); - - /* - * For "remote" attribute values, simply note that we need to - * allocate space for the "remote" value. We can't actually - * allocate the extents in this transaction, and we can't decide - * which blocks they should be as we might allocate more blocks - * as part of this transaction (a split operation for example). - */ - if (entry->flags & XFS_ATTR_LOCAL) { - name_loc = xfs_attr3_leaf_name_local(leaf, args->index); - name_loc->namelen = args->namelen; - name_loc->valuelen = cpu_to_be16(args->valuelen); - memcpy((char *)name_loc->nameval, args->name, args->namelen); - memcpy((char *)&name_loc->nameval[args->namelen], args->value, - be16_to_cpu(name_loc->valuelen)); - } else { - name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); - name_rmt->namelen = args->namelen; - memcpy((char *)name_rmt->name, args->name, args->namelen); - entry->flags |= XFS_ATTR_INCOMPLETE; - /* just in case */ - name_rmt->valuelen = 0; - name_rmt->valueblk = 0; - args->rmtblkno = 1; - args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); - args->rmtvaluelen = args->valuelen; - } - xfs_trans_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), - xfs_attr_leaf_entsize(leaf, args->index))); - - /* - * Update the control info for this leaf node - */ - if (be16_to_cpu(entry->nameidx) < ichdr->firstused) - ichdr->firstused = be16_to_cpu(entry->nameidx); - - ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf)); - tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); - - for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - if (ichdr->freemap[i].base == tmp) { - ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t); - ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t); - } - } - ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index); - return 0; -} - -/* - * Garbage collect a leaf attribute list block by copying it to a new buffer. - */ -STATIC void -xfs_attr3_leaf_compact( - struct xfs_da_args *args, - struct xfs_attr3_icleaf_hdr *ichdr_dst, - struct xfs_buf *bp) -{ - struct xfs_attr_leafblock *leaf_src; - struct xfs_attr_leafblock *leaf_dst; - struct xfs_attr3_icleaf_hdr ichdr_src; - struct xfs_trans *trans = args->trans; - char *tmpbuffer; - - trace_xfs_attr_leaf_compact(args); - - tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); - memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); - memset(bp->b_addr, 0, args->geo->blksize); - leaf_src = (xfs_attr_leafblock_t *)tmpbuffer; - leaf_dst = bp->b_addr; - - /* - * Copy the on-disk header back into the destination buffer to ensure - * all the information in the header that is not part of the incore - * header structure is preserved. - */ - memcpy(bp->b_addr, tmpbuffer, xfs_attr3_leaf_hdr_size(leaf_src)); - - /* Initialise the incore headers */ - ichdr_src = *ichdr_dst; /* struct copy */ - ichdr_dst->firstused = args->geo->blksize; - ichdr_dst->usedbytes = 0; - ichdr_dst->count = 0; - ichdr_dst->holes = 0; - ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src); - ichdr_dst->freemap[0].size = ichdr_dst->firstused - - ichdr_dst->freemap[0].base; - - /* write the header back to initialise the underlying buffer */ - xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst); - - /* - * Copy all entry's in the same (sorted) order, - * but allocate name/value pairs packed and in sequence. - */ - xfs_attr3_leaf_moveents(args, leaf_src, &ichdr_src, 0, - leaf_dst, ichdr_dst, 0, ichdr_src.count); - /* - * this logs the entire buffer, but the caller must write the header - * back to the buffer when it is finished modifying it. - */ - xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1); - - kmem_free(tmpbuffer); -} - -/* - * Compare two leaf blocks "order". - * Return 0 unless leaf2 should go before leaf1. - */ -static int -xfs_attr3_leaf_order( - struct xfs_buf *leaf1_bp, - struct xfs_attr3_icleaf_hdr *leaf1hdr, - struct xfs_buf *leaf2_bp, - struct xfs_attr3_icleaf_hdr *leaf2hdr) -{ - struct xfs_attr_leaf_entry *entries1; - struct xfs_attr_leaf_entry *entries2; - - entries1 = xfs_attr3_leaf_entryp(leaf1_bp->b_addr); - entries2 = xfs_attr3_leaf_entryp(leaf2_bp->b_addr); - if (leaf1hdr->count > 0 && leaf2hdr->count > 0 && - ((be32_to_cpu(entries2[0].hashval) < - be32_to_cpu(entries1[0].hashval)) || - (be32_to_cpu(entries2[leaf2hdr->count - 1].hashval) < - be32_to_cpu(entries1[leaf1hdr->count - 1].hashval)))) { - return 1; - } - return 0; -} - -int -xfs_attr_leaf_order( - struct xfs_buf *leaf1_bp, - struct xfs_buf *leaf2_bp) -{ - struct xfs_attr3_icleaf_hdr ichdr1; - struct xfs_attr3_icleaf_hdr ichdr2; - - xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr); - xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr); - return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2); -} - -/* - * Redistribute the attribute list entries between two leaf nodes, - * taking into account the size of the new entry. - * - * NOTE: if new block is empty, then it will get the upper half of the - * old block. At present, all (one) callers pass in an empty second block. - * - * This code adjusts the args->index/blkno and args->index2/blkno2 fields - * to match what it is doing in splitting the attribute leaf block. Those - * values are used in "atomic rename" operations on attributes. Note that - * the "new" and "old" values can end up in different blocks. - */ -STATIC void -xfs_attr3_leaf_rebalance( - struct xfs_da_state *state, - struct xfs_da_state_blk *blk1, - struct xfs_da_state_blk *blk2) -{ - struct xfs_da_args *args; - struct xfs_attr_leafblock *leaf1; - struct xfs_attr_leafblock *leaf2; - struct xfs_attr3_icleaf_hdr ichdr1; - struct xfs_attr3_icleaf_hdr ichdr2; - struct xfs_attr_leaf_entry *entries1; - struct xfs_attr_leaf_entry *entries2; - int count; - int totallen; - int max; - int space; - int swap; - - /* - * Set up environment. - */ - ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC); - ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); - leaf1 = blk1->bp->b_addr; - leaf2 = blk2->bp->b_addr; - xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1); - xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2); - ASSERT(ichdr2.count == 0); - args = state->args; - - trace_xfs_attr_leaf_rebalance(args); - - /* - * Check ordering of blocks, reverse if it makes things simpler. - * - * NOTE: Given that all (current) callers pass in an empty - * second block, this code should never set "swap". - */ - swap = 0; - if (xfs_attr3_leaf_order(blk1->bp, &ichdr1, blk2->bp, &ichdr2)) { - struct xfs_da_state_blk *tmp_blk; - struct xfs_attr3_icleaf_hdr tmp_ichdr; - - tmp_blk = blk1; - blk1 = blk2; - blk2 = tmp_blk; - - /* struct copies to swap them rather than reconverting */ - tmp_ichdr = ichdr1; - ichdr1 = ichdr2; - ichdr2 = tmp_ichdr; - - leaf1 = blk1->bp->b_addr; - leaf2 = blk2->bp->b_addr; - swap = 1; - } - - /* - * Examine entries until we reduce the absolute difference in - * byte usage between the two blocks to a minimum. Then get - * the direction to copy and the number of elements to move. - * - * "inleaf" is true if the new entry should be inserted into blk1. - * If "swap" is also true, then reverse the sense of "inleaf". - */ - state->inleaf = xfs_attr3_leaf_figure_balance(state, blk1, &ichdr1, - blk2, &ichdr2, - &count, &totallen); - if (swap) - state->inleaf = !state->inleaf; - - /* - * Move any entries required from leaf to leaf: - */ - if (count < ichdr1.count) { - /* - * Figure the total bytes to be added to the destination leaf. - */ - /* number entries being moved */ - count = ichdr1.count - count; - space = ichdr1.usedbytes - totallen; - space += count * sizeof(xfs_attr_leaf_entry_t); - - /* - * leaf2 is the destination, compact it if it looks tight. - */ - max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1); - max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t); - if (space > max) - xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp); - - /* - * Move high entries from leaf1 to low end of leaf2. - */ - xfs_attr3_leaf_moveents(args, leaf1, &ichdr1, - ichdr1.count - count, leaf2, &ichdr2, 0, count); - - } else if (count > ichdr1.count) { - /* - * I assert that since all callers pass in an empty - * second buffer, this code should never execute. - */ - ASSERT(0); - - /* - * Figure the total bytes to be added to the destination leaf. - */ - /* number entries being moved */ - count -= ichdr1.count; - space = totallen - ichdr1.usedbytes; - space += count * sizeof(xfs_attr_leaf_entry_t); - - /* - * leaf1 is the destination, compact it if it looks tight. - */ - max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1); - max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t); - if (space > max) - xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp); - - /* - * Move low entries from leaf2 to high end of leaf1. - */ - xfs_attr3_leaf_moveents(args, leaf2, &ichdr2, 0, leaf1, &ichdr1, - ichdr1.count, count); - } - - xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1); - xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2); - xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1); - xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1); - - /* - * Copy out last hashval in each block for B-tree code. - */ - entries1 = xfs_attr3_leaf_entryp(leaf1); - entries2 = xfs_attr3_leaf_entryp(leaf2); - blk1->hashval = be32_to_cpu(entries1[ichdr1.count - 1].hashval); - blk2->hashval = be32_to_cpu(entries2[ichdr2.count - 1].hashval); - - /* - * Adjust the expected index for insertion. - * NOTE: this code depends on the (current) situation that the - * second block was originally empty. - * - * If the insertion point moved to the 2nd block, we must adjust - * the index. We must also track the entry just following the - * new entry for use in an "atomic rename" operation, that entry - * is always the "old" entry and the "new" entry is what we are - * inserting. The index/blkno fields refer to the "old" entry, - * while the index2/blkno2 fields refer to the "new" entry. - */ - if (blk1->index > ichdr1.count) { - ASSERT(state->inleaf == 0); - blk2->index = blk1->index - ichdr1.count; - args->index = args->index2 = blk2->index; - args->blkno = args->blkno2 = blk2->blkno; - } else if (blk1->index == ichdr1.count) { - if (state->inleaf) { - args->index = blk1->index; - args->blkno = blk1->blkno; - args->index2 = 0; - args->blkno2 = blk2->blkno; - } else { - /* - * On a double leaf split, the original attr location - * is already stored in blkno2/index2, so don't - * overwrite it overwise we corrupt the tree. - */ - blk2->index = blk1->index - ichdr1.count; - args->index = blk2->index; - args->blkno = blk2->blkno; - if (!state->extravalid) { - /* - * set the new attr location to match the old - * one and let the higher level split code - * decide where in the leaf to place it. - */ - args->index2 = blk2->index; - args->blkno2 = blk2->blkno; - } - } - } else { - ASSERT(state->inleaf == 1); - args->index = args->index2 = blk1->index; - args->blkno = args->blkno2 = blk1->blkno; - } -} - -/* - * Examine entries until we reduce the absolute difference in - * byte usage between the two blocks to a minimum. - * GROT: Is this really necessary? With other than a 512 byte blocksize, - * GROT: there will always be enough room in either block for a new entry. - * GROT: Do a double-split for this case? - */ -STATIC int -xfs_attr3_leaf_figure_balance( - struct xfs_da_state *state, - struct xfs_da_state_blk *blk1, - struct xfs_attr3_icleaf_hdr *ichdr1, - struct xfs_da_state_blk *blk2, - struct xfs_attr3_icleaf_hdr *ichdr2, - int *countarg, - int *usedbytesarg) -{ - struct xfs_attr_leafblock *leaf1 = blk1->bp->b_addr; - struct xfs_attr_leafblock *leaf2 = blk2->bp->b_addr; - struct xfs_attr_leaf_entry *entry; - int count; - int max; - int index; - int totallen = 0; - int half; - int lastdelta; - int foundit = 0; - int tmp; - - /* - * Examine entries until we reduce the absolute difference in - * byte usage between the two blocks to a minimum. - */ - max = ichdr1->count + ichdr2->count; - half = (max + 1) * sizeof(*entry); - half += ichdr1->usedbytes + ichdr2->usedbytes + - xfs_attr_leaf_newentsize(state->args, NULL); - half /= 2; - lastdelta = state->args->geo->blksize; - entry = xfs_attr3_leaf_entryp(leaf1); - for (count = index = 0; count < max; entry++, index++, count++) { - -#define XFS_ATTR_ABS(A) (((A) < 0) ? -(A) : (A)) - /* - * The new entry is in the first block, account for it. - */ - if (count == blk1->index) { - tmp = totallen + sizeof(*entry) + - xfs_attr_leaf_newentsize(state->args, NULL); - if (XFS_ATTR_ABS(half - tmp) > lastdelta) - break; - lastdelta = XFS_ATTR_ABS(half - tmp); - totallen = tmp; - foundit = 1; - } - - /* - * Wrap around into the second block if necessary. - */ - if (count == ichdr1->count) { - leaf1 = leaf2; - entry = xfs_attr3_leaf_entryp(leaf1); - index = 0; - } - - /* - * Figure out if next leaf entry would be too much. - */ - tmp = totallen + sizeof(*entry) + xfs_attr_leaf_entsize(leaf1, - index); - if (XFS_ATTR_ABS(half - tmp) > lastdelta) - break; - lastdelta = XFS_ATTR_ABS(half - tmp); - totallen = tmp; -#undef XFS_ATTR_ABS - } - - /* - * Calculate the number of usedbytes that will end up in lower block. - * If new entry not in lower block, fix up the count. - */ - totallen -= count * sizeof(*entry); - if (foundit) { - totallen -= sizeof(*entry) + - xfs_attr_leaf_newentsize(state->args, NULL); - } - - *countarg = count; - *usedbytesarg = totallen; - return foundit; -} - -/*======================================================================== - * Routines used for shrinking the Btree. - *========================================================================*/ - -/* - * Check a leaf block and its neighbors to see if the block should be - * collapsed into one or the other neighbor. Always keep the block - * with the smaller block number. - * If the current block is over 50% full, don't try to join it, return 0. - * If the block is empty, fill in the state structure and return 2. - * If it can be collapsed, fill in the state structure and return 1. - * If nothing can be done, return 0. - * - * GROT: allow for INCOMPLETE entries in calculation. - */ -int -xfs_attr3_leaf_toosmall( - struct xfs_da_state *state, - int *action) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_da_state_blk *blk; - struct xfs_attr3_icleaf_hdr ichdr; - struct xfs_buf *bp; - xfs_dablk_t blkno; - int bytes; - int forward; - int error; - int retval; - int i; - - trace_xfs_attr_leaf_toosmall(state->args); - - /* - * Check for the degenerate case of the block being over 50% full. - * If so, it's not worth even looking to see if we might be able - * to coalesce with a sibling. - */ - blk = &state->path.blk[ state->path.active-1 ]; - leaf = blk->bp->b_addr; - xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - bytes = xfs_attr3_leaf_hdr_size(leaf) + - ichdr.count * sizeof(xfs_attr_leaf_entry_t) + - ichdr.usedbytes; - if (bytes > (state->args->geo->blksize >> 1)) { - *action = 0; /* blk over 50%, don't try to join */ - return 0; - } - - /* - * Check for the degenerate case of the block being empty. - * If the block is empty, we'll simply delete it, no need to - * coalesce it with a sibling block. We choose (arbitrarily) - * to merge with the forward block unless it is NULL. - */ - if (ichdr.count == 0) { - /* - * Make altpath point to the block we want to keep and - * path point to the block we want to drop (this one). - */ - forward = (ichdr.forw != 0); - memcpy(&state->altpath, &state->path, sizeof(state->path)); - error = xfs_da3_path_shift(state, &state->altpath, forward, - 0, &retval); - if (error) - return error; - if (retval) { - *action = 0; - } else { - *action = 2; - } - return 0; - } - - /* - * Examine each sibling block to see if we can coalesce with - * at least 25% free space to spare. We need to figure out - * whether to merge with the forward or the backward block. - * We prefer coalescing with the lower numbered sibling so as - * to shrink an attribute list over time. - */ - /* start with smaller blk num */ - forward = ichdr.forw < ichdr.back; - for (i = 0; i < 2; forward = !forward, i++) { - struct xfs_attr3_icleaf_hdr ichdr2; - if (forward) - blkno = ichdr.forw; - else - blkno = ichdr.back; - if (blkno == 0) - continue; - error = xfs_attr3_leaf_read(state->args->trans, state->args->dp, - blkno, -1, &bp); - if (error) - return error; - - xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr); - - bytes = state->args->geo->blksize - - (state->args->geo->blksize >> 2) - - ichdr.usedbytes - ichdr2.usedbytes - - ((ichdr.count + ichdr2.count) * - sizeof(xfs_attr_leaf_entry_t)) - - xfs_attr3_leaf_hdr_size(leaf); - - xfs_trans_brelse(state->args->trans, bp); - if (bytes >= 0) - break; /* fits with at least 25% to spare */ - } - if (i >= 2) { - *action = 0; - return 0; - } - - /* - * Make altpath point to the block we want to keep (the lower - * numbered block) and path point to the block we want to drop. - */ - memcpy(&state->altpath, &state->path, sizeof(state->path)); - if (blkno < blk->blkno) { - error = xfs_da3_path_shift(state, &state->altpath, forward, - 0, &retval); - } else { - error = xfs_da3_path_shift(state, &state->path, forward, - 0, &retval); - } - if (error) - return error; - if (retval) { - *action = 0; - } else { - *action = 1; - } - return 0; -} - -/* - * Remove a name from the leaf attribute list structure. - * - * Return 1 if leaf is less than 37% full, 0 if >= 37% full. - * If two leaves are 37% full, when combined they will leave 25% free. - */ -int -xfs_attr3_leaf_remove( - struct xfs_buf *bp, - struct xfs_da_args *args) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_attr3_icleaf_hdr ichdr; - struct xfs_attr_leaf_entry *entry; - int before; - int after; - int smallest; - int entsize; - int tablesize; - int tmp; - int i; - - trace_xfs_attr_leaf_remove(args); - - leaf = bp->b_addr; - xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - - ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8); - ASSERT(args->index >= 0 && args->index < ichdr.count); - ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) + - xfs_attr3_leaf_hdr_size(leaf)); - - entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; - - ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); - ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize); - - /* - * Scan through free region table: - * check for adjacency of free'd entry with an existing one, - * find smallest free region in case we need to replace it, - * adjust any map that borders the entry table, - */ - tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); - tmp = ichdr.freemap[0].size; - before = after = -1; - smallest = XFS_ATTR_LEAF_MAPSIZE - 1; - entsize = xfs_attr_leaf_entsize(leaf, args->index); - for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - ASSERT(ichdr.freemap[i].base < args->geo->blksize); - ASSERT(ichdr.freemap[i].size < args->geo->blksize); - if (ichdr.freemap[i].base == tablesize) { - ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t); - ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t); - } - - if (ichdr.freemap[i].base + ichdr.freemap[i].size == - be16_to_cpu(entry->nameidx)) { - before = i; - } else if (ichdr.freemap[i].base == - (be16_to_cpu(entry->nameidx) + entsize)) { - after = i; - } else if (ichdr.freemap[i].size < tmp) { - tmp = ichdr.freemap[i].size; - smallest = i; - } - } - - /* - * Coalesce adjacent freemap regions, - * or replace the smallest region. - */ - if ((before >= 0) || (after >= 0)) { - if ((before >= 0) && (after >= 0)) { - ichdr.freemap[before].size += entsize; - ichdr.freemap[before].size += ichdr.freemap[after].size; - ichdr.freemap[after].base = 0; - ichdr.freemap[after].size = 0; - } else if (before >= 0) { - ichdr.freemap[before].size += entsize; - } else { - ichdr.freemap[after].base = be16_to_cpu(entry->nameidx); - ichdr.freemap[after].size += entsize; - } - } else { - /* - * Replace smallest region (if it is smaller than free'd entry) - */ - if (ichdr.freemap[smallest].size < entsize) { - ichdr.freemap[smallest].base = be16_to_cpu(entry->nameidx); - ichdr.freemap[smallest].size = entsize; - } - } - - /* - * Did we remove the first entry? - */ - if (be16_to_cpu(entry->nameidx) == ichdr.firstused) - smallest = 1; - else - smallest = 0; - - /* - * Compress the remaining entries and zero out the removed stuff. - */ - memset(xfs_attr3_leaf_name(leaf, args->index), 0, entsize); - ichdr.usedbytes -= entsize; - xfs_trans_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), - entsize)); - - tmp = (ichdr.count - args->index) * sizeof(xfs_attr_leaf_entry_t); - memmove(entry, entry + 1, tmp); - ichdr.count--; - xfs_trans_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(xfs_attr_leaf_entry_t))); - - entry = &xfs_attr3_leaf_entryp(leaf)[ichdr.count]; - memset(entry, 0, sizeof(xfs_attr_leaf_entry_t)); - - /* - * If we removed the first entry, re-find the first used byte - * in the name area. Note that if the entry was the "firstused", - * then we don't have a "hole" in our block resulting from - * removing the name. - */ - if (smallest) { - tmp = args->geo->blksize; - entry = xfs_attr3_leaf_entryp(leaf); - for (i = ichdr.count - 1; i >= 0; entry++, i--) { - ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); - ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize); - - if (be16_to_cpu(entry->nameidx) < tmp) - tmp = be16_to_cpu(entry->nameidx); - } - ichdr.firstused = tmp; - if (!ichdr.firstused) - ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN; - } else { - ichdr.holes = 1; /* mark as needing compaction */ - } - xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); - xfs_trans_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, &leaf->hdr, - xfs_attr3_leaf_hdr_size(leaf))); - - /* - * Check if leaf is less than 50% full, caller may want to - * "join" the leaf with a sibling if so. - */ - tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) + - ichdr.count * sizeof(xfs_attr_leaf_entry_t); - - return tmp < args->geo->magicpct; /* leaf is < 37% full */ -} - -/* - * Move all the attribute list entries from drop_leaf into save_leaf. - */ -void -xfs_attr3_leaf_unbalance( - struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk, - struct xfs_da_state_blk *save_blk) -{ - struct xfs_attr_leafblock *drop_leaf = drop_blk->bp->b_addr; - struct xfs_attr_leafblock *save_leaf = save_blk->bp->b_addr; - struct xfs_attr3_icleaf_hdr drophdr; - struct xfs_attr3_icleaf_hdr savehdr; - struct xfs_attr_leaf_entry *entry; - - trace_xfs_attr_leaf_unbalance(state->args); - - drop_leaf = drop_blk->bp->b_addr; - save_leaf = save_blk->bp->b_addr; - xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf); - xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf); - entry = xfs_attr3_leaf_entryp(drop_leaf); - - /* - * Save last hashval from dying block for later Btree fixup. - */ - drop_blk->hashval = be32_to_cpu(entry[drophdr.count - 1].hashval); - - /* - * Check if we need a temp buffer, or can we do it in place. - * Note that we don't check "leaf" for holes because we will - * always be dropping it, toosmall() decided that for us already. - */ - if (savehdr.holes == 0) { - /* - * dest leaf has no holes, so we add there. May need - * to make some room in the entry array. - */ - if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, - drop_blk->bp, &drophdr)) { - xfs_attr3_leaf_moveents(state->args, - drop_leaf, &drophdr, 0, - save_leaf, &savehdr, 0, - drophdr.count); - } else { - xfs_attr3_leaf_moveents(state->args, - drop_leaf, &drophdr, 0, - save_leaf, &savehdr, - savehdr.count, drophdr.count); - } - } else { - /* - * Destination has holes, so we make a temporary copy - * of the leaf and add them both to that. - */ - struct xfs_attr_leafblock *tmp_leaf; - struct xfs_attr3_icleaf_hdr tmphdr; - - tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP); - - /* - * Copy the header into the temp leaf so that all the stuff - * not in the incore header is present and gets copied back in - * once we've moved all the entries. - */ - memcpy(tmp_leaf, save_leaf, xfs_attr3_leaf_hdr_size(save_leaf)); - - memset(&tmphdr, 0, sizeof(tmphdr)); - tmphdr.magic = savehdr.magic; - tmphdr.forw = savehdr.forw; - tmphdr.back = savehdr.back; - tmphdr.firstused = state->args->geo->blksize; - - /* write the header to the temp buffer to initialise it */ - xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr); - - if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, - drop_blk->bp, &drophdr)) { - xfs_attr3_leaf_moveents(state->args, - drop_leaf, &drophdr, 0, - tmp_leaf, &tmphdr, 0, - drophdr.count); - xfs_attr3_leaf_moveents(state->args, - save_leaf, &savehdr, 0, - tmp_leaf, &tmphdr, tmphdr.count, - savehdr.count); - } else { - xfs_attr3_leaf_moveents(state->args, - save_leaf, &savehdr, 0, - tmp_leaf, &tmphdr, 0, - savehdr.count); - xfs_attr3_leaf_moveents(state->args, - drop_leaf, &drophdr, 0, - tmp_leaf, &tmphdr, tmphdr.count, - drophdr.count); - } - memcpy(save_leaf, tmp_leaf, state->args->geo->blksize); - savehdr = tmphdr; /* struct copy */ - kmem_free(tmp_leaf); - } - - xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr); - xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, - state->args->geo->blksize - 1); - - /* - * Copy out last hashval in each block for B-tree code. - */ - entry = xfs_attr3_leaf_entryp(save_leaf); - save_blk->hashval = be32_to_cpu(entry[savehdr.count - 1].hashval); -} - -/*======================================================================== - * Routines used for finding things in the Btree. - *========================================================================*/ - -/* - * Look up a name in a leaf attribute list structure. - * This is the internal routine, it uses the caller's buffer. - * - * Note that duplicate keys are allowed, but only check within the - * current leaf node. The Btree code must check in adjacent leaf nodes. - * - * Return in args->index the index into the entry[] array of either - * the found entry, or where the entry should have been (insert before - * that entry). - * - * Don't change the args->value unless we find the attribute. - */ -int -xfs_attr3_leaf_lookup_int( - struct xfs_buf *bp, - struct xfs_da_args *args) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_attr3_icleaf_hdr ichdr; - struct xfs_attr_leaf_entry *entry; - struct xfs_attr_leaf_entry *entries; - struct xfs_attr_leaf_name_local *name_loc; - struct xfs_attr_leaf_name_remote *name_rmt; - xfs_dahash_t hashval; - int probe; - int span; - - trace_xfs_attr_leaf_lookup(args); - - leaf = bp->b_addr; - xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - entries = xfs_attr3_leaf_entryp(leaf); - ASSERT(ichdr.count < args->geo->blksize / 8); - - /* - * Binary search. (note: small blocks will skip this loop) - */ - hashval = args->hashval; - probe = span = ichdr.count / 2; - for (entry = &entries[probe]; span > 4; entry = &entries[probe]) { - span /= 2; - if (be32_to_cpu(entry->hashval) < hashval) - probe += span; - else if (be32_to_cpu(entry->hashval) > hashval) - probe -= span; - else - break; - } - ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count)); - ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval); - - /* - * Since we may have duplicate hashval's, find the first matching - * hashval in the leaf. - */ - while (probe > 0 && be32_to_cpu(entry->hashval) >= hashval) { - entry--; - probe--; - } - while (probe < ichdr.count && - be32_to_cpu(entry->hashval) < hashval) { - entry++; - probe++; - } - if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) { - args->index = probe; - return ENOATTR; - } - - /* - * Duplicate keys may be present, so search all of them for a match. - */ - for (; probe < ichdr.count && (be32_to_cpu(entry->hashval) == hashval); - entry++, probe++) { -/* - * GROT: Add code to remove incomplete entries. - */ - /* - * If we are looking for INCOMPLETE entries, show only those. - * If we are looking for complete entries, show only those. - */ - if ((args->flags & XFS_ATTR_INCOMPLETE) != - (entry->flags & XFS_ATTR_INCOMPLETE)) { - continue; - } - if (entry->flags & XFS_ATTR_LOCAL) { - name_loc = xfs_attr3_leaf_name_local(leaf, probe); - if (name_loc->namelen != args->namelen) - continue; - if (memcmp(args->name, name_loc->nameval, - args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, entry->flags)) - continue; - args->index = probe; - return EEXIST; - } else { - name_rmt = xfs_attr3_leaf_name_remote(leaf, probe); - if (name_rmt->namelen != args->namelen) - continue; - if (memcmp(args->name, name_rmt->name, - args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, entry->flags)) - continue; - args->index = probe; - args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); - args->rmtblkno = be32_to_cpu(name_rmt->valueblk); - args->rmtblkcnt = xfs_attr3_rmt_blocks( - args->dp->i_mount, - args->rmtvaluelen); - return EEXIST; - } - } - args->index = probe; - return ENOATTR; -} - -/* - * Get the value associated with an attribute name from a leaf attribute - * list structure. - */ -int -xfs_attr3_leaf_getvalue( - struct xfs_buf *bp, - struct xfs_da_args *args) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_attr3_icleaf_hdr ichdr; - struct xfs_attr_leaf_entry *entry; - struct xfs_attr_leaf_name_local *name_loc; - struct xfs_attr_leaf_name_remote *name_rmt; - int valuelen; - - leaf = bp->b_addr; - xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - ASSERT(ichdr.count < args->geo->blksize / 8); - ASSERT(args->index < ichdr.count); - - entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; - if (entry->flags & XFS_ATTR_LOCAL) { - name_loc = xfs_attr3_leaf_name_local(leaf, args->index); - ASSERT(name_loc->namelen == args->namelen); - ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0); - valuelen = be16_to_cpu(name_loc->valuelen); - if (args->flags & ATTR_KERNOVAL) { - args->valuelen = valuelen; - return 0; - } - if (args->valuelen < valuelen) { - args->valuelen = valuelen; - return ERANGE; - } - args->valuelen = valuelen; - memcpy(args->value, &name_loc->nameval[args->namelen], valuelen); - } else { - name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); - ASSERT(name_rmt->namelen == args->namelen); - ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); - args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); - args->rmtblkno = be32_to_cpu(name_rmt->valueblk); - args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, - args->rmtvaluelen); - if (args->flags & ATTR_KERNOVAL) { - args->valuelen = args->rmtvaluelen; - return 0; - } - if (args->valuelen < args->rmtvaluelen) { - args->valuelen = args->rmtvaluelen; - return ERANGE; - } - args->valuelen = args->rmtvaluelen; - } - return 0; -} - -/*======================================================================== - * Utility routines. - *========================================================================*/ - -/* - * Move the indicated entries from one leaf to another. - * NOTE: this routine modifies both source and destination leaves. - */ -/*ARGSUSED*/ -STATIC void -xfs_attr3_leaf_moveents( - struct xfs_da_args *args, - struct xfs_attr_leafblock *leaf_s, - struct xfs_attr3_icleaf_hdr *ichdr_s, - int start_s, - struct xfs_attr_leafblock *leaf_d, - struct xfs_attr3_icleaf_hdr *ichdr_d, - int start_d, - int count) -{ - struct xfs_attr_leaf_entry *entry_s; - struct xfs_attr_leaf_entry *entry_d; - int desti; - int tmp; - int i; - - /* - * Check for nothing to do. - */ - if (count == 0) - return; - - /* - * Set up environment. - */ - ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC || - ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC); - ASSERT(ichdr_s->magic == ichdr_d->magic); - ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8); - ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s)) - + xfs_attr3_leaf_hdr_size(leaf_s)); - ASSERT(ichdr_d->count < args->geo->blksize / 8); - ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d)) - + xfs_attr3_leaf_hdr_size(leaf_d)); - - ASSERT(start_s < ichdr_s->count); - ASSERT(start_d <= ichdr_d->count); - ASSERT(count <= ichdr_s->count); - - - /* - * Move the entries in the destination leaf up to make a hole? - */ - if (start_d < ichdr_d->count) { - tmp = ichdr_d->count - start_d; - tmp *= sizeof(xfs_attr_leaf_entry_t); - entry_s = &xfs_attr3_leaf_entryp(leaf_d)[start_d]; - entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d + count]; - memmove(entry_d, entry_s, tmp); - } - - /* - * Copy all entry's in the same (sorted) order, - * but allocate attribute info packed and in sequence. - */ - entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; - entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d]; - desti = start_d; - for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) { - ASSERT(be16_to_cpu(entry_s->nameidx) >= ichdr_s->firstused); - tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i); -#ifdef GROT - /* - * Code to drop INCOMPLETE entries. Difficult to use as we - * may also need to change the insertion index. Code turned - * off for 6.2, should be revisited later. - */ - if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */ - memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp); - ichdr_s->usedbytes -= tmp; - ichdr_s->count -= 1; - entry_d--; /* to compensate for ++ in loop hdr */ - desti--; - if ((start_s + i) < offset) - result++; /* insertion index adjustment */ - } else { -#endif /* GROT */ - ichdr_d->firstused -= tmp; - /* both on-disk, don't endian flip twice */ - entry_d->hashval = entry_s->hashval; - entry_d->nameidx = cpu_to_be16(ichdr_d->firstused); - entry_d->flags = entry_s->flags; - ASSERT(be16_to_cpu(entry_d->nameidx) + tmp - <= args->geo->blksize); - memmove(xfs_attr3_leaf_name(leaf_d, desti), - xfs_attr3_leaf_name(leaf_s, start_s + i), tmp); - ASSERT(be16_to_cpu(entry_s->nameidx) + tmp - <= args->geo->blksize); - memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp); - ichdr_s->usedbytes -= tmp; - ichdr_d->usedbytes += tmp; - ichdr_s->count -= 1; - ichdr_d->count += 1; - tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf_d); - ASSERT(ichdr_d->firstused >= tmp); -#ifdef GROT - } -#endif /* GROT */ - } - - /* - * Zero out the entries we just copied. - */ - if (start_s == ichdr_s->count) { - tmp = count * sizeof(xfs_attr_leaf_entry_t); - entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; - ASSERT(((char *)entry_s + tmp) <= - ((char *)leaf_s + args->geo->blksize)); - memset(entry_s, 0, tmp); - } else { - /* - * Move the remaining entries down to fill the hole, - * then zero the entries at the top. - */ - tmp = (ichdr_s->count - count) * sizeof(xfs_attr_leaf_entry_t); - entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s + count]; - entry_d = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; - memmove(entry_d, entry_s, tmp); - - tmp = count * sizeof(xfs_attr_leaf_entry_t); - entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count]; - ASSERT(((char *)entry_s + tmp) <= - ((char *)leaf_s + args->geo->blksize)); - memset(entry_s, 0, tmp); - } - - /* - * Fill in the freemap information - */ - ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d); - ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t); - ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base; - ichdr_d->freemap[1].base = 0; - ichdr_d->freemap[2].base = 0; - ichdr_d->freemap[1].size = 0; - ichdr_d->freemap[2].size = 0; - ichdr_s->holes = 1; /* leaf may not be compact */ -} - -/* - * Pick up the last hashvalue from a leaf block. - */ -xfs_dahash_t -xfs_attr_leaf_lasthash( - struct xfs_buf *bp, - int *count) -{ - struct xfs_attr3_icleaf_hdr ichdr; - struct xfs_attr_leaf_entry *entries; - - xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr); - entries = xfs_attr3_leaf_entryp(bp->b_addr); - if (count) - *count = ichdr.count; - if (!ichdr.count) - return 0; - return be32_to_cpu(entries[ichdr.count - 1].hashval); -} - -/* - * Calculate the number of bytes used to store the indicated attribute - * (whether local or remote only calculate bytes in this block). - */ -STATIC int -xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) -{ - struct xfs_attr_leaf_entry *entries; - xfs_attr_leaf_name_local_t *name_loc; - xfs_attr_leaf_name_remote_t *name_rmt; - int size; - - entries = xfs_attr3_leaf_entryp(leaf); - if (entries[index].flags & XFS_ATTR_LOCAL) { - name_loc = xfs_attr3_leaf_name_local(leaf, index); - size = xfs_attr_leaf_entsize_local(name_loc->namelen, - be16_to_cpu(name_loc->valuelen)); - } else { - name_rmt = xfs_attr3_leaf_name_remote(leaf, index); - size = xfs_attr_leaf_entsize_remote(name_rmt->namelen); - } - return size; -} - -/* - * Calculate the number of bytes that would be required to store the new - * attribute (whether local or remote only calculate bytes in this block). - * This routine decides as a side effect whether the attribute will be - * a "local" or a "remote" attribute. - */ -int -xfs_attr_leaf_newentsize( - struct xfs_da_args *args, - int *local) -{ - int size; - - size = xfs_attr_leaf_entsize_local(args->namelen, args->valuelen); - if (size < xfs_attr_leaf_entsize_local_max(args->geo->blksize)) { - if (local) - *local = 1; - return size; - } - if (local) - *local = 0; - return xfs_attr_leaf_entsize_remote(args->namelen); -} - - -/*======================================================================== - * Manage the INCOMPLETE flag in a leaf entry - *========================================================================*/ - -/* - * Clear the INCOMPLETE flag on an entry in a leaf block. - */ -int -xfs_attr3_leaf_clearflag( - struct xfs_da_args *args) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_attr_leaf_entry *entry; - struct xfs_attr_leaf_name_remote *name_rmt; - struct xfs_buf *bp; - int error; -#ifdef DEBUG - struct xfs_attr3_icleaf_hdr ichdr; - xfs_attr_leaf_name_local_t *name_loc; - int namelen; - char *name; -#endif /* DEBUG */ - - trace_xfs_attr_leaf_clearflag(args); - /* - * Set up the operation. - */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); - if (error) - return error; - - leaf = bp->b_addr; - entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; - ASSERT(entry->flags & XFS_ATTR_INCOMPLETE); - -#ifdef DEBUG - xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - ASSERT(args->index < ichdr.count); - ASSERT(args->index >= 0); - - if (entry->flags & XFS_ATTR_LOCAL) { - name_loc = xfs_attr3_leaf_name_local(leaf, args->index); - namelen = name_loc->namelen; - name = (char *)name_loc->nameval; - } else { - name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); - namelen = name_rmt->namelen; - name = (char *)name_rmt->name; - } - ASSERT(be32_to_cpu(entry->hashval) == args->hashval); - ASSERT(namelen == args->namelen); - ASSERT(memcmp(name, args->name, namelen) == 0); -#endif /* DEBUG */ - - entry->flags &= ~XFS_ATTR_INCOMPLETE; - xfs_trans_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); - - if (args->rmtblkno) { - ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0); - name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); - name_rmt->valueblk = cpu_to_be32(args->rmtblkno); - name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen); - xfs_trans_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); - } - - /* - * Commit the flag value change and start the next trans in series. - */ - return xfs_trans_roll(&args->trans, args->dp); -} - -/* - * Set the INCOMPLETE flag on an entry in a leaf block. - */ -int -xfs_attr3_leaf_setflag( - struct xfs_da_args *args) -{ - struct xfs_attr_leafblock *leaf; - struct xfs_attr_leaf_entry *entry; - struct xfs_attr_leaf_name_remote *name_rmt; - struct xfs_buf *bp; - int error; -#ifdef DEBUG - struct xfs_attr3_icleaf_hdr ichdr; -#endif - - trace_xfs_attr_leaf_setflag(args); - - /* - * Set up the operation. - */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); - if (error) - return error; - - leaf = bp->b_addr; -#ifdef DEBUG - xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - ASSERT(args->index < ichdr.count); - ASSERT(args->index >= 0); -#endif - entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; - - ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0); - entry->flags |= XFS_ATTR_INCOMPLETE; - xfs_trans_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); - if ((entry->flags & XFS_ATTR_LOCAL) == 0) { - name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); - name_rmt->valueblk = 0; - name_rmt->valuelen = 0; - xfs_trans_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); - } - - /* - * Commit the flag value change and start the next trans in series. - */ - return xfs_trans_roll(&args->trans, args->dp); -} - -/* - * In a single transaction, clear the INCOMPLETE flag on the leaf entry - * given by args->blkno/index and set the INCOMPLETE flag on the leaf - * entry given by args->blkno2/index2. - * - * Note that they could be in different blocks, or in the same block. - */ -int -xfs_attr3_leaf_flipflags( - struct xfs_da_args *args) -{ - struct xfs_attr_leafblock *leaf1; - struct xfs_attr_leafblock *leaf2; - struct xfs_attr_leaf_entry *entry1; - struct xfs_attr_leaf_entry *entry2; - struct xfs_attr_leaf_name_remote *name_rmt; - struct xfs_buf *bp1; - struct xfs_buf *bp2; - int error; -#ifdef DEBUG - struct xfs_attr3_icleaf_hdr ichdr1; - struct xfs_attr3_icleaf_hdr ichdr2; - xfs_attr_leaf_name_local_t *name_loc; - int namelen1, namelen2; - char *name1, *name2; -#endif /* DEBUG */ - - trace_xfs_attr_leaf_flipflags(args); - - /* - * Read the block containing the "old" attr - */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1); - if (error) - return error; - - /* - * Read the block containing the "new" attr, if it is different - */ - if (args->blkno2 != args->blkno) { - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2, - -1, &bp2); - if (error) - return error; - } else { - bp2 = bp1; - } - - leaf1 = bp1->b_addr; - entry1 = &xfs_attr3_leaf_entryp(leaf1)[args->index]; - - leaf2 = bp2->b_addr; - entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2]; - -#ifdef DEBUG - xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1); - ASSERT(args->index < ichdr1.count); - ASSERT(args->index >= 0); - - xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2); - ASSERT(args->index2 < ichdr2.count); - ASSERT(args->index2 >= 0); - - if (entry1->flags & XFS_ATTR_LOCAL) { - name_loc = xfs_attr3_leaf_name_local(leaf1, args->index); - namelen1 = name_loc->namelen; - name1 = (char *)name_loc->nameval; - } else { - name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index); - namelen1 = name_rmt->namelen; - name1 = (char *)name_rmt->name; - } - if (entry2->flags & XFS_ATTR_LOCAL) { - name_loc = xfs_attr3_leaf_name_local(leaf2, args->index2); - namelen2 = name_loc->namelen; - name2 = (char *)name_loc->nameval; - } else { - name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2); - namelen2 = name_rmt->namelen; - name2 = (char *)name_rmt->name; - } - ASSERT(be32_to_cpu(entry1->hashval) == be32_to_cpu(entry2->hashval)); - ASSERT(namelen1 == namelen2); - ASSERT(memcmp(name1, name2, namelen1) == 0); -#endif /* DEBUG */ - - ASSERT(entry1->flags & XFS_ATTR_INCOMPLETE); - ASSERT((entry2->flags & XFS_ATTR_INCOMPLETE) == 0); - - entry1->flags &= ~XFS_ATTR_INCOMPLETE; - xfs_trans_log_buf(args->trans, bp1, - XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1))); - if (args->rmtblkno) { - ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); - name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index); - name_rmt->valueblk = cpu_to_be32(args->rmtblkno); - name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen); - xfs_trans_log_buf(args->trans, bp1, - XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt))); - } - - entry2->flags |= XFS_ATTR_INCOMPLETE; - xfs_trans_log_buf(args->trans, bp2, - XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2))); - if ((entry2->flags & XFS_ATTR_LOCAL) == 0) { - name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2); - name_rmt->valueblk = 0; - name_rmt->valuelen = 0; - xfs_trans_log_buf(args->trans, bp2, - XFS_DA_LOGRANGE(leaf2, name_rmt, sizeof(*name_rmt))); - } - - /* - * Commit the flag value change and start the next trans in series. - */ - error = xfs_trans_roll(&args->trans, args->dp); - - return error; -} diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c deleted file mode 100644 index a8bbc562ff35..000000000000 --- a/fs/xfs/xfs_attr_remote.c +++ /dev/null @@ -1,628 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_inode.h" -#include "xfs_alloc.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_bmap.h" -#include "xfs_bmap_util.h" -#include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_attr_remote.h" -#include "xfs_trans_space.h" -#include "xfs_trace.h" -#include "xfs_cksum.h" -#include "xfs_buf_item.h" -#include "xfs_error.h" - -#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ - -/* - * Each contiguous block has a header, so it is not just a simple attribute - * length to FSB conversion. - */ -int -xfs_attr3_rmt_blocks( - struct xfs_mount *mp, - int attrlen) -{ - if (xfs_sb_version_hascrc(&mp->m_sb)) { - int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize); - return (attrlen + buflen - 1) / buflen; - } - return XFS_B_TO_FSB(mp, attrlen); -} - -/* - * Checking of the remote attribute header is split into two parts. The verifier - * does CRC, location and bounds checking, the unpacking function checks the - * attribute parameters and owner. - */ -static bool -xfs_attr3_rmt_hdr_ok( - void *ptr, - xfs_ino_t ino, - uint32_t offset, - uint32_t size, - xfs_daddr_t bno) -{ - struct xfs_attr3_rmt_hdr *rmt = ptr; - - if (bno != be64_to_cpu(rmt->rm_blkno)) - return false; - if (offset != be32_to_cpu(rmt->rm_offset)) - return false; - if (size != be32_to_cpu(rmt->rm_bytes)) - return false; - if (ino != be64_to_cpu(rmt->rm_owner)) - return false; - - /* ok */ - return true; -} - -static bool -xfs_attr3_rmt_verify( - struct xfs_mount *mp, - void *ptr, - int fsbsize, - xfs_daddr_t bno) -{ - struct xfs_attr3_rmt_hdr *rmt = ptr; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) - return false; - if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid)) - return false; - if (be64_to_cpu(rmt->rm_blkno) != bno) - return false; - if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) - return false; - if (be32_to_cpu(rmt->rm_offset) + - be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) - return false; - if (rmt->rm_owner == 0) - return false; - - return true; -} - -static void -xfs_attr3_rmt_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - char *ptr; - int len; - xfs_daddr_t bno; - int blksize = mp->m_attr_geo->blksize; - - /* no verification of non-crc buffers */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - ptr = bp->b_addr; - bno = bp->b_bn; - len = BBTOB(bp->b_length); - ASSERT(len >= blksize); - - while (len > 0) { - if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { - xfs_buf_ioerror(bp, EFSBADCRC); - break; - } - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - break; - } - len -= blksize; - ptr += blksize; - bno += BTOBB(blksize); - } - - if (bp->b_error) - xfs_verifier_error(bp); - else - ASSERT(len == 0); -} - -static void -xfs_attr3_rmt_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - char *ptr; - int len; - xfs_daddr_t bno; - int blksize = mp->m_attr_geo->blksize; - - /* no verification of non-crc buffers */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - ptr = bp->b_addr; - bno = bp->b_bn; - len = BBTOB(bp->b_length); - ASSERT(len >= blksize); - - while (len > 0) { - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - if (bip) { - struct xfs_attr3_rmt_hdr *rmt; - - rmt = (struct xfs_attr3_rmt_hdr *)ptr; - rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); - } - xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); - - len -= blksize; - ptr += blksize; - bno += BTOBB(blksize); - } - ASSERT(len == 0); -} - -const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { - .verify_read = xfs_attr3_rmt_read_verify, - .verify_write = xfs_attr3_rmt_write_verify, -}; - -STATIC int -xfs_attr3_rmt_hdr_set( - struct xfs_mount *mp, - void *ptr, - xfs_ino_t ino, - uint32_t offset, - uint32_t size, - xfs_daddr_t bno) -{ - struct xfs_attr3_rmt_hdr *rmt = ptr; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return 0; - - rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC); - rmt->rm_offset = cpu_to_be32(offset); - rmt->rm_bytes = cpu_to_be32(size); - uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid); - rmt->rm_owner = cpu_to_be64(ino); - rmt->rm_blkno = cpu_to_be64(bno); - - return sizeof(struct xfs_attr3_rmt_hdr); -} - -/* - * Helper functions to copy attribute data in and out of the one disk extents - */ -STATIC int -xfs_attr_rmtval_copyout( - struct xfs_mount *mp, - struct xfs_buf *bp, - xfs_ino_t ino, - int *offset, - int *valuelen, - __uint8_t **dst) -{ - char *src = bp->b_addr; - xfs_daddr_t bno = bp->b_bn; - int len = BBTOB(bp->b_length); - int blksize = mp->m_attr_geo->blksize; - - ASSERT(len >= blksize); - - while (len > 0 && *valuelen > 0) { - int hdr_size = 0; - int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); - - byte_cnt = min(*valuelen, byte_cnt); - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, - byte_cnt, bno)) { - xfs_alert(mp, -"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", - bno, *offset, byte_cnt, ino); - return EFSCORRUPTED; - } - hdr_size = sizeof(struct xfs_attr3_rmt_hdr); - } - - memcpy(*dst, src + hdr_size, byte_cnt); - - /* roll buffer forwards */ - len -= blksize; - src += blksize; - bno += BTOBB(blksize); - - /* roll attribute data forwards */ - *valuelen -= byte_cnt; - *dst += byte_cnt; - *offset += byte_cnt; - } - return 0; -} - -STATIC void -xfs_attr_rmtval_copyin( - struct xfs_mount *mp, - struct xfs_buf *bp, - xfs_ino_t ino, - int *offset, - int *valuelen, - __uint8_t **src) -{ - char *dst = bp->b_addr; - xfs_daddr_t bno = bp->b_bn; - int len = BBTOB(bp->b_length); - int blksize = mp->m_attr_geo->blksize; - - ASSERT(len >= blksize); - - while (len > 0 && *valuelen > 0) { - int hdr_size; - int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); - - byte_cnt = min(*valuelen, byte_cnt); - hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, - byte_cnt, bno); - - memcpy(dst + hdr_size, *src, byte_cnt); - - /* - * If this is the last block, zero the remainder of it. - * Check that we are actually the last block, too. - */ - if (byte_cnt + hdr_size < blksize) { - ASSERT(*valuelen - byte_cnt == 0); - ASSERT(len == blksize); - memset(dst + hdr_size + byte_cnt, 0, - blksize - hdr_size - byte_cnt); - } - - /* roll buffer forwards */ - len -= blksize; - dst += blksize; - bno += BTOBB(blksize); - - /* roll attribute data forwards */ - *valuelen -= byte_cnt; - *src += byte_cnt; - *offset += byte_cnt; - } -} - -/* - * Read the value associated with an attribute from the out-of-line buffer - * that we stored it in. - */ -int -xfs_attr_rmtval_get( - struct xfs_da_args *args) -{ - struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; - struct xfs_mount *mp = args->dp->i_mount; - struct xfs_buf *bp; - xfs_dablk_t lblkno = args->rmtblkno; - __uint8_t *dst = args->value; - int valuelen; - int nmap; - int error; - int blkcnt = args->rmtblkcnt; - int i; - int offset = 0; - - trace_xfs_attr_rmtval_get(args); - - ASSERT(!(args->flags & ATTR_KERNOVAL)); - ASSERT(args->rmtvaluelen == args->valuelen); - - valuelen = args->rmtvaluelen; - while (valuelen > 0) { - nmap = ATTR_RMTVALUE_MAPSIZE; - error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, - blkcnt, map, &nmap, - XFS_BMAPI_ATTRFORK); - if (error) - return error; - ASSERT(nmap >= 1); - - for (i = 0; (i < nmap) && (valuelen > 0); i++) { - xfs_daddr_t dblkno; - int dblkcnt; - - ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && - (map[i].br_startblock != HOLESTARTBLOCK)); - dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); - dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, - dblkno, dblkcnt, 0, &bp, - &xfs_attr3_rmt_buf_ops); - if (error) - return error; - - error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, - &offset, &valuelen, - &dst); - xfs_buf_relse(bp); - if (error) - return error; - - /* roll attribute extent map forwards */ - lblkno += map[i].br_blockcount; - blkcnt -= map[i].br_blockcount; - } - } - ASSERT(valuelen == 0); - return 0; -} - -/* - * Write the value associated with an attribute into the out-of-line buffer - * that we have defined for it. - */ -int -xfs_attr_rmtval_set( - struct xfs_da_args *args) -{ - struct xfs_inode *dp = args->dp; - struct xfs_mount *mp = dp->i_mount; - struct xfs_bmbt_irec map; - xfs_dablk_t lblkno; - xfs_fileoff_t lfileoff = 0; - __uint8_t *src = args->value; - int blkcnt; - int valuelen; - int nmap; - int error; - int offset = 0; - - trace_xfs_attr_rmtval_set(args); - - /* - * Find a "hole" in the attribute address space large enough for - * us to drop the new attribute's value into. Because CRC enable - * attributes have headers, we can't just do a straight byte to FSB - * conversion and have to take the header space into account. - */ - blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); - error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, - XFS_ATTR_FORK); - if (error) - return error; - - args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; - args->rmtblkcnt = blkcnt; - - /* - * Roll through the "value", allocating blocks on disk as required. - */ - while (blkcnt > 0) { - int committed; - - /* - * Allocate a single extent, up to the size of the value. - */ - xfs_bmap_init(args->flist, args->firstblock); - nmap = 1; - error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, - blkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - args->firstblock, args->total, &map, &nmap, - args->flist); - if (!error) { - error = xfs_bmap_finish(&args->trans, args->flist, - &committed); - } - if (error) { - ASSERT(committed); - args->trans = NULL; - xfs_bmap_cancel(args->flist); - return error; - } - - /* - * bmap_finish() may have committed the last trans and started - * a new one. We need the inode to be in all transactions. - */ - if (committed) - xfs_trans_ijoin(args->trans, dp, 0); - - ASSERT(nmap == 1); - ASSERT((map.br_startblock != DELAYSTARTBLOCK) && - (map.br_startblock != HOLESTARTBLOCK)); - lblkno += map.br_blockcount; - blkcnt -= map.br_blockcount; - - /* - * Start the next trans in the chain. - */ - error = xfs_trans_roll(&args->trans, dp); - if (error) - return error; - } - - /* - * Roll through the "value", copying the attribute value to the - * already-allocated blocks. Blocks are written synchronously - * so that we can know they are all on disk before we turn off - * the INCOMPLETE flag. - */ - lblkno = args->rmtblkno; - blkcnt = args->rmtblkcnt; - valuelen = args->rmtvaluelen; - while (valuelen > 0) { - struct xfs_buf *bp; - xfs_daddr_t dblkno; - int dblkcnt; - - ASSERT(blkcnt > 0); - - xfs_bmap_init(args->flist, args->firstblock); - nmap = 1; - error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, - blkcnt, &map, &nmap, - XFS_BMAPI_ATTRFORK); - if (error) - return error; - ASSERT(nmap == 1); - ASSERT((map.br_startblock != DELAYSTARTBLOCK) && - (map.br_startblock != HOLESTARTBLOCK)); - - dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), - dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); - - bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); - if (!bp) - return ENOMEM; - bp->b_ops = &xfs_attr3_rmt_buf_ops; - - xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, - &valuelen, &src); - - error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ - xfs_buf_relse(bp); - if (error) - return error; - - - /* roll attribute extent map forwards */ - lblkno += map.br_blockcount; - blkcnt -= map.br_blockcount; - } - ASSERT(valuelen == 0); - return 0; -} - -/* - * Remove the value associated with an attribute by deleting the - * out-of-line buffer that it is stored on. - */ -int -xfs_attr_rmtval_remove( - struct xfs_da_args *args) -{ - struct xfs_mount *mp = args->dp->i_mount; - xfs_dablk_t lblkno; - int blkcnt; - int error; - int done; - - trace_xfs_attr_rmtval_remove(args); - - /* - * Roll through the "value", invalidating the attribute value's blocks. - */ - lblkno = args->rmtblkno; - blkcnt = args->rmtblkcnt; - while (blkcnt > 0) { - struct xfs_bmbt_irec map; - struct xfs_buf *bp; - xfs_daddr_t dblkno; - int dblkcnt; - int nmap; - - /* - * Try to remember where we decided to put the value. - */ - nmap = 1; - error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, - blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); - if (error) - return error; - ASSERT(nmap == 1); - ASSERT((map.br_startblock != DELAYSTARTBLOCK) && - (map.br_startblock != HOLESTARTBLOCK)); - - dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), - dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); - - /* - * If the "remote" value is in the cache, remove it. - */ - bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); - if (bp) { - xfs_buf_stale(bp); - xfs_buf_relse(bp); - bp = NULL; - } - - lblkno += map.br_blockcount; - blkcnt -= map.br_blockcount; - } - - /* - * Keep de-allocating extents until the remote-value region is gone. - */ - lblkno = args->rmtblkno; - blkcnt = args->rmtblkcnt; - done = 0; - while (!done) { - int committed; - - xfs_bmap_init(args->flist, args->firstblock); - error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - 1, args->firstblock, args->flist, - &done); - if (!error) { - error = xfs_bmap_finish(&args->trans, args->flist, - &committed); - } - if (error) { - ASSERT(committed); - args->trans = NULL; - xfs_bmap_cancel(args->flist); - return error; - } - - /* - * bmap_finish() may have committed the last trans and started - * a new one. We need the inode to be in all transactions. - */ - if (committed) - xfs_trans_ijoin(args->trans, args->dp, 0); - - /* - * Close out trans and start the next one in the chain. - */ - error = xfs_trans_roll(&args->trans, args->dp); - if (error) - return error; - } - return 0; -} diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c deleted file mode 100644 index b44d63189dab..000000000000 --- a/fs/xfs/xfs_bmap.c +++ /dev/null @@ -1,5609 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_inum.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_dir2.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_extfree_item.h" -#include "xfs_alloc.h" -#include "xfs_bmap.h" -#include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_quota.h" -#include "xfs_trans_space.h" -#include "xfs_buf_item.h" -#include "xfs_trace.h" -#include "xfs_symlink.h" -#include "xfs_attr_leaf.h" -#include "xfs_dinode.h" -#include "xfs_filestream.h" - - -kmem_zone_t *xfs_bmap_free_item_zone; - -/* - * Miscellaneous helper functions - */ - -/* - * Compute and fill in the value of the maximum depth of a bmap btree - * in this filesystem. Done once, during mount. - */ -void -xfs_bmap_compute_maxlevels( - xfs_mount_t *mp, /* file system mount structure */ - int whichfork) /* data or attr fork */ -{ - int level; /* btree level */ - uint maxblocks; /* max blocks at this level */ - uint maxleafents; /* max leaf entries possible */ - int maxrootrecs; /* max records in root block */ - int minleafrecs; /* min records in leaf block */ - int minnoderecs; /* min records in node block */ - int sz; /* root block size */ - - /* - * The maximum number of extents in a file, hence the maximum - * number of leaf entries, is controlled by the type of di_nextents - * (a signed 32-bit number, xfs_extnum_t), or by di_anextents - * (a signed 16-bit number, xfs_aextnum_t). - * - * Note that we can no longer assume that if we are in ATTR1 that - * the fork offset of all the inodes will be - * (xfs_default_attroffset(ip) >> 3) because we could have mounted - * with ATTR2 and then mounted back with ATTR1, keeping the - * di_forkoff's fixed but probably at various positions. Therefore, - * for both ATTR1 and ATTR2 we have to assume the worst case scenario - * of a minimum size available. - */ - if (whichfork == XFS_DATA_FORK) { - maxleafents = MAXEXTNUM; - sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); - } else { - maxleafents = MAXAEXTNUM; - sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); - } - maxrootrecs = xfs_bmdr_maxrecs(sz, 0); - minleafrecs = mp->m_bmap_dmnr[0]; - minnoderecs = mp->m_bmap_dmnr[1]; - maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; - for (level = 1; maxblocks > 1; level++) { - if (maxblocks <= maxrootrecs) - maxblocks = 1; - else - maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; - } - mp->m_bm_maxlevels[whichfork] = level; -} - -STATIC int /* error */ -xfs_bmbt_lookup_eq( - struct xfs_btree_cur *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - int *stat) /* success/failure */ -{ - cur->bc_rec.b.br_startoff = off; - cur->bc_rec.b.br_startblock = bno; - cur->bc_rec.b.br_blockcount = len; - return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); -} - -STATIC int /* error */ -xfs_bmbt_lookup_ge( - struct xfs_btree_cur *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - int *stat) /* success/failure */ -{ - cur->bc_rec.b.br_startoff = off; - cur->bc_rec.b.br_startblock = bno; - cur->bc_rec.b.br_blockcount = len; - return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); -} - -/* - * Check if the inode needs to be converted to btree format. - */ -static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork) -{ - return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, whichfork) > - XFS_IFORK_MAXEXT(ip, whichfork); -} - -/* - * Check if the inode should be converted to extent format. - */ -static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork) -{ - return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && - XFS_IFORK_NEXTENTS(ip, whichfork) <= - XFS_IFORK_MAXEXT(ip, whichfork); -} - -/* - * Update the record referred to by cur to the value given - * by [off, bno, len, state]. - * This either works (return 0) or gets an EFSCORRUPTED error. - */ -STATIC int -xfs_bmbt_update( - struct xfs_btree_cur *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - xfs_exntst_t state) -{ - union xfs_btree_rec rec; - - xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state); - return xfs_btree_update(cur, &rec); -} - -/* - * Compute the worst-case number of indirect blocks that will be used - * for ip's delayed extent of length "len". - */ -STATIC xfs_filblks_t -xfs_bmap_worst_indlen( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_filblks_t len) /* delayed extent length */ -{ - int level; /* btree level number */ - int maxrecs; /* maximum record count at this level */ - xfs_mount_t *mp; /* mount structure */ - xfs_filblks_t rval; /* return value */ - - mp = ip->i_mount; - maxrecs = mp->m_bmap_dmxr[0]; - for (level = 0, rval = 0; - level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); - level++) { - len += maxrecs - 1; - do_div(len, maxrecs); - rval += len; - if (len == 1) - return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - - level - 1; - if (level == 0) - maxrecs = mp->m_bmap_dmxr[1]; - } - return rval; -} - -/* - * Calculate the default attribute fork offset for newly created inodes. - */ -uint -xfs_default_attroffset( - struct xfs_inode *ip) -{ - struct xfs_mount *mp = ip->i_mount; - uint offset; - - if (mp->m_sb.sb_inodesize == 256) { - offset = XFS_LITINO(mp, ip->i_d.di_version) - - XFS_BMDR_SPACE_CALC(MINABTPTRS); - } else { - offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); - } - - ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version)); - return offset; -} - -/* - * Helper routine to reset inode di_forkoff field when switching - * attribute fork from local to extent format - we reset it where - * possible to make space available for inline data fork extents. - */ -STATIC void -xfs_bmap_forkoff_reset( - xfs_inode_t *ip, - int whichfork) -{ - if (whichfork == XFS_ATTR_FORK && - ip->i_d.di_format != XFS_DINODE_FMT_DEV && - ip->i_d.di_format != XFS_DINODE_FMT_UUID && - ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { - uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; - - if (dfl_forkoff > ip->i_d.di_forkoff) - ip->i_d.di_forkoff = dfl_forkoff; - } -} - -/* - * Debug/sanity checking code - */ - -STATIC int -xfs_bmap_sanity_check( - struct xfs_mount *mp, - struct xfs_buf *bp, - int level) -{ - struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - - if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) && - block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC)) - return 0; - - if (be16_to_cpu(block->bb_level) != level || - be16_to_cpu(block->bb_numrecs) == 0 || - be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) - return 0; - - return 1; -} - -#ifdef DEBUG -STATIC struct xfs_buf * -xfs_bmap_get_bp( - struct xfs_btree_cur *cur, - xfs_fsblock_t bno) -{ - struct xfs_log_item_desc *lidp; - int i; - - if (!cur) - return NULL; - - for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) { - if (!cur->bc_bufs[i]) - break; - if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno) - return cur->bc_bufs[i]; - } - - /* Chase down all the log items to see if the bp is there */ - list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) { - struct xfs_buf_log_item *bip; - bip = (struct xfs_buf_log_item *)lidp->lid_item; - if (bip->bli_item.li_type == XFS_LI_BUF && - XFS_BUF_ADDR(bip->bli_buf) == bno) - return bip->bli_buf; - } - - return NULL; -} - -STATIC void -xfs_check_block( - struct xfs_btree_block *block, - xfs_mount_t *mp, - int root, - short sz) -{ - int i, j, dmxr; - __be64 *pp, *thispa; /* pointer to block address */ - xfs_bmbt_key_t *prevp, *keyp; - - ASSERT(be16_to_cpu(block->bb_level) > 0); - - prevp = NULL; - for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { - dmxr = mp->m_bmap_dmxr[0]; - keyp = XFS_BMBT_KEY_ADDR(mp, block, i); - - if (prevp) { - ASSERT(be64_to_cpu(prevp->br_startoff) < - be64_to_cpu(keyp->br_startoff)); - } - prevp = keyp; - - /* - * Compare the block numbers to see if there are dups. - */ - if (root) - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); - else - pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); - - for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { - if (root) - thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); - else - thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); - if (*thispa == *pp) { - xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld", - __func__, j, i, - (unsigned long long)be64_to_cpu(*thispa)); - panic("%s: ptrs are equal in node\n", - __func__); - } - } - } -} - -/* - * Check that the extents for the inode ip are in the right order in all - * btree leaves. - */ - -STATIC void -xfs_bmap_check_leaf_extents( - xfs_btree_cur_t *cur, /* btree cursor or null */ - xfs_inode_t *ip, /* incore inode pointer */ - int whichfork) /* data or attr fork */ -{ - struct xfs_btree_block *block; /* current btree block */ - xfs_fsblock_t bno; /* block # of "block" */ - xfs_buf_t *bp; /* buffer for "block" */ - int error; /* error return value */ - xfs_extnum_t i=0, j; /* index into the extents list */ - xfs_ifork_t *ifp; /* fork structure */ - int level; /* btree level, for checking */ - xfs_mount_t *mp; /* file system mount structure */ - __be64 *pp; /* pointer to block address */ - xfs_bmbt_rec_t *ep; /* pointer to current extent */ - xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */ - xfs_bmbt_rec_t *nextp; /* pointer to next extent */ - int bp_release = 0; - - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) { - return; - } - - bno = NULLFSBLOCK; - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); - block = ifp->if_broot; - /* - * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. - */ - level = be16_to_cpu(block->bb_level); - ASSERT(level > 0); - xfs_check_block(block, mp, 1, ifp->if_broot_bytes); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); - bno = be64_to_cpu(*pp); - - ASSERT(bno != NULLDFSBNO); - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); - - /* - * Go down the tree until leaf level is reached, following the first - * pointer (leftmost) at each level. - */ - while (level-- > 0) { - /* See if buf is in cur first */ - bp_release = 0; - bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); - if (!bp) { - bp_release = 1; - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, - XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - goto error_norelse; - } - block = XFS_BUF_TO_BLOCK(bp); - XFS_WANT_CORRUPTED_GOTO( - xfs_bmap_sanity_check(mp, bp, level), - error0); - if (level == 0) - break; - - /* - * Check this block for basic sanity (increasing keys and - * no duplicate blocks). - */ - - xfs_check_block(block, mp, 0, 0); - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); - bno = be64_to_cpu(*pp); - XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); - if (bp_release) { - bp_release = 0; - xfs_trans_brelse(NULL, bp); - } - } - - /* - * Here with bp and block set to the leftmost leaf node in the tree. - */ - i = 0; - - /* - * Loop over all leaf nodes checking that all extents are in the right order. - */ - for (;;) { - xfs_fsblock_t nextbno; - xfs_extnum_t num_recs; - - - num_recs = xfs_btree_get_numrecs(block); - - /* - * Read-ahead the next leaf block, if any. - */ - - nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); - - /* - * Check all the extents to make sure they are OK. - * If we had a previous block, the last entry should - * conform with the first entry in this one. - */ - - ep = XFS_BMBT_REC_ADDR(mp, block, 1); - if (i) { - ASSERT(xfs_bmbt_disk_get_startoff(&last) + - xfs_bmbt_disk_get_blockcount(&last) <= - xfs_bmbt_disk_get_startoff(ep)); - } - for (j = 1; j < num_recs; j++) { - nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); - ASSERT(xfs_bmbt_disk_get_startoff(ep) + - xfs_bmbt_disk_get_blockcount(ep) <= - xfs_bmbt_disk_get_startoff(nextp)); - ep = nextp; - } - - last = *ep; - i += num_recs; - if (bp_release) { - bp_release = 0; - xfs_trans_brelse(NULL, bp); - } - bno = nextbno; - /* - * If we've reached the end, stop. - */ - if (bno == NULLFSBLOCK) - break; - - bp_release = 0; - bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); - if (!bp) { - bp_release = 1; - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, - XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - goto error_norelse; - } - block = XFS_BUF_TO_BLOCK(bp); - } - if (bp_release) { - bp_release = 0; - xfs_trans_brelse(NULL, bp); - } - return; - -error0: - xfs_warn(mp, "%s: at error0", __func__); - if (bp_release) - xfs_trans_brelse(NULL, bp); -error_norelse: - xfs_warn(mp, "%s: BAD after btree leaves for %d extents", - __func__, i); - panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); - return; -} - -/* - * Add bmap trace insert entries for all the contents of the extent records. - */ -void -xfs_bmap_trace_exlist( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t cnt, /* count of entries in the list */ - int whichfork, /* data or attr fork */ - unsigned long caller_ip) -{ - xfs_extnum_t idx; /* extent record index */ - xfs_ifork_t *ifp; /* inode fork pointer */ - int state = 0; - - if (whichfork == XFS_ATTR_FORK) - state |= BMAP_ATTRFORK; - - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); - for (idx = 0; idx < cnt; idx++) - trace_xfs_extlist(ip, idx, whichfork, caller_ip); -} - -/* - * Validate that the bmbt_irecs being returned from bmapi are valid - * given the caller's original parameters. Specifically check the - * ranges of the returned irecs to ensure that they only extend beyond - * the given parameters if the XFS_BMAPI_ENTIRE flag was set. - */ -STATIC void -xfs_bmap_validate_ret( - xfs_fileoff_t bno, - xfs_filblks_t len, - int flags, - xfs_bmbt_irec_t *mval, - int nmap, - int ret_nmap) -{ - int i; /* index to map values */ - - ASSERT(ret_nmap <= nmap); - - for (i = 0; i < ret_nmap; i++) { - ASSERT(mval[i].br_blockcount > 0); - if (!(flags & XFS_BMAPI_ENTIRE)) { - ASSERT(mval[i].br_startoff >= bno); - ASSERT(mval[i].br_blockcount <= len); - ASSERT(mval[i].br_startoff + mval[i].br_blockcount <= - bno + len); - } else { - ASSERT(mval[i].br_startoff < bno + len); - ASSERT(mval[i].br_startoff + mval[i].br_blockcount > - bno); - } - ASSERT(i == 0 || - mval[i - 1].br_startoff + mval[i - 1].br_blockcount == - mval[i].br_startoff); - ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK && - mval[i].br_startblock != HOLESTARTBLOCK); - ASSERT(mval[i].br_state == XFS_EXT_NORM || - mval[i].br_state == XFS_EXT_UNWRITTEN); - } -} - -#else -#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0) -#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) -#endif /* DEBUG */ - -/* - * bmap free list manipulation functions - */ - -/* - * Add the extent to the list of extents to be free at transaction end. - * The list is maintained sorted (by block number). - */ -void -xfs_bmap_add_free( - xfs_fsblock_t bno, /* fs block number of extent */ - xfs_filblks_t len, /* length of extent */ - xfs_bmap_free_t *flist, /* list of extents */ - xfs_mount_t *mp) /* mount point structure */ -{ - xfs_bmap_free_item_t *cur; /* current (next) element */ - xfs_bmap_free_item_t *new; /* new element */ - xfs_bmap_free_item_t *prev; /* previous element */ -#ifdef DEBUG - xfs_agnumber_t agno; - xfs_agblock_t agbno; - - ASSERT(bno != NULLFSBLOCK); - ASSERT(len > 0); - ASSERT(len <= MAXEXTLEN); - ASSERT(!isnullstartblock(bno)); - agno = XFS_FSB_TO_AGNO(mp, bno); - agbno = XFS_FSB_TO_AGBNO(mp, bno); - ASSERT(agno < mp->m_sb.sb_agcount); - ASSERT(agbno < mp->m_sb.sb_agblocks); - ASSERT(len < mp->m_sb.sb_agblocks); - ASSERT(agbno + len <= mp->m_sb.sb_agblocks); -#endif - ASSERT(xfs_bmap_free_item_zone != NULL); - new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); - new->xbfi_startblock = bno; - new->xbfi_blockcount = (xfs_extlen_t)len; - for (prev = NULL, cur = flist->xbf_first; - cur != NULL; - prev = cur, cur = cur->xbfi_next) { - if (cur->xbfi_startblock >= bno) - break; - } - if (prev) - prev->xbfi_next = new; - else - flist->xbf_first = new; - new->xbfi_next = cur; - flist->xbf_count++; -} - -/* - * Remove the entry "free" from the free item list. Prev points to the - * previous entry, unless "free" is the head of the list. - */ -void -xfs_bmap_del_free( - xfs_bmap_free_t *flist, /* free item list header */ - xfs_bmap_free_item_t *prev, /* previous item on list, if any */ - xfs_bmap_free_item_t *free) /* list item to be freed */ -{ - if (prev) - prev->xbfi_next = free->xbfi_next; - else - flist->xbf_first = free->xbfi_next; - flist->xbf_count--; - kmem_zone_free(xfs_bmap_free_item_zone, free); -} - -/* - * Free up any items left in the list. - */ -void -xfs_bmap_cancel( - xfs_bmap_free_t *flist) /* list of bmap_free_items */ -{ - xfs_bmap_free_item_t *free; /* free list item */ - xfs_bmap_free_item_t *next; - - if (flist->xbf_count == 0) - return; - ASSERT(flist->xbf_first != NULL); - for (free = flist->xbf_first; free; free = next) { - next = free->xbfi_next; - xfs_bmap_del_free(flist, NULL, free); - } - ASSERT(flist->xbf_count == 0); -} - -/* - * Inode fork format manipulation functions - */ - -/* - * Transform a btree format file with only one leaf node, where the - * extents list will fit in the inode, into an extents format file. - * Since the file extents are already in-core, all we have to do is - * give up the space for the btree root and pitch the leaf block. - */ -STATIC int /* error */ -xfs_bmap_btree_to_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_btree_cur_t *cur, /* btree cursor */ - int *logflagsp, /* inode logging flags */ - int whichfork) /* data or attr fork */ -{ - /* REFERENCED */ - struct xfs_btree_block *cblock;/* child btree block */ - xfs_fsblock_t cbno; /* child block number */ - xfs_buf_t *cbp; /* child block's buffer */ - int error; /* error return value */ - xfs_ifork_t *ifp; /* inode fork data */ - xfs_mount_t *mp; /* mount point structure */ - __be64 *pp; /* ptr to block address */ - struct xfs_btree_block *rblock;/* root btree block */ - - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(ifp->if_flags & XFS_IFEXTENTS); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); - rblock = ifp->if_broot; - ASSERT(be16_to_cpu(rblock->bb_level) == 1); - ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); - ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); - cbno = be64_to_cpu(*pp); - *logflagsp = 0; -#ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, cbno, 1))) - return error; -#endif - error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - return error; - cblock = XFS_BUF_TO_BLOCK(cbp); - if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) - return error; - xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); - ip->i_d.di_nblocks--; - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); - xfs_trans_binval(tp, cbp); - if (cur->bc_bufs[0] == cbp) - cur->bc_bufs[0] = NULL; - xfs_iroot_realloc(ip, -1, whichfork); - ASSERT(ifp->if_broot == NULL); - ASSERT((ifp->if_flags & XFS_IFBROOT) == 0); - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); - *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); - return 0; -} - -/* - * Convert an extents-format file into a btree-format file. - * The new file will have a root block (in the inode) and a single child block. - */ -STATIC int /* error */ -xfs_bmap_extents_to_btree( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first-block-allocated */ - xfs_bmap_free_t *flist, /* blocks freed in xaction */ - xfs_btree_cur_t **curp, /* cursor returned to caller */ - int wasdel, /* converting a delayed alloc */ - int *logflagsp, /* inode logging flags */ - int whichfork) /* data or attr fork */ -{ - struct xfs_btree_block *ablock; /* allocated (child) bt block */ - xfs_buf_t *abp; /* buffer for ablock */ - xfs_alloc_arg_t args; /* allocation arguments */ - xfs_bmbt_rec_t *arp; /* child record pointer */ - struct xfs_btree_block *block; /* btree root block */ - xfs_btree_cur_t *cur; /* bmap btree cursor */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - int error; /* error return value */ - xfs_extnum_t i, cnt; /* extent record index */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_key_t *kp; /* root block key pointer */ - xfs_mount_t *mp; /* mount structure */ - xfs_extnum_t nextents; /* number of file extents */ - xfs_bmbt_ptr_t *pp; /* root block address pointer */ - - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); - - /* - * Make space in the inode incore. - */ - xfs_iroot_realloc(ip, 1, whichfork); - ifp->if_flags |= XFS_IFBROOT; - - /* - * Fill in the root. - */ - block = ifp->if_broot; - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, - XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, - XFS_BMAP_MAGIC, 1, 1, ip->i_ino, - XFS_BTREE_LONG_PTRS); - - /* - * Need a cursor. Can't allocate until bb_level is filled in. - */ - cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.firstblock = *firstblock; - cur->bc_private.b.flist = flist; - cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; - /* - * Convert to a btree with two levels, one record in root. - */ - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); - memset(&args, 0, sizeof(args)); - args.tp = tp; - args.mp = mp; - args.firstblock = *firstblock; - if (*firstblock == NULLFSBLOCK) { - args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); - } else if (flist->xbf_low) { - args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = *firstblock; - } else { - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.fsbno = *firstblock; - } - args.minlen = args.maxlen = args.prod = 1; - args.wasdel = wasdel; - *logflagsp = 0; - if ((error = xfs_alloc_vextent(&args))) { - xfs_iroot_realloc(ip, -1, whichfork); - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; - } - /* - * Allocation can't fail, the space was reserved. - */ - ASSERT(args.fsbno != NULLFSBLOCK); - ASSERT(*firstblock == NULLFSBLOCK || - args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || - (flist->xbf_low && - args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); - *firstblock = cur->bc_private.b.firstblock = args.fsbno; - cur->bc_private.b.allocated++; - ip->i_d.di_nblocks++; - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); - abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); - /* - * Fill in the child block. - */ - abp->b_ops = &xfs_bmbt_buf_ops; - ablock = XFS_BUF_TO_BLOCK(abp); - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, ablock, abp->b_bn, - XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block_int(mp, ablock, abp->b_bn, - XFS_BMAP_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS); - - arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - for (cnt = i = 0; i < nextents; i++) { - ep = xfs_iext_get_ext(ifp, i); - if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { - arp->l0 = cpu_to_be64(ep->l0); - arp->l1 = cpu_to_be64(ep->l1); - arp++; cnt++; - } - } - ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); - xfs_btree_set_numrecs(ablock, cnt); - - /* - * Fill in the root key and pointer. - */ - kp = XFS_BMBT_KEY_ADDR(mp, block, 1); - arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); - kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur, - be16_to_cpu(block->bb_level))); - *pp = cpu_to_be64(args.fsbno); - - /* - * Do all this logging at the end so that - * the root is at the right level. - */ - xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS); - xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); - ASSERT(*curp == NULL); - *curp = cur; - *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork); - return 0; -} - -/* - * Convert a local file to an extents file. - * This code is out of bounds for data forks of regular files, - * since the file data needs to get logged so things will stay consistent. - * (The bmap-level manipulations are ok, though). - */ -void -xfs_bmap_local_to_extents_empty( - struct xfs_inode *ip, - int whichfork) -{ - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); - - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); - ASSERT(ifp->if_bytes == 0); - ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); - - xfs_bmap_forkoff_reset(ip, whichfork); - ifp->if_flags &= ~XFS_IFINLINE; - ifp->if_flags |= XFS_IFEXTENTS; - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); -} - - -STATIC int /* error */ -xfs_bmap_local_to_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated in xaction */ - xfs_extlen_t total, /* total blocks needed by transaction */ - int *logflagsp, /* inode logging flags */ - int whichfork, - void (*init_fn)(struct xfs_trans *tp, - struct xfs_buf *bp, - struct xfs_inode *ip, - struct xfs_ifork *ifp)) -{ - int error = 0; - int flags; /* logging flags returned */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_alloc_arg_t args; /* allocation arguments */ - xfs_buf_t *bp; /* buffer for extent block */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - - /* - * We don't want to deal with the case of keeping inode data inline yet. - * So sending the data fork of a regular inode is invalid. - */ - ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); - - if (!ifp->if_bytes) { - xfs_bmap_local_to_extents_empty(ip, whichfork); - flags = XFS_ILOG_CORE; - goto done; - } - - flags = 0; - error = 0; - ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == - XFS_IFINLINE); - memset(&args, 0, sizeof(args)); - args.tp = tp; - args.mp = ip->i_mount; - args.firstblock = *firstblock; - /* - * Allocate a block. We know we need only one, since the - * file currently fits in an inode. - */ - if (*firstblock == NULLFSBLOCK) { - args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); - args.type = XFS_ALLOCTYPE_START_BNO; - } else { - args.fsbno = *firstblock; - args.type = XFS_ALLOCTYPE_NEAR_BNO; - } - args.total = total; - args.minlen = args.maxlen = args.prod = 1; - error = xfs_alloc_vextent(&args); - if (error) - goto done; - - /* Can't fail, the space was reserved. */ - ASSERT(args.fsbno != NULLFSBLOCK); - ASSERT(args.len == 1); - *firstblock = args.fsbno; - bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); - - /* initialise the block and copy the data */ - init_fn(tp, bp, ip, ifp); - - /* account for the change in fork size and log everything */ - xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); - xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); - xfs_bmap_local_to_extents_empty(ip, whichfork); - flags |= XFS_ILOG_CORE; - - xfs_iext_add(ifp, 0, 1); - ep = xfs_iext_get_ext(ifp, 0); - xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); - trace_xfs_bmap_post_update(ip, 0, - whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0, - _THIS_IP_); - XFS_IFORK_NEXT_SET(ip, whichfork, 1); - ip->i_d.di_nblocks = 1; - xfs_trans_mod_dquot_byino(tp, ip, - XFS_TRANS_DQ_BCOUNT, 1L); - flags |= xfs_ilog_fext(whichfork); - -done: - *logflagsp = flags; - return error; -} - -/* - * Called from xfs_bmap_add_attrfork to handle btree format files. - */ -STATIC int /* error */ -xfs_bmap_add_attrfork_btree( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated */ - xfs_bmap_free_t *flist, /* blocks to free at commit */ - int *flags) /* inode logging flags */ -{ - xfs_btree_cur_t *cur; /* btree cursor */ - int error; /* error return value */ - xfs_mount_t *mp; /* file system mount struct */ - int stat; /* newroot status */ - - mp = ip->i_mount; - if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) - *flags |= XFS_ILOG_DBROOT; - else { - cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); - cur->bc_private.b.flist = flist; - cur->bc_private.b.firstblock = *firstblock; - if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) - goto error0; - /* must be at least one entry */ - XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); - if ((error = xfs_btree_new_iroot(cur, flags, &stat))) - goto error0; - if (stat == 0) { - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - return ENOSPC; - } - *firstblock = cur->bc_private.b.firstblock; - cur->bc_private.b.allocated = 0; - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - } - return 0; -error0: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; -} - -/* - * Called from xfs_bmap_add_attrfork to handle extents format files. - */ -STATIC int /* error */ -xfs_bmap_add_attrfork_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated */ - xfs_bmap_free_t *flist, /* blocks to free at commit */ - int *flags) /* inode logging flags */ -{ - xfs_btree_cur_t *cur; /* bmap btree cursor */ - int error; /* error return value */ - - if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip)) - return 0; - cur = NULL; - error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0, - flags, XFS_DATA_FORK); - if (cur) { - cur->bc_private.b.allocated = 0; - xfs_btree_del_cursor(cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - } - return error; -} - -/* - * Called from xfs_bmap_add_attrfork to handle local format files. Each - * different data fork content type needs a different callout to do the - * conversion. Some are basic and only require special block initialisation - * callouts for the data formating, others (directories) are so specialised they - * handle everything themselves. - * - * XXX (dgc): investigate whether directory conversion can use the generic - * formatting callout. It should be possible - it's just a very complex - * formatter. - */ -STATIC int /* error */ -xfs_bmap_add_attrfork_local( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated */ - xfs_bmap_free_t *flist, /* blocks to free at commit */ - int *flags) /* inode logging flags */ -{ - xfs_da_args_t dargs; /* args for dir/attr code */ - - if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) - return 0; - - if (S_ISDIR(ip->i_d.di_mode)) { - memset(&dargs, 0, sizeof(dargs)); - dargs.geo = ip->i_mount->m_dir_geo; - dargs.dp = ip; - dargs.firstblock = firstblock; - dargs.flist = flist; - dargs.total = dargs.geo->fsbcount; - dargs.whichfork = XFS_DATA_FORK; - dargs.trans = tp; - return xfs_dir2_sf_to_block(&dargs); - } - - if (S_ISLNK(ip->i_d.di_mode)) - return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, - flags, XFS_DATA_FORK, - xfs_symlink_local_to_remote); - - /* should only be called for types that support local format data */ - ASSERT(0); - return EFSCORRUPTED; -} - -/* - * Convert inode from non-attributed to attributed. - * Must not be in a transaction, ip must not be locked. - */ -int /* error code */ -xfs_bmap_add_attrfork( - xfs_inode_t *ip, /* incore inode pointer */ - int size, /* space new attribute needs */ - int rsvd) /* xact may use reserved blks */ -{ - xfs_fsblock_t firstblock; /* 1st block/ag allocated */ - xfs_bmap_free_t flist; /* freed extent records */ - xfs_mount_t *mp; /* mount structure */ - xfs_trans_t *tp; /* transaction pointer */ - int blks; /* space reservation */ - int version = 1; /* superblock attr version */ - int committed; /* xaction was committed */ - int logflags; /* logging flags */ - int error; /* error return value */ - int cancel_flags = 0; - - ASSERT(XFS_IFORK_Q(ip) == 0); - - mp = ip->i_mount; - ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); - tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK); - blks = XFS_ADDAFORK_SPACE_RES(mp); - if (rsvd) - tp->t_flags |= XFS_TRANS_RESERVE; - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - cancel_flags = XFS_TRANS_RELEASE_LOG_RES; - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? - XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : - XFS_QMOPT_RES_REGBLKS); - if (error) - goto trans_cancel; - cancel_flags |= XFS_TRANS_ABORT; - if (XFS_IFORK_Q(ip)) - goto trans_cancel; - if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { - /* - * For inodes coming from pre-6.2 filesystems. - */ - ASSERT(ip->i_d.di_aformat == 0); - ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; - } - ASSERT(ip->i_d.di_anextents == 0); - - xfs_trans_ijoin(tp, ip, 0); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - switch (ip->i_d.di_format) { - case XFS_DINODE_FMT_DEV: - ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; - break; - case XFS_DINODE_FMT_UUID: - ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3; - break; - case XFS_DINODE_FMT_LOCAL: - case XFS_DINODE_FMT_EXTENTS: - case XFS_DINODE_FMT_BTREE: - ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); - if (!ip->i_d.di_forkoff) - ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3; - else if (mp->m_flags & XFS_MOUNT_ATTR2) - version = 2; - break; - default: - ASSERT(0); - error = EINVAL; - goto trans_cancel; - } - - ASSERT(ip->i_afp == NULL); - ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); - ip->i_afp->if_flags = XFS_IFEXTENTS; - logflags = 0; - xfs_bmap_init(&flist, &firstblock); - switch (ip->i_d.di_format) { - case XFS_DINODE_FMT_LOCAL: - error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist, - &logflags); - break; - case XFS_DINODE_FMT_EXTENTS: - error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock, - &flist, &logflags); - break; - case XFS_DINODE_FMT_BTREE: - error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist, - &logflags); - break; - default: - error = 0; - break; - } - if (logflags) - xfs_trans_log_inode(tp, ip, logflags); - if (error) - goto bmap_cancel; - if (!xfs_sb_version_hasattr(&mp->m_sb) || - (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { - __int64_t sbfields = 0; - - spin_lock(&mp->m_sb_lock); - if (!xfs_sb_version_hasattr(&mp->m_sb)) { - xfs_sb_version_addattr(&mp->m_sb); - sbfields |= XFS_SB_VERSIONNUM; - } - if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) { - xfs_sb_version_addattr2(&mp->m_sb); - sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); - } - if (sbfields) { - spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, sbfields); - } else - spin_unlock(&mp->m_sb_lock); - } - - error = xfs_bmap_finish(&tp, &flist, &committed); - if (error) - goto bmap_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; - -bmap_cancel: - xfs_bmap_cancel(&flist); -trans_cancel: - xfs_trans_cancel(tp, cancel_flags); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; -} - -/* - * Internal and external extent tree search functions. - */ - -/* - * Read in the extents to if_extents. - * All inode fields are set up by caller, we just traverse the btree - * and copy the records in. If the file system cannot contain unwritten - * extents, the records are checked for no "state" flags. - */ -int /* error */ -xfs_bmap_read_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - int whichfork) /* data or attr fork */ -{ - struct xfs_btree_block *block; /* current btree block */ - xfs_fsblock_t bno; /* block # of "block" */ - xfs_buf_t *bp; /* buffer for "block" */ - int error; /* error return value */ - xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */ - xfs_extnum_t i, j; /* index into the extents list */ - xfs_ifork_t *ifp; /* fork structure */ - int level; /* btree level, for checking */ - xfs_mount_t *mp; /* file system mount structure */ - __be64 *pp; /* pointer to block address */ - /* REFERENCED */ - xfs_extnum_t room; /* number of entries there's room for */ - - bno = NULLFSBLOCK; - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); - exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : - XFS_EXTFMT_INODE(ip); - block = ifp->if_broot; - /* - * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. - */ - level = be16_to_cpu(block->bb_level); - ASSERT(level > 0); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); - bno = be64_to_cpu(*pp); - ASSERT(bno != NULLDFSBNO); - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); - /* - * Go down the tree until leaf level is reached, following the first - * pointer (leftmost) at each level. - */ - while (level-- > 0) { - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, - XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); - if (error) - return error; - block = XFS_BUF_TO_BLOCK(bp); - XFS_WANT_CORRUPTED_GOTO( - xfs_bmap_sanity_check(mp, bp, level), - error0); - if (level == 0) - break; - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); - bno = be64_to_cpu(*pp); - XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); - xfs_trans_brelse(tp, bp); - } - /* - * Here with bp and block set to the leftmost leaf node in the tree. - */ - room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - i = 0; - /* - * Loop over all leaf nodes. Copy information to the extent records. - */ - for (;;) { - xfs_bmbt_rec_t *frp; - xfs_fsblock_t nextbno; - xfs_extnum_t num_recs; - xfs_extnum_t start; - - num_recs = xfs_btree_get_numrecs(block); - if (unlikely(i + num_recs > room)) { - ASSERT(i + num_recs <= room); - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, (btree extents).", - (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)", - XFS_ERRLEVEL_LOW, ip->i_mount, block); - goto error0; - } - XFS_WANT_CORRUPTED_GOTO( - xfs_bmap_sanity_check(mp, bp, 0), - error0); - /* - * Read-ahead the next leaf block, if any. - */ - nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); - if (nextbno != NULLFSBLOCK) - xfs_btree_reada_bufl(mp, nextbno, 1, - &xfs_bmbt_buf_ops); - /* - * Copy records into the extent records. - */ - frp = XFS_BMBT_REC_ADDR(mp, block, 1); - start = i; - for (j = 0; j < num_recs; j++, i++, frp++) { - xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); - trp->l0 = be64_to_cpu(frp->l0); - trp->l1 = be64_to_cpu(frp->l1); - } - if (exntf == XFS_EXTFMT_NOSTATE) { - /* - * Check all attribute bmap btree records and - * any "older" data bmap btree records for a - * set bit in the "extent flag" position. - */ - if (unlikely(xfs_check_nostate_extents(ifp, - start, num_recs))) { - XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", - XFS_ERRLEVEL_LOW, - ip->i_mount); - goto error0; - } - } - xfs_trans_brelse(tp, bp); - bno = nextbno; - /* - * If we've reached the end, stop. - */ - if (bno == NULLFSBLOCK) - break; - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, - XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); - if (error) - return error; - block = XFS_BUF_TO_BLOCK(bp); - } - ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); - ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); - XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); - return 0; -error0: - xfs_trans_brelse(tp, bp); - return EFSCORRUPTED; -} - - -/* - * Search the extent records for the entry containing block bno. - * If bno lies in a hole, point to the next entry. If bno lies - * past eof, *eofp will be set, and *prevp will contain the last - * entry (null if none). Else, *lastxp will be set to the index - * of the found entry; *gotp will contain the entry. - */ -STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ -xfs_bmap_search_multi_extents( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_fileoff_t bno, /* block number searched for */ - int *eofp, /* out: end of file found */ - xfs_extnum_t *lastxp, /* out: last extent index */ - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ -{ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - xfs_extnum_t lastx; /* last extent index */ - - /* - * Initialize the extent entry structure to catch access to - * uninitialized br_startblock field. - */ - gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; - gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; - gotp->br_state = XFS_EXT_INVALID; -#if XFS_BIG_BLKNOS - gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; -#else - gotp->br_startblock = 0xffffa5a5; -#endif - prevp->br_startoff = NULLFILEOFF; - - ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); - if (lastx > 0) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp); - } - if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { - xfs_bmbt_get_all(ep, gotp); - *eofp = 0; - } else { - if (lastx > 0) { - *gotp = *prevp; - } - *eofp = 1; - ep = NULL; - } - *lastxp = lastx; - return ep; -} - -/* - * Search the extents list for the inode, for the extent containing bno. - * If bno lies in a hole, point to the next entry. If bno lies past eof, - * *eofp will be set, and *prevp will contain the last entry (null if none). - * Else, *lastxp will be set to the index of the found - * entry; *gotp will contain the entry. - */ -STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ -xfs_bmap_search_extents( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fileoff_t bno, /* block number searched for */ - int fork, /* data or attr fork */ - int *eofp, /* out: end of file found */ - xfs_extnum_t *lastxp, /* out: last extent index */ - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ -{ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - - XFS_STATS_INC(xs_look_exlist); - ifp = XFS_IFORK_PTR(ip, fork); - - ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); - - if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && - !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { - xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, - "Access to block zero in inode %llu " - "start_block: %llx start_off: %llx " - "blkcnt: %llx extent-state: %x lastx: %x", - (unsigned long long)ip->i_ino, - (unsigned long long)gotp->br_startblock, - (unsigned long long)gotp->br_startoff, - (unsigned long long)gotp->br_blockcount, - gotp->br_state, *lastxp); - *lastxp = NULLEXTNUM; - *eofp = 1; - return NULL; - } - return ep; -} - -/* - * Returns the file-relative block number of the first unused block(s) - * in the file with at least "len" logically contiguous blocks free. - * This is the lowest-address hole if the file has holes, else the first block - * past the end of file. - * Return 0 if the file is currently local (in-inode). - */ -int /* error */ -xfs_bmap_first_unused( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - xfs_extlen_t len, /* size of hole to find */ - xfs_fileoff_t *first_unused, /* unused block */ - int whichfork) /* data or attr fork */ -{ - int error; /* error return value */ - int idx; /* extent record index */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_fileoff_t lastaddr; /* last block number seen */ - xfs_fileoff_t lowest; /* lowest useful block */ - xfs_fileoff_t max; /* starting useful block */ - xfs_fileoff_t off; /* offset for this block */ - xfs_extnum_t nextents; /* number of extent entries */ - - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE || - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS || - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { - *first_unused = 0; - return 0; - } - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) - return error; - lowest = *first_unused; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); - off = xfs_bmbt_get_startoff(ep); - /* - * See if the hole before this extent will work. - */ - if (off >= lowest + len && off - max >= len) { - *first_unused = max; - return 0; - } - lastaddr = off + xfs_bmbt_get_blockcount(ep); - max = XFS_FILEOFF_MAX(lastaddr, lowest); - } - *first_unused = max; - return 0; -} - -/* - * Returns the file-relative block number of the last block - 1 before - * last_block (input value) in the file. - * This is not based on i_size, it is based on the extent records. - * Returns 0 for local files, as they do not have extent records. - */ -int /* error */ -xfs_bmap_last_before( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - xfs_fileoff_t *last_block, /* last block */ - int whichfork) /* data or attr fork */ -{ - xfs_fileoff_t bno; /* input file offset */ - int eof; /* hit end of file */ - xfs_bmbt_rec_host_t *ep; /* pointer to last extent */ - int error; /* error return value */ - xfs_bmbt_irec_t got; /* current extent value */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extnum_t lastx; /* last extent used */ - xfs_bmbt_irec_t prev; /* previous extent value */ - - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) - return EIO; - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { - *last_block = 0; - return 0; - } - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) - return error; - bno = *last_block - 1; - ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, - &prev); - if (eof || xfs_bmbt_get_startoff(ep) > bno) { - if (prev.br_startoff == NULLFILEOFF) - *last_block = 0; - else - *last_block = prev.br_startoff + prev.br_blockcount; - } - /* - * Otherwise *last_block is already the right answer. - */ - return 0; -} - -int -xfs_bmap_last_extent( - struct xfs_trans *tp, - struct xfs_inode *ip, - int whichfork, - struct xfs_bmbt_irec *rec, - int *is_empty) -{ - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); - int error; - int nextents; - - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(tp, ip, whichfork); - if (error) - return error; - } - - nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); - if (nextents == 0) { - *is_empty = 1; - return 0; - } - - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec); - *is_empty = 0; - return 0; -} - -/* - * Check the last inode extent to determine whether this allocation will result - * in blocks being allocated at the end of the file. When we allocate new data - * blocks at the end of the file which do not start at the previous data block, - * we will try to align the new blocks at stripe unit boundaries. - * - * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be - * at, or past the EOF. - */ -STATIC int -xfs_bmap_isaeof( - struct xfs_bmalloca *bma, - int whichfork) -{ - struct xfs_bmbt_irec rec; - int is_empty; - int error; - - bma->aeof = 0; - error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, - &is_empty); - if (error) - return error; - - if (is_empty) { - bma->aeof = 1; - return 0; - } - - /* - * Check if we are allocation or past the last extent, or at least into - * the last delayed allocated extent. - */ - bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount || - (bma->offset >= rec.br_startoff && - isnullstartblock(rec.br_startblock)); - return 0; -} - -/* - * Returns the file-relative block number of the first block past eof in - * the file. This is not based on i_size, it is based on the extent records. - * Returns 0 for local files, as they do not have extent records. - */ -int -xfs_bmap_last_offset( - struct xfs_inode *ip, - xfs_fileoff_t *last_block, - int whichfork) -{ - struct xfs_bmbt_irec rec; - int is_empty; - int error; - - *last_block = 0; - - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) - return 0; - - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) - return EIO; - - error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); - if (error || is_empty) - return error; - - *last_block = rec.br_startoff + rec.br_blockcount; - return 0; -} - -/* - * Returns whether the selected fork of the inode has exactly one - * block or not. For the data fork we check this matches di_size, - * implying the file's range is 0..bsize-1. - */ -int /* 1=>1 block, 0=>otherwise */ -xfs_bmap_one_block( - xfs_inode_t *ip, /* incore inode */ - int whichfork) /* data or attr fork */ -{ - xfs_bmbt_rec_host_t *ep; /* ptr to fork's extent */ - xfs_ifork_t *ifp; /* inode fork pointer */ - int rval; /* return value */ - xfs_bmbt_irec_t s; /* internal version of extent */ - -#ifndef DEBUG - if (whichfork == XFS_DATA_FORK) - return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize; -#endif /* !DEBUG */ - if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) - return 0; - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) - return 0; - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(ifp->if_flags & XFS_IFEXTENTS); - ep = xfs_iext_get_ext(ifp, 0); - xfs_bmbt_get_all(ep, &s); - rval = s.br_startoff == 0 && s.br_blockcount == 1; - if (rval && whichfork == XFS_DATA_FORK) - ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize); - return rval; -} - -/* - * Extent tree manipulation functions used during allocation. - */ - -/* - * Convert a delayed allocation to a real allocation. - */ -STATIC int /* error */ -xfs_bmap_add_extent_delay_real( - struct xfs_bmalloca *bma) -{ - struct xfs_bmbt_irec *new = &bma->got; - int diff; /* temp value */ - xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ - int error; /* error return value */ - int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_fileoff_t new_endoff; /* end offset of new entry */ - xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ - /* left is 0, right is 1, prev is 2 */ - int rval=0; /* return value (logging flags) */ - int state = 0;/* state bits, accessed thru macros */ - xfs_filblks_t da_new; /* new count del alloc blocks used */ - xfs_filblks_t da_old; /* old count del alloc blocks used */ - xfs_filblks_t temp=0; /* value for da_new calculations */ - xfs_filblks_t temp2=0;/* value for da_new calculations */ - int tmp_rval; /* partial logging flags */ - - ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK); - - ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); - ASSERT(!isnullstartblock(new->br_startblock)); - ASSERT(!bma->cur || - (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - - XFS_STATS_INC(xs_add_exlist); - -#define LEFT r[0] -#define RIGHT r[1] -#define PREV r[2] - - /* - * Set up a bunch of variables to make the tests simpler. - */ - ep = xfs_iext_get_ext(ifp, bma->idx); - xfs_bmbt_get_all(ep, &PREV); - new_endoff = new->br_startoff + new->br_blockcount; - ASSERT(PREV.br_startoff <= new->br_startoff); - ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); - - da_old = startblockval(PREV.br_startblock); - da_new = 0; - - /* - * Set flags determining what part of the previous delayed allocation - * extent is being replaced by a real allocation. - */ - if (PREV.br_startoff == new->br_startoff) - state |= BMAP_LEFT_FILLING; - if (PREV.br_startoff + PREV.br_blockcount == new_endoff) - state |= BMAP_RIGHT_FILLING; - - /* - * Check and set flags if this segment has a left neighbor. - * Don't set contiguous if the combined extent would be too large. - */ - if (bma->idx > 0) { - state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT); - - if (isnullstartblock(LEFT.br_startblock)) - state |= BMAP_LEFT_DELAY; - } - - if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && - LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && - LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && - LEFT.br_state == new->br_state && - LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) - state |= BMAP_LEFT_CONTIG; - - /* - * Check and set flags if this segment has a right neighbor. - * Don't set contiguous if the combined extent would be too large. - * Also check for all-three-contiguous being too large. - */ - if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { - state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); - - if (isnullstartblock(RIGHT.br_startblock)) - state |= BMAP_RIGHT_DELAY; - } - - if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && - new_endoff == RIGHT.br_startoff && - new->br_startblock + new->br_blockcount == RIGHT.br_startblock && - new->br_state == RIGHT.br_state && - new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && - ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | - BMAP_RIGHT_FILLING)) != - (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | - BMAP_RIGHT_FILLING) || - LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= MAXEXTLEN)) - state |= BMAP_RIGHT_CONTIG; - - error = 0; - /* - * Switch out based on the FILLING and CONTIG state bits. - */ - switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | - BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | - BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: - /* - * Filling in all of a previously delayed allocation extent. - * The left and right neighbors are both contiguous with new. - */ - bma->idx--; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), - LEFT.br_blockcount + PREV.br_blockcount + - RIGHT.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - xfs_iext_remove(bma->ip, bma->idx + 1, 2, state); - bma->ip->i_d.di_nextents--; - if (bma->cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_btree_delete(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_btree_decrement(bma->cur, 0, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + - PREV.br_blockcount + - RIGHT.br_blockcount, LEFT.br_state); - if (error) - goto done; - } - break; - - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: - /* - * Filling in all of a previously delayed allocation extent. - * The left neighbor is contiguous, the right is not. - */ - bma->idx--; - - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), - LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); - if (bma->cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, LEFT.br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + - PREV.br_blockcount, LEFT.br_state); - if (error) - goto done; - } - break; - - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: - /* - * Filling in all of a previously delayed allocation extent. - * The right neighbor is contiguous, the left is not. - */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, new->br_startblock); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); - if (bma->cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, PREV.br_startoff, - new->br_startblock, - PREV.br_blockcount + - RIGHT.br_blockcount, PREV.br_state); - if (error) - goto done; - } - break; - - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: - /* - * Filling in all of a previously delayed allocation extent. - * Neither the left nor right neighbors are contiguous with - * the new one. - */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, new->br_startblock); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - bma->ip->i_d.di_nextents++; - if (bma->cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; - error = xfs_btree_insert(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - break; - - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: - /* - * Filling in the first part of a previous delayed allocation. - * The left neighbor is contiguous. - */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1), - LEFT.br_blockcount + new->br_blockcount); - xfs_bmbt_set_startoff(ep, - PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_); - - temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - if (bma->cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, LEFT.br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + - new->br_blockcount, - LEFT.br_state); - if (error) - goto done; - } - da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), - startblockval(PREV.br_startblock)); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - bma->idx--; - break; - - case BMAP_LEFT_FILLING: - /* - * Filling in the first part of a previous delayed allocation. - * The left neighbor is not contiguous. - */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startoff(ep, new_endoff); - temp = PREV.br_blockcount - new->br_blockcount; - xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(bma->ip, bma->idx, 1, new, state); - bma->ip->i_d.di_nextents++; - if (bma->cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; - error = xfs_btree_insert(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - - if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->flist, - &bma->cur, 1, &tmp_rval, XFS_DATA_FORK); - rval |= tmp_rval; - if (error) - goto done; - } - da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), - startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, bma->idx + 1); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); - break; - - case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: - /* - * Filling in the last part of a previous delayed allocation. - * The right neighbor is contiguous with the new allocation. - */ - temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1), - new->br_startoff, new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, - RIGHT.br_state); - trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); - if (bma->cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + - RIGHT.br_blockcount, - RIGHT.br_state); - if (error) - goto done; - } - - da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), - startblockval(PREV.br_startblock)); - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - bma->idx++; - break; - - case BMAP_RIGHT_FILLING: - /* - * Filling in the last part of a previous delayed allocation. - * The right neighbor is not contiguous. - */ - temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state); - bma->ip->i_d.di_nextents++; - if (bma->cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; - error = xfs_btree_insert(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - - if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->flist, &bma->cur, 1, - &tmp_rval, XFS_DATA_FORK); - rval |= tmp_rval; - if (error) - goto done; - } - da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), - startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, bma->idx); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - bma->idx++; - break; - - case 0: - /* - * Filling in the middle part of a previous delayed allocation. - * Contiguity is impossible here. - * This case is avoided almost all the time. - * - * We start with a delayed allocation: - * - * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+ - * PREV @ idx - * - * and we are allocating: - * +rrrrrrrrrrrrrrrrr+ - * new - * - * and we set it up for insertion as: - * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+ - * new - * PREV @ idx LEFT RIGHT - * inserted at idx + 1 - */ - temp = new->br_startoff - PREV.br_startoff; - temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ - LEFT = *new; - RIGHT.br_state = PREV.br_state; - RIGHT.br_startblock = nullstartblock( - (int)xfs_bmap_worst_indlen(bma->ip, temp2)); - RIGHT.br_startoff = new_endoff; - RIGHT.br_blockcount = temp2; - /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ - xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state); - bma->ip->i_d.di_nextents++; - if (bma->cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; - error = xfs_btree_insert(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - - if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->flist, &bma->cur, - 1, &tmp_rval, XFS_DATA_FORK); - rval |= tmp_rval; - if (error) - goto done; - } - temp = xfs_bmap_worst_indlen(bma->ip, temp); - temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); - diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); - if (diff > 0) { - error = xfs_icsb_modify_counters(bma->ip->i_mount, - XFS_SBS_FDBLOCKS, - -((int64_t)diff), 0); - ASSERT(!error); - if (error) - goto done; - } - - ep = xfs_iext_get_ext(ifp, bma->idx); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2), - nullstartblock((int)temp2)); - trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_); - - bma->idx++; - da_new = temp + temp2; - break; - - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: - case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: - case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_LEFT_CONTIG: - case BMAP_RIGHT_CONTIG: - /* - * These cases are all impossible. - */ - ASSERT(0); - } - - /* convert to a btree if necessary */ - if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { - int tmp_logflags; /* partial log flag return val */ - - ASSERT(bma->cur == NULL); - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->flist, &bma->cur, - da_old > 0, &tmp_logflags, XFS_DATA_FORK); - bma->logflags |= tmp_logflags; - if (error) - goto done; - } - - /* adjust for changes in reserved delayed indirect blocks */ - if (da_old || da_new) { - temp = da_new; - if (bma->cur) - temp += bma->cur->bc_private.b.allocated; - ASSERT(temp <= da_old); - if (temp < da_old) - xfs_icsb_modify_counters(bma->ip->i_mount, - XFS_SBS_FDBLOCKS, - (int64_t)(da_old - temp), 0); - } - - /* clear out the allocated field, done with it now in any case. */ - if (bma->cur) - bma->cur->bc_private.b.allocated = 0; - - xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK); -done: - bma->logflags |= rval; - return error; -#undef LEFT -#undef RIGHT -#undef PREV -} - -/* - * Convert an unwritten allocation to a real allocation or vice versa. - */ -STATIC int /* error */ -xfs_bmap_add_extent_unwritten_real( - struct xfs_trans *tp, - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t *idx, /* extent number to update/insert */ - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - xfs_fsblock_t *first, /* pointer to firstblock variable */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp) /* inode logging flags */ -{ - xfs_btree_cur_t *cur; /* btree cursor */ - xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ - int error; /* error return value */ - int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_fileoff_t new_endoff; /* end offset of new entry */ - xfs_exntst_t newext; /* new extent state */ - xfs_exntst_t oldext; /* old extent state */ - xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ - /* left is 0, right is 1, prev is 2 */ - int rval=0; /* return value (logging flags) */ - int state = 0;/* state bits, accessed thru macros */ - - *logflagsp = 0; - - cur = *curp; - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - - ASSERT(*idx >= 0); - ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); - ASSERT(!isnullstartblock(new->br_startblock)); - - XFS_STATS_INC(xs_add_exlist); - -#define LEFT r[0] -#define RIGHT r[1] -#define PREV r[2] - - /* - * Set up a bunch of variables to make the tests simpler. - */ - error = 0; - ep = xfs_iext_get_ext(ifp, *idx); - xfs_bmbt_get_all(ep, &PREV); - newext = new->br_state; - oldext = (newext == XFS_EXT_UNWRITTEN) ? - XFS_EXT_NORM : XFS_EXT_UNWRITTEN; - ASSERT(PREV.br_state == oldext); - new_endoff = new->br_startoff + new->br_blockcount; - ASSERT(PREV.br_startoff <= new->br_startoff); - ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); - - /* - * Set flags determining what part of the previous oldext allocation - * extent is being replaced by a newext allocation. - */ - if (PREV.br_startoff == new->br_startoff) - state |= BMAP_LEFT_FILLING; - if (PREV.br_startoff + PREV.br_blockcount == new_endoff) - state |= BMAP_RIGHT_FILLING; - - /* - * Check and set flags if this segment has a left neighbor. - * Don't set contiguous if the combined extent would be too large. - */ - if (*idx > 0) { - state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); - - if (isnullstartblock(LEFT.br_startblock)) - state |= BMAP_LEFT_DELAY; - } - - if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && - LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && - LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && - LEFT.br_state == newext && - LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) - state |= BMAP_LEFT_CONTIG; - - /* - * Check and set flags if this segment has a right neighbor. - * Don't set contiguous if the combined extent would be too large. - * Also check for all-three-contiguous being too large. - */ - if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { - state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); - if (isnullstartblock(RIGHT.br_startblock)) - state |= BMAP_RIGHT_DELAY; - } - - if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && - new_endoff == RIGHT.br_startoff && - new->br_startblock + new->br_blockcount == RIGHT.br_startblock && - newext == RIGHT.br_state && - new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && - ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | - BMAP_RIGHT_FILLING)) != - (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | - BMAP_RIGHT_FILLING) || - LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= MAXEXTLEN)) - state |= BMAP_RIGHT_CONTIG; - - /* - * Switch out based on the FILLING and CONTIG state bits. - */ - switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | - BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | - BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: - /* - * Setting all of a previous oldext extent to newext. - * The left and right neighbors are both contiguous with new. - */ - --*idx; - - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), - LEFT.br_blockcount + PREV.br_blockcount + - RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - xfs_iext_remove(ip, *idx + 1, 2, state); - ip->i_d.di_nextents -= 2; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_delete(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_decrement(cur, 0, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_delete(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_decrement(cur, 0, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + PREV.br_blockcount + - RIGHT.br_blockcount, LEFT.br_state))) - goto done; - } - break; - - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: - /* - * Setting all of a previous oldext extent to newext. - * The left neighbor is contiguous, the right is not. - */ - --*idx; - - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), - LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - xfs_iext_remove(ip, *idx + 1, 1, state); - ip->i_d.di_nextents--; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_delete(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_decrement(cur, 0, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + PREV.br_blockcount, - LEFT.br_state))) - goto done; - } - break; - - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: - /* - * Setting all of a previous oldext extent to newext. - * The right neighbor is contiguous, the left is not. - */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount + RIGHT.br_blockcount); - xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, *idx + 1, 1, state); - ip->i_d.di_nextents--; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_delete(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_decrement(cur, 0, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, - newext))) - goto done; - } - break; - - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: - /* - * Setting all of a previous oldext extent to newext. - * Neither the left nor right neighbors are contiguous with - * the new one. - */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - if (cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - newext))) - goto done; - } - break; - - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: - /* - * Setting the first part of a previous oldext extent to newext. - * The left neighbor is contiguous. - */ - trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), - LEFT.br_blockcount + new->br_blockcount); - xfs_bmbt_set_startoff(ep, - PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); - - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, - new->br_startblock + new->br_blockcount); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - --*idx; - - if (cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, - PREV.br_startoff + new->br_blockcount, - PREV.br_startblock + new->br_blockcount, - PREV.br_blockcount - new->br_blockcount, - oldext))) - goto done; - if ((error = xfs_btree_decrement(cur, 0, &i))) - goto done; - error = xfs_bmbt_update(cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + new->br_blockcount, - LEFT.br_state); - if (error) - goto done; - } - break; - - case BMAP_LEFT_FILLING: - /* - * Setting the first part of a previous oldext extent to newext. - * The left neighbor is not contiguous. - */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); - xfs_bmbt_set_startoff(ep, new_endoff); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - xfs_bmbt_set_startblock(ep, - new->br_startblock + new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - xfs_iext_insert(ip, *idx, 1, new, state); - ip->i_d.di_nextents++; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, - PREV.br_startoff + new->br_blockcount, - PREV.br_startblock + new->br_blockcount, - PREV.br_blockcount - new->br_blockcount, - oldext))) - goto done; - cur->bc_rec.b = *new; - if ((error = xfs_btree_insert(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - break; - - case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: - /* - * Setting the last part of a previous oldext extent to newext. - * The right neighbor is contiguous with the new allocation. - */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - ++*idx; - - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), - new->br_startoff, new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, newext); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - if (cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, - PREV.br_blockcount, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, PREV.br_startoff, - PREV.br_startblock, - PREV.br_blockcount - new->br_blockcount, - oldext))) - goto done; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto done; - if ((error = xfs_bmbt_update(cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, - newext))) - goto done; - } - break; - - case BMAP_RIGHT_FILLING: - /* - * Setting the last part of a previous oldext extent to newext. - * The right neighbor is not contiguous. - */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - ++*idx; - xfs_iext_insert(ip, *idx, 1, new, state); - - ip->i_d.di_nextents++; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, PREV.br_startoff, - PREV.br_startblock, - PREV.br_blockcount - new->br_blockcount, - oldext))) - goto done; - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_btree_insert(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - break; - - case 0: - /* - * Setting the middle part of a previous oldext extent to - * newext. Contiguity is impossible here. - * One extent becomes three extents. - */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - new->br_startoff - PREV.br_startoff); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - r[0] = *new; - r[1].br_startoff = new_endoff; - r[1].br_blockcount = - PREV.br_startoff + PREV.br_blockcount - new_endoff; - r[1].br_startblock = new->br_startblock + new->br_blockcount; - r[1].br_state = oldext; - - ++*idx; - xfs_iext_insert(ip, *idx, 2, &r[0], state); - - ip->i_d.di_nextents += 2; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - /* new right extent - oldext */ - if ((error = xfs_bmbt_update(cur, r[1].br_startoff, - r[1].br_startblock, r[1].br_blockcount, - r[1].br_state))) - goto done; - /* new left extent - oldext */ - cur->bc_rec.b = PREV; - cur->bc_rec.b.br_blockcount = - new->br_startoff - PREV.br_startoff; - if ((error = xfs_btree_insert(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - /* - * Reset the cursor to the position of the new extent - * we are about to insert as we can't trust it after - * the previous insert. - */ - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - /* new middle extent - newext */ - cur->bc_rec.b.br_state = new->br_state; - if ((error = xfs_btree_insert(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - break; - - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: - case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: - case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_LEFT_CONTIG: - case BMAP_RIGHT_CONTIG: - /* - * These cases are all impossible. - */ - ASSERT(0); - } - - /* convert to a btree if necessary */ - if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { - int tmp_logflags; /* partial log flag return val */ - - ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, - 0, &tmp_logflags, XFS_DATA_FORK); - *logflagsp |= tmp_logflags; - if (error) - goto done; - } - - /* clear out the allocated field, done with it now in any case. */ - if (cur) { - cur->bc_private.b.allocated = 0; - *curp = cur; - } - - xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); -done: - *logflagsp |= rval; - return error; -#undef LEFT -#undef RIGHT -#undef PREV -} - -/* - * Convert a hole to a delayed allocation. - */ -STATIC void -xfs_bmap_add_extent_hole_delay( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t *idx, /* extent number to update/insert */ - xfs_bmbt_irec_t *new) /* new data to add to file extents */ -{ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_irec_t left; /* left neighbor extent entry */ - xfs_filblks_t newlen=0; /* new indirect size */ - xfs_filblks_t oldlen=0; /* old indirect size */ - xfs_bmbt_irec_t right; /* right neighbor extent entry */ - int state; /* state bits, accessed thru macros */ - xfs_filblks_t temp=0; /* temp for indirect calculations */ - - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - state = 0; - ASSERT(isnullstartblock(new->br_startblock)); - - /* - * Check and set flags if this segment has a left neighbor - */ - if (*idx > 0) { - state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); - - if (isnullstartblock(left.br_startblock)) - state |= BMAP_LEFT_DELAY; - } - - /* - * Check and set flags if the current (right) segment exists. - * If it doesn't exist, we're converting the hole at end-of-file. - */ - if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { - state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); - - if (isnullstartblock(right.br_startblock)) - state |= BMAP_RIGHT_DELAY; - } - - /* - * Set contiguity flags on the left and right neighbors. - * Don't let extents get too large, even if the pieces are contiguous. - */ - if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && - left.br_startoff + left.br_blockcount == new->br_startoff && - left.br_blockcount + new->br_blockcount <= MAXEXTLEN) - state |= BMAP_LEFT_CONTIG; - - if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && - new->br_startoff + new->br_blockcount == right.br_startoff && - new->br_blockcount + right.br_blockcount <= MAXEXTLEN && - (!(state & BMAP_LEFT_CONTIG) || - (left.br_blockcount + new->br_blockcount + - right.br_blockcount <= MAXEXTLEN))) - state |= BMAP_RIGHT_CONTIG; - - /* - * Switch out based on the contiguity flags. - */ - switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { - case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - /* - * New allocation is contiguous with delayed allocations - * on the left and on the right. - * Merge all three into a single extent record. - */ - --*idx; - temp = left.br_blockcount + new->br_blockcount + - right.br_blockcount; - - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); - oldlen = startblockval(left.br_startblock) + - startblockval(new->br_startblock) + - startblockval(right.br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), - nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - xfs_iext_remove(ip, *idx + 1, 1, state); - break; - - case BMAP_LEFT_CONTIG: - /* - * New allocation is contiguous with a delayed allocation - * on the left. - * Merge the new allocation with the left neighbor. - */ - --*idx; - temp = left.br_blockcount + new->br_blockcount; - - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); - oldlen = startblockval(left.br_startblock) + - startblockval(new->br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), - nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - break; - - case BMAP_RIGHT_CONTIG: - /* - * New allocation is contiguous with a delayed allocation - * on the right. - * Merge the new allocation with the right neighbor. - */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - temp = new->br_blockcount + right.br_blockcount; - oldlen = startblockval(new->br_startblock) + - startblockval(right.br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), - new->br_startoff, - nullstartblock((int)newlen), temp, right.br_state); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - break; - - case 0: - /* - * New allocation is not contiguous with another - * delayed allocation. - * Insert a new entry. - */ - oldlen = newlen = 0; - xfs_iext_insert(ip, *idx, 1, new, state); - break; - } - if (oldlen != newlen) { - ASSERT(oldlen > newlen); - xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - (int64_t)(oldlen - newlen), 0); - /* - * Nothing to do for disk quota accounting here. - */ - } -} - -/* - * Convert a hole to a real allocation. - */ -STATIC int /* error */ -xfs_bmap_add_extent_hole_real( - struct xfs_bmalloca *bma, - int whichfork) -{ - struct xfs_bmbt_irec *new = &bma->got; - int error; /* error return value */ - int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_irec_t left; /* left neighbor extent entry */ - xfs_bmbt_irec_t right; /* right neighbor extent entry */ - int rval=0; /* return value (logging flags) */ - int state; /* state bits, accessed thru macros */ - - ifp = XFS_IFORK_PTR(bma->ip, whichfork); - - ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); - ASSERT(!isnullstartblock(new->br_startblock)); - ASSERT(!bma->cur || - !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - - XFS_STATS_INC(xs_add_exlist); - - state = 0; - if (whichfork == XFS_ATTR_FORK) - state |= BMAP_ATTRFORK; - - /* - * Check and set flags if this segment has a left neighbor. - */ - if (bma->idx > 0) { - state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left); - if (isnullstartblock(left.br_startblock)) - state |= BMAP_LEFT_DELAY; - } - - /* - * Check and set flags if this segment has a current value. - * Not true if we're inserting into the "hole" at eof. - */ - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { - state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); - if (isnullstartblock(right.br_startblock)) - state |= BMAP_RIGHT_DELAY; - } - - /* - * We're inserting a real allocation between "left" and "right". - * Set the contiguity flags. Don't let extents get too large. - */ - if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && - left.br_startoff + left.br_blockcount == new->br_startoff && - left.br_startblock + left.br_blockcount == new->br_startblock && - left.br_state == new->br_state && - left.br_blockcount + new->br_blockcount <= MAXEXTLEN) - state |= BMAP_LEFT_CONTIG; - - if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && - new->br_startoff + new->br_blockcount == right.br_startoff && - new->br_startblock + new->br_blockcount == right.br_startblock && - new->br_state == right.br_state && - new->br_blockcount + right.br_blockcount <= MAXEXTLEN && - (!(state & BMAP_LEFT_CONTIG) || - left.br_blockcount + new->br_blockcount + - right.br_blockcount <= MAXEXTLEN)) - state |= BMAP_RIGHT_CONTIG; - - error = 0; - /* - * Select which case we're in here, and implement it. - */ - switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { - case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - /* - * New allocation is contiguous with real allocations on the - * left and on the right. - * Merge all three into a single extent record. - */ - --bma->idx; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), - left.br_blockcount + new->br_blockcount + - right.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); - - XFS_IFORK_NEXT_SET(bma->ip, whichfork, - XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1); - if (bma->cur == NULL) { - rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); - } else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff, - right.br_startblock, right.br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_btree_delete(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_btree_decrement(bma->cur, 0, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, left.br_startoff, - left.br_startblock, - left.br_blockcount + - new->br_blockcount + - right.br_blockcount, - left.br_state); - if (error) - goto done; - } - break; - - case BMAP_LEFT_CONTIG: - /* - * New allocation is contiguous with a real allocation - * on the left. - * Merge the new allocation with the left neighbor. - */ - --bma->idx; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), - left.br_blockcount + new->br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - if (bma->cur == NULL) { - rval = xfs_ilog_fext(whichfork); - } else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff, - left.br_startblock, left.br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, left.br_startoff, - left.br_startblock, - left.br_blockcount + - new->br_blockcount, - left.br_state); - if (error) - goto done; - } - break; - - case BMAP_RIGHT_CONTIG: - /* - * New allocation is contiguous with a real allocation - * on the right. - * Merge the new allocation with the right neighbor. - */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx), - new->br_startoff, new->br_startblock, - new->br_blockcount + right.br_blockcount, - right.br_state); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - if (bma->cur == NULL) { - rval = xfs_ilog_fext(whichfork); - } else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, - right.br_startoff, - right.br_startblock, - right.br_blockcount, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + - right.br_blockcount, - right.br_state); - if (error) - goto done; - } - break; - - case 0: - /* - * New allocation is not contiguous with another - * real allocation. - * Insert a new entry. - */ - xfs_iext_insert(bma->ip, bma->idx, 1, new, state); - XFS_IFORK_NEXT_SET(bma->ip, whichfork, - XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1); - if (bma->cur == NULL) { - rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); - } else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, - new->br_startoff, - new->br_startblock, - new->br_blockcount, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - bma->cur->bc_rec.b.br_state = new->br_state; - error = xfs_btree_insert(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - break; - } - - /* convert to a btree if necessary */ - if (xfs_bmap_needs_btree(bma->ip, whichfork)) { - int tmp_logflags; /* partial log flag return val */ - - ASSERT(bma->cur == NULL); - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->flist, &bma->cur, - 0, &tmp_logflags, whichfork); - bma->logflags |= tmp_logflags; - if (error) - goto done; - } - - /* clear out the allocated field, done with it now in any case. */ - if (bma->cur) - bma->cur->bc_private.b.allocated = 0; - - xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); -done: - bma->logflags |= rval; - return error; -} - -/* - * Functions used in the extent read, allocate and remove paths - */ - -/* - * Adjust the size of the new extent based on di_extsize and rt extsize. - */ -int -xfs_bmap_extsize_align( - xfs_mount_t *mp, - xfs_bmbt_irec_t *gotp, /* next extent pointer */ - xfs_bmbt_irec_t *prevp, /* previous extent pointer */ - xfs_extlen_t extsz, /* align to this extent size */ - int rt, /* is this a realtime inode? */ - int eof, /* is extent at end-of-file? */ - int delay, /* creating delalloc extent? */ - int convert, /* overwriting unwritten extent? */ - xfs_fileoff_t *offp, /* in/out: aligned offset */ - xfs_extlen_t *lenp) /* in/out: aligned length */ -{ - xfs_fileoff_t orig_off; /* original offset */ - xfs_extlen_t orig_alen; /* original length */ - xfs_fileoff_t orig_end; /* original off+len */ - xfs_fileoff_t nexto; /* next file offset */ - xfs_fileoff_t prevo; /* previous file offset */ - xfs_fileoff_t align_off; /* temp for offset */ - xfs_extlen_t align_alen; /* temp for length */ - xfs_extlen_t temp; /* temp for calculations */ - - if (convert) - return 0; - - orig_off = align_off = *offp; - orig_alen = align_alen = *lenp; - orig_end = orig_off + orig_alen; - - /* - * If this request overlaps an existing extent, then don't - * attempt to perform any additional alignment. - */ - if (!delay && !eof && - (orig_off >= gotp->br_startoff) && - (orig_end <= gotp->br_startoff + gotp->br_blockcount)) { - return 0; - } - - /* - * If the file offset is unaligned vs. the extent size - * we need to align it. This will be possible unless - * the file was previously written with a kernel that didn't - * perform this alignment, or if a truncate shot us in the - * foot. - */ - temp = do_mod(orig_off, extsz); - if (temp) { - align_alen += temp; - align_off -= temp; - } - /* - * Same adjustment for the end of the requested area. - */ - if ((temp = (align_alen % extsz))) { - align_alen += extsz - temp; - } - /* - * If the previous block overlaps with this proposed allocation - * then move the start forward without adjusting the length. - */ - if (prevp->br_startoff != NULLFILEOFF) { - if (prevp->br_startblock == HOLESTARTBLOCK) - prevo = prevp->br_startoff; - else - prevo = prevp->br_startoff + prevp->br_blockcount; - } else - prevo = 0; - if (align_off != orig_off && align_off < prevo) - align_off = prevo; - /* - * If the next block overlaps with this proposed allocation - * then move the start back without adjusting the length, - * but not before offset 0. - * This may of course make the start overlap previous block, - * and if we hit the offset 0 limit then the next block - * can still overlap too. - */ - if (!eof && gotp->br_startoff != NULLFILEOFF) { - if ((delay && gotp->br_startblock == HOLESTARTBLOCK) || - (!delay && gotp->br_startblock == DELAYSTARTBLOCK)) - nexto = gotp->br_startoff + gotp->br_blockcount; - else - nexto = gotp->br_startoff; - } else - nexto = NULLFILEOFF; - if (!eof && - align_off + align_alen != orig_end && - align_off + align_alen > nexto) - align_off = nexto > align_alen ? nexto - align_alen : 0; - /* - * If we're now overlapping the next or previous extent that - * means we can't fit an extsz piece in this hole. Just move - * the start forward to the first valid spot and set - * the length so we hit the end. - */ - if (align_off != orig_off && align_off < prevo) - align_off = prevo; - if (align_off + align_alen != orig_end && - align_off + align_alen > nexto && - nexto != NULLFILEOFF) { - ASSERT(nexto > prevo); - align_alen = nexto - align_off; - } - - /* - * If realtime, and the result isn't a multiple of the realtime - * extent size we need to remove blocks until it is. - */ - if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) { - /* - * We're not covering the original request, or - * we won't be able to once we fix the length. - */ - if (orig_off < align_off || - orig_end > align_off + align_alen || - align_alen - temp < orig_alen) - return EINVAL; - /* - * Try to fix it by moving the start up. - */ - if (align_off + temp <= orig_off) { - align_alen -= temp; - align_off += temp; - } - /* - * Try to fix it by moving the end in. - */ - else if (align_off + align_alen - temp >= orig_end) - align_alen -= temp; - /* - * Set the start to the minimum then trim the length. - */ - else { - align_alen -= orig_off - align_off; - align_off = orig_off; - align_alen -= align_alen % mp->m_sb.sb_rextsize; - } - /* - * Result doesn't cover the request, fail it. - */ - if (orig_off < align_off || orig_end > align_off + align_alen) - return EINVAL; - } else { - ASSERT(orig_off >= align_off); - ASSERT(orig_end <= align_off + align_alen); - } - -#ifdef DEBUG - if (!eof && gotp->br_startoff != NULLFILEOFF) - ASSERT(align_off + align_alen <= gotp->br_startoff); - if (prevp->br_startoff != NULLFILEOFF) - ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount); -#endif - - *lenp = align_alen; - *offp = align_off; - return 0; -} - -#define XFS_ALLOC_GAP_UNITS 4 - -void -xfs_bmap_adjacent( - struct xfs_bmalloca *ap) /* bmap alloc argument struct */ -{ - xfs_fsblock_t adjust; /* adjustment to block numbers */ - xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ - xfs_mount_t *mp; /* mount point structure */ - int nullfb; /* true if ap->firstblock isn't set */ - int rt; /* true if inode is realtime */ - -#define ISVALID(x,y) \ - (rt ? \ - (x) < mp->m_sb.sb_rblocks : \ - XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \ - XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \ - XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) - - mp = ap->ip->i_mount; - nullfb = *ap->firstblock == NULLFSBLOCK; - rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; - fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); - /* - * If allocating at eof, and there's a previous real block, - * try to use its last block as our starting point. - */ - if (ap->eof && ap->prev.br_startoff != NULLFILEOFF && - !isnullstartblock(ap->prev.br_startblock) && - ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount, - ap->prev.br_startblock)) { - ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount; - /* - * Adjust for the gap between prevp and us. - */ - adjust = ap->offset - - (ap->prev.br_startoff + ap->prev.br_blockcount); - if (adjust && - ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) - ap->blkno += adjust; - } - /* - * If not at eof, then compare the two neighbor blocks. - * Figure out whether either one gives us a good starting point, - * and pick the better one. - */ - else if (!ap->eof) { - xfs_fsblock_t gotbno; /* right side block number */ - xfs_fsblock_t gotdiff=0; /* right side difference */ - xfs_fsblock_t prevbno; /* left side block number */ - xfs_fsblock_t prevdiff=0; /* left side difference */ - - /* - * If there's a previous (left) block, select a requested - * start block based on it. - */ - if (ap->prev.br_startoff != NULLFILEOFF && - !isnullstartblock(ap->prev.br_startblock) && - (prevbno = ap->prev.br_startblock + - ap->prev.br_blockcount) && - ISVALID(prevbno, ap->prev.br_startblock)) { - /* - * Calculate gap to end of previous block. - */ - adjust = prevdiff = ap->offset - - (ap->prev.br_startoff + - ap->prev.br_blockcount); - /* - * Figure the startblock based on the previous block's - * end and the gap size. - * Heuristic! - * If the gap is large relative to the piece we're - * allocating, or using it gives us an invalid block - * number, then just use the end of the previous block. - */ - if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && - ISVALID(prevbno + prevdiff, - ap->prev.br_startblock)) - prevbno += adjust; - else - prevdiff += adjust; - /* - * If the firstblock forbids it, can't use it, - * must use default. - */ - if (!rt && !nullfb && - XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno) - prevbno = NULLFSBLOCK; - } - /* - * No previous block or can't follow it, just default. - */ - else - prevbno = NULLFSBLOCK; - /* - * If there's a following (right) block, select a requested - * start block based on it. - */ - if (!isnullstartblock(ap->got.br_startblock)) { - /* - * Calculate gap to start of next block. - */ - adjust = gotdiff = ap->got.br_startoff - ap->offset; - /* - * Figure the startblock based on the next block's - * start and the gap size. - */ - gotbno = ap->got.br_startblock; - /* - * Heuristic! - * If the gap is large relative to the piece we're - * allocating, or using it gives us an invalid block - * number, then just use the start of the next block - * offset by our length. - */ - if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && - ISVALID(gotbno - gotdiff, gotbno)) - gotbno -= adjust; - else if (ISVALID(gotbno - ap->length, gotbno)) { - gotbno -= ap->length; - gotdiff += adjust - ap->length; - } else - gotdiff += adjust; - /* - * If the firstblock forbids it, can't use it, - * must use default. - */ - if (!rt && !nullfb && - XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno) - gotbno = NULLFSBLOCK; - } - /* - * No next block, just default. - */ - else - gotbno = NULLFSBLOCK; - /* - * If both valid, pick the better one, else the only good - * one, else ap->blkno is already set (to 0 or the inode block). - */ - if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) - ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno; - else if (prevbno != NULLFSBLOCK) - ap->blkno = prevbno; - else if (gotbno != NULLFSBLOCK) - ap->blkno = gotbno; - } -#undef ISVALID -} - -static int -xfs_bmap_longest_free_extent( - struct xfs_trans *tp, - xfs_agnumber_t ag, - xfs_extlen_t *blen, - int *notinit) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_perag *pag; - xfs_extlen_t longest; - int error = 0; - - pag = xfs_perag_get(mp, ag); - if (!pag->pagf_init) { - error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK); - if (error) - goto out; - - if (!pag->pagf_init) { - *notinit = 1; - goto out; - } - } - - longest = xfs_alloc_longest_free_extent(mp, pag); - if (*blen < longest) - *blen = longest; - -out: - xfs_perag_put(pag); - return error; -} - -static void -xfs_bmap_select_minlen( - struct xfs_bmalloca *ap, - struct xfs_alloc_arg *args, - xfs_extlen_t *blen, - int notinit) -{ - if (notinit || *blen < ap->minlen) { - /* - * Since we did a BUF_TRYLOCK above, it is possible that - * there is space for this request. - */ - args->minlen = ap->minlen; - } else if (*blen < args->maxlen) { - /* - * If the best seen length is less than the request length, - * use the best as the minimum. - */ - args->minlen = *blen; - } else { - /* - * Otherwise we've seen an extent as big as maxlen, use that - * as the minimum. - */ - args->minlen = args->maxlen; - } -} - -STATIC int -xfs_bmap_btalloc_nullfb( - struct xfs_bmalloca *ap, - struct xfs_alloc_arg *args, - xfs_extlen_t *blen) -{ - struct xfs_mount *mp = ap->ip->i_mount; - xfs_agnumber_t ag, startag; - int notinit = 0; - int error; - - args->type = XFS_ALLOCTYPE_START_BNO; - args->total = ap->total; - - startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); - if (startag == NULLAGNUMBER) - startag = ag = 0; - - while (*blen < args->maxlen) { - error = xfs_bmap_longest_free_extent(args->tp, ag, blen, - ¬init); - if (error) - return error; - - if (++ag == mp->m_sb.sb_agcount) - ag = 0; - if (ag == startag) - break; - } - - xfs_bmap_select_minlen(ap, args, blen, notinit); - return 0; -} - -STATIC int -xfs_bmap_btalloc_filestreams( - struct xfs_bmalloca *ap, - struct xfs_alloc_arg *args, - xfs_extlen_t *blen) -{ - struct xfs_mount *mp = ap->ip->i_mount; - xfs_agnumber_t ag; - int notinit = 0; - int error; - - args->type = XFS_ALLOCTYPE_NEAR_BNO; - args->total = ap->total; - - ag = XFS_FSB_TO_AGNO(mp, args->fsbno); - if (ag == NULLAGNUMBER) - ag = 0; - - error = xfs_bmap_longest_free_extent(args->tp, ag, blen, ¬init); - if (error) - return error; - - if (*blen < args->maxlen) { - error = xfs_filestream_new_ag(ap, &ag); - if (error) - return error; - - error = xfs_bmap_longest_free_extent(args->tp, ag, blen, - ¬init); - if (error) - return error; - - } - - xfs_bmap_select_minlen(ap, args, blen, notinit); - - /* - * Set the failure fallback case to look in the selected AG as stream - * may have moved. - */ - ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); - return 0; -} - -STATIC int -xfs_bmap_btalloc( - struct xfs_bmalloca *ap) /* bmap alloc argument struct */ -{ - xfs_mount_t *mp; /* mount point structure */ - xfs_alloctype_t atype = 0; /* type for allocation routines */ - xfs_extlen_t align; /* minimum allocation alignment */ - xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ - xfs_agnumber_t ag; - xfs_alloc_arg_t args; - xfs_extlen_t blen; - xfs_extlen_t nextminlen = 0; - int nullfb; /* true if ap->firstblock isn't set */ - int isaligned; - int tryagain; - int error; - int stripe_align; - - ASSERT(ap->length); - - mp = ap->ip->i_mount; - - /* stripe alignment for allocation is determined by mount parameters */ - stripe_align = 0; - if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) - stripe_align = mp->m_swidth; - else if (mp->m_dalign) - stripe_align = mp->m_dalign; - - align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; - if (unlikely(align)) { - error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, - align, 0, ap->eof, 0, ap->conv, - &ap->offset, &ap->length); - ASSERT(!error); - ASSERT(ap->length); - } - - - nullfb = *ap->firstblock == NULLFSBLOCK; - fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); - if (nullfb) { - if (ap->userdata && xfs_inode_is_filestream(ap->ip)) { - ag = xfs_filestream_lookup_ag(ap->ip); - ag = (ag != NULLAGNUMBER) ? ag : 0; - ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0); - } else { - ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino); - } - } else - ap->blkno = *ap->firstblock; - - xfs_bmap_adjacent(ap); - - /* - * If allowed, use ap->blkno; otherwise must use firstblock since - * it's in the right allocation group. - */ - if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno) - ; - else - ap->blkno = *ap->firstblock; - /* - * Normal allocation, done through xfs_alloc_vextent. - */ - tryagain = isaligned = 0; - memset(&args, 0, sizeof(args)); - args.tp = ap->tp; - args.mp = mp; - args.fsbno = ap->blkno; - - /* Trim the allocation back to the maximum an AG can fit. */ - args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); - args.firstblock = *ap->firstblock; - blen = 0; - if (nullfb) { - /* - * Search for an allocation group with a single extent large - * enough for the request. If one isn't found, then adjust - * the minimum allocation size to the largest space found. - */ - if (ap->userdata && xfs_inode_is_filestream(ap->ip)) - error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); - else - error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); - if (error) - return error; - } else if (ap->flist->xbf_low) { - if (xfs_inode_is_filestream(ap->ip)) - args.type = XFS_ALLOCTYPE_FIRST_AG; - else - args.type = XFS_ALLOCTYPE_START_BNO; - args.total = args.minlen = ap->minlen; - } else { - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.total = ap->total; - args.minlen = ap->minlen; - } - /* apply extent size hints if obtained earlier */ - if (unlikely(align)) { - args.prod = align; - if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) - args.mod = (xfs_extlen_t)(args.prod - args.mod); - } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { - args.prod = 1; - args.mod = 0; - } else { - args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; - if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod)))) - args.mod = (xfs_extlen_t)(args.prod - args.mod); - } - /* - * If we are not low on available data blocks, and the - * underlying logical volume manager is a stripe, and - * the file offset is zero then try to allocate data - * blocks on stripe unit boundary. - * NOTE: ap->aeof is only set if the allocation length - * is >= the stripe unit and the allocation offset is - * at the end of file. - */ - if (!ap->flist->xbf_low && ap->aeof) { - if (!ap->offset) { - args.alignment = stripe_align; - atype = args.type; - isaligned = 1; - /* - * Adjust for alignment - */ - if (blen > args.alignment && blen <= args.maxlen) - args.minlen = blen - args.alignment; - args.minalignslop = 0; - } else { - /* - * First try an exact bno allocation. - * If it fails then do a near or start bno - * allocation with alignment turned on. - */ - atype = args.type; - tryagain = 1; - args.type = XFS_ALLOCTYPE_THIS_BNO; - args.alignment = 1; - /* - * Compute the minlen+alignment for the - * next case. Set slop so that the value - * of minlen+alignment+slop doesn't go up - * between the calls. - */ - if (blen > stripe_align && blen <= args.maxlen) - nextminlen = blen - stripe_align; - else - nextminlen = args.minlen; - if (nextminlen + stripe_align > args.minlen + 1) - args.minalignslop = - nextminlen + stripe_align - - args.minlen - 1; - else - args.minalignslop = 0; - } - } else { - args.alignment = 1; - args.minalignslop = 0; - } - args.minleft = ap->minleft; - args.wasdel = ap->wasdel; - args.isfl = 0; - args.userdata = ap->userdata; - if ((error = xfs_alloc_vextent(&args))) - return error; - if (tryagain && args.fsbno == NULLFSBLOCK) { - /* - * Exact allocation failed. Now try with alignment - * turned on. - */ - args.type = atype; - args.fsbno = ap->blkno; - args.alignment = stripe_align; - args.minlen = nextminlen; - args.minalignslop = 0; - isaligned = 1; - if ((error = xfs_alloc_vextent(&args))) - return error; - } - if (isaligned && args.fsbno == NULLFSBLOCK) { - /* - * allocation failed, so turn off alignment and - * try again. - */ - args.type = atype; - args.fsbno = ap->blkno; - args.alignment = 0; - if ((error = xfs_alloc_vextent(&args))) - return error; - } - if (args.fsbno == NULLFSBLOCK && nullfb && - args.minlen > ap->minlen) { - args.minlen = ap->minlen; - args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = ap->blkno; - if ((error = xfs_alloc_vextent(&args))) - return error; - } - if (args.fsbno == NULLFSBLOCK && nullfb) { - args.fsbno = 0; - args.type = XFS_ALLOCTYPE_FIRST_AG; - args.total = ap->minlen; - args.minleft = 0; - if ((error = xfs_alloc_vextent(&args))) - return error; - ap->flist->xbf_low = 1; - } - if (args.fsbno != NULLFSBLOCK) { - /* - * check the allocation happened at the same or higher AG than - * the first block that was allocated. - */ - ASSERT(*ap->firstblock == NULLFSBLOCK || - XFS_FSB_TO_AGNO(mp, *ap->firstblock) == - XFS_FSB_TO_AGNO(mp, args.fsbno) || - (ap->flist->xbf_low && - XFS_FSB_TO_AGNO(mp, *ap->firstblock) < - XFS_FSB_TO_AGNO(mp, args.fsbno))); - - ap->blkno = args.fsbno; - if (*ap->firstblock == NULLFSBLOCK) - *ap->firstblock = args.fsbno; - ASSERT(nullfb || fb_agno == args.agno || - (ap->flist->xbf_low && fb_agno < args.agno)); - ap->length = args.len; - ap->ip->i_d.di_nblocks += args.len; - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); - if (ap->wasdel) - ap->ip->i_delayed_blks -= args.len; - /* - * Adjust the disk quota also. This was reserved - * earlier. - */ - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, - ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : - XFS_TRANS_DQ_BCOUNT, - (long) args.len); - } else { - ap->blkno = NULLFSBLOCK; - ap->length = 0; - } - return 0; -} - -/* - * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. - * It figures out where to ask the underlying allocator to put the new extent. - */ -STATIC int -xfs_bmap_alloc( - struct xfs_bmalloca *ap) /* bmap alloc argument struct */ -{ - if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata) - return xfs_bmap_rtalloc(ap); - return xfs_bmap_btalloc(ap); -} - -/* - * Trim the returned map to the required bounds - */ -STATIC void -xfs_bmapi_trim_map( - struct xfs_bmbt_irec *mval, - struct xfs_bmbt_irec *got, - xfs_fileoff_t *bno, - xfs_filblks_t len, - xfs_fileoff_t obno, - xfs_fileoff_t end, - int n, - int flags) -{ - if ((flags & XFS_BMAPI_ENTIRE) || - got->br_startoff + got->br_blockcount <= obno) { - *mval = *got; - if (isnullstartblock(got->br_startblock)) - mval->br_startblock = DELAYSTARTBLOCK; - return; - } - - if (obno > *bno) - *bno = obno; - ASSERT((*bno >= obno) || (n == 0)); - ASSERT(*bno < end); - mval->br_startoff = *bno; - if (isnullstartblock(got->br_startblock)) - mval->br_startblock = DELAYSTARTBLOCK; - else - mval->br_startblock = got->br_startblock + - (*bno - got->br_startoff); - /* - * Return the minimum of what we got and what we asked for for - * the length. We can use the len variable here because it is - * modified below and we could have been there before coming - * here if the first part of the allocation didn't overlap what - * was asked for. - */ - mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno, - got->br_blockcount - (*bno - got->br_startoff)); - mval->br_state = got->br_state; - ASSERT(mval->br_blockcount <= len); - return; -} - -/* - * Update and validate the extent map to return - */ -STATIC void -xfs_bmapi_update_map( - struct xfs_bmbt_irec **map, - xfs_fileoff_t *bno, - xfs_filblks_t *len, - xfs_fileoff_t obno, - xfs_fileoff_t end, - int *n, - int flags) -{ - xfs_bmbt_irec_t *mval = *map; - - ASSERT((flags & XFS_BMAPI_ENTIRE) || - ((mval->br_startoff + mval->br_blockcount) <= end)); - ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) || - (mval->br_startoff < obno)); - - *bno = mval->br_startoff + mval->br_blockcount; - *len = end - *bno; - if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) { - /* update previous map with new information */ - ASSERT(mval->br_startblock == mval[-1].br_startblock); - ASSERT(mval->br_blockcount > mval[-1].br_blockcount); - ASSERT(mval->br_state == mval[-1].br_state); - mval[-1].br_blockcount = mval->br_blockcount; - mval[-1].br_state = mval->br_state; - } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK && - mval[-1].br_startblock != DELAYSTARTBLOCK && - mval[-1].br_startblock != HOLESTARTBLOCK && - mval->br_startblock == mval[-1].br_startblock + - mval[-1].br_blockcount && - ((flags & XFS_BMAPI_IGSTATE) || - mval[-1].br_state == mval->br_state)) { - ASSERT(mval->br_startoff == - mval[-1].br_startoff + mval[-1].br_blockcount); - mval[-1].br_blockcount += mval->br_blockcount; - } else if (*n > 0 && - mval->br_startblock == DELAYSTARTBLOCK && - mval[-1].br_startblock == DELAYSTARTBLOCK && - mval->br_startoff == - mval[-1].br_startoff + mval[-1].br_blockcount) { - mval[-1].br_blockcount += mval->br_blockcount; - mval[-1].br_state = mval->br_state; - } else if (!((*n == 0) && - ((mval->br_startoff + mval->br_blockcount) <= - obno))) { - mval++; - (*n)++; - } - *map = mval; -} - -/* - * Map file blocks to filesystem blocks without allocation. - */ -int -xfs_bmapi_read( - struct xfs_inode *ip, - xfs_fileoff_t bno, - xfs_filblks_t len, - struct xfs_bmbt_irec *mval, - int *nmap, - int flags) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp; - struct xfs_bmbt_irec got; - struct xfs_bmbt_irec prev; - xfs_fileoff_t obno; - xfs_fileoff_t end; - xfs_extnum_t lastx; - int error; - int eof; - int n = 0; - int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? - XFS_ATTR_FORK : XFS_DATA_FORK; - - ASSERT(*nmap >= 1); - ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| - XFS_BMAPI_IGSTATE))); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); - - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { - XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); - return EFSCORRUPTED; - } - - if (XFS_FORCED_SHUTDOWN(mp)) - return EIO; - - XFS_STATS_INC(xs_blk_mapr); - - ifp = XFS_IFORK_PTR(ip, whichfork); - - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(NULL, ip, whichfork); - if (error) - return error; - } - - xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev); - end = bno + len; - obno = bno; - - while (bno < end && n < *nmap) { - /* Reading past eof, act as though there's a hole up to end. */ - if (eof) - got.br_startoff = end; - if (got.br_startoff > bno) { - /* Reading in a hole. */ - mval->br_startoff = bno; - mval->br_startblock = HOLESTARTBLOCK; - mval->br_blockcount = - XFS_FILBLKS_MIN(len, got.br_startoff - bno); - mval->br_state = XFS_EXT_NORM; - bno += mval->br_blockcount; - len -= mval->br_blockcount; - mval++; - n++; - continue; - } - - /* set up the extent map to return. */ - xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); - xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); - - /* If we're done, stop now. */ - if (bno >= end || n >= *nmap) - break; - - /* Else go on to the next record. */ - if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); - else - eof = 1; - } - *nmap = n; - return 0; -} - -STATIC int -xfs_bmapi_reserve_delalloc( - struct xfs_inode *ip, - xfs_fileoff_t aoff, - xfs_filblks_t len, - struct xfs_bmbt_irec *got, - struct xfs_bmbt_irec *prev, - xfs_extnum_t *lastx, - int eof) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - xfs_extlen_t alen; - xfs_extlen_t indlen; - char rt = XFS_IS_REALTIME_INODE(ip); - xfs_extlen_t extsz; - int error; - - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN); - if (!eof) - alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); - - /* Figure out the extent size, adjust alen */ - extsz = xfs_get_extsz_hint(ip); - if (extsz) { - /* - * Make sure we don't exceed a single extent length when we - * align the extent by reducing length we are going to - * allocate by the maximum amount extent size aligment may - * require. - */ - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1)); - error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, - 1, 0, &aoff, &alen); - ASSERT(!error); - } - - if (rt) - extsz = alen / mp->m_sb.sb_rextsize; - - /* - * Make a transaction-less quota reservation for delayed allocation - * blocks. This number gets adjusted later. We return if we haven't - * allocated blocks already inside this loop. - */ - error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, - rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); - if (error) - return error; - - /* - * Split changing sb for alen and indlen since they could be coming - * from different places. - */ - indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); - ASSERT(indlen > 0); - - if (rt) { - error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - -((int64_t)extsz), 0); - } else { - error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - -((int64_t)alen), 0); - } - - if (error) - goto out_unreserve_quota; - - error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - -((int64_t)indlen), 0); - if (error) - goto out_unreserve_blocks; - - - ip->i_delayed_blks += alen; - - got->br_startoff = aoff; - got->br_startblock = nullstartblock(indlen); - got->br_blockcount = alen; - got->br_state = XFS_EXT_NORM; - xfs_bmap_add_extent_hole_delay(ip, lastx, got); - - /* - * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay - * might have merged it into one of the neighbouring ones. - */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); - - ASSERT(got->br_startoff <= aoff); - ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); - ASSERT(isnullstartblock(got->br_startblock)); - ASSERT(got->br_state == XFS_EXT_NORM); - return 0; - -out_unreserve_blocks: - if (rt) - xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0); - else - xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0); -out_unreserve_quota: - if (XFS_IS_QUOTA_ON(mp)) - xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); - return error; -} - -/* - * Map file blocks to filesystem blocks, adding delayed allocations as needed. - */ -int -xfs_bmapi_delay( - struct xfs_inode *ip, /* incore inode */ - xfs_fileoff_t bno, /* starting file offs. mapped */ - xfs_filblks_t len, /* length to map in file */ - struct xfs_bmbt_irec *mval, /* output: map values */ - int *nmap, /* i/o: mval size/count */ - int flags) /* XFS_BMAPI_... */ -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - struct xfs_bmbt_irec got; /* current file extent record */ - struct xfs_bmbt_irec prev; /* previous file extent record */ - xfs_fileoff_t obno; /* old block number (offset) */ - xfs_fileoff_t end; /* end of mapped file region */ - xfs_extnum_t lastx; /* last useful extent number */ - int eof; /* we've hit the end of extents */ - int n = 0; /* current extent index */ - int error = 0; - - ASSERT(*nmap >= 1); - ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); - ASSERT(!(flags & ~XFS_BMAPI_ENTIRE)); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { - XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp); - return EFSCORRUPTED; - } - - if (XFS_FORCED_SHUTDOWN(mp)) - return EIO; - - XFS_STATS_INC(xs_blk_mapw); - - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); - if (error) - return error; - } - - xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev); - end = bno + len; - obno = bno; - - while (bno < end && n < *nmap) { - if (eof || got.br_startoff > bno) { - error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got, - &prev, &lastx, eof); - if (error) { - if (n == 0) { - *nmap = 0; - return error; - } - break; - } - } - - /* set up the extent map to return. */ - xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); - xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); - - /* If we're done, stop now. */ - if (bno >= end || n >= *nmap) - break; - - /* Else go on to the next record. */ - prev = got; - if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); - else - eof = 1; - } - - *nmap = n; - return 0; -} - - -int -__xfs_bmapi_allocate( - struct xfs_bmalloca *bma) -{ - struct xfs_mount *mp = bma->ip->i_mount; - int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ? - XFS_ATTR_FORK : XFS_DATA_FORK; - struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); - int tmp_logflags = 0; - int error; - - ASSERT(bma->length > 0); - - /* - * For the wasdelay case, we could also just allocate the stuff asked - * for in this bmap call but that wouldn't be as good. - */ - if (bma->wasdel) { - bma->length = (xfs_extlen_t)bma->got.br_blockcount; - bma->offset = bma->got.br_startoff; - if (bma->idx != NULLEXTNUM && bma->idx) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), - &bma->prev); - } - } else { - bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN); - if (!bma->eof) - bma->length = XFS_FILBLKS_MIN(bma->length, - bma->got.br_startoff - bma->offset); - } - - /* - * Indicate if this is the first user data in the file, or just any - * user data. - */ - if (!(bma->flags & XFS_BMAPI_METADATA)) { - bma->userdata = (bma->offset == 0) ? - XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; - } - - bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; - - /* - * Only want to do the alignment at the eof if it is userdata and - * allocation length is larger than a stripe unit. - */ - if (mp->m_dalign && bma->length >= mp->m_dalign && - !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { - error = xfs_bmap_isaeof(bma, whichfork); - if (error) - return error; - } - - error = xfs_bmap_alloc(bma); - if (error) - return error; - - if (bma->flist->xbf_low) - bma->minleft = 0; - if (bma->cur) - bma->cur->bc_private.b.firstblock = *bma->firstblock; - if (bma->blkno == NULLFSBLOCK) - return 0; - if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { - bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); - bma->cur->bc_private.b.firstblock = *bma->firstblock; - bma->cur->bc_private.b.flist = bma->flist; - } - /* - * Bump the number of extents we've allocated - * in this call. - */ - bma->nallocs++; - - if (bma->cur) - bma->cur->bc_private.b.flags = - bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; - - bma->got.br_startoff = bma->offset; - bma->got.br_startblock = bma->blkno; - bma->got.br_blockcount = bma->length; - bma->got.br_state = XFS_EXT_NORM; - - /* - * A wasdelay extent has been initialized, so shouldn't be flagged - * as unwritten. - */ - if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && - xfs_sb_version_hasextflgbit(&mp->m_sb)) - bma->got.br_state = XFS_EXT_UNWRITTEN; - - if (bma->wasdel) - error = xfs_bmap_add_extent_delay_real(bma); - else - error = xfs_bmap_add_extent_hole_real(bma, whichfork); - - bma->logflags |= tmp_logflags; - if (error) - return error; - - /* - * Update our extent pointer, given that xfs_bmap_add_extent_delay_real - * or xfs_bmap_add_extent_hole_real might have merged it into one of - * the neighbouring ones. - */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); - - ASSERT(bma->got.br_startoff <= bma->offset); - ASSERT(bma->got.br_startoff + bma->got.br_blockcount >= - bma->offset + bma->length); - ASSERT(bma->got.br_state == XFS_EXT_NORM || - bma->got.br_state == XFS_EXT_UNWRITTEN); - return 0; -} - -STATIC int -xfs_bmapi_convert_unwritten( - struct xfs_bmalloca *bma, - struct xfs_bmbt_irec *mval, - xfs_filblks_t len, - int flags) -{ - int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? - XFS_ATTR_FORK : XFS_DATA_FORK; - struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); - int tmp_logflags = 0; - int error; - - /* check if we need to do unwritten->real conversion */ - if (mval->br_state == XFS_EXT_UNWRITTEN && - (flags & XFS_BMAPI_PREALLOC)) - return 0; - - /* check if we need to do real->unwritten conversion */ - if (mval->br_state == XFS_EXT_NORM && - (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) != - (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) - return 0; - - /* - * Modify (by adding) the state flag, if writing. - */ - ASSERT(mval->br_blockcount <= len); - if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { - bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, - bma->ip, whichfork); - bma->cur->bc_private.b.firstblock = *bma->firstblock; - bma->cur->bc_private.b.flist = bma->flist; - } - mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) - ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; - - error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, - &bma->cur, mval, bma->firstblock, bma->flist, - &tmp_logflags); - bma->logflags |= tmp_logflags; - if (error) - return error; - - /* - * Update our extent pointer, given that - * xfs_bmap_add_extent_unwritten_real might have merged it into one - * of the neighbouring ones. - */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); - - /* - * We may have combined previously unwritten space with written space, - * so generate another request. - */ - if (mval->br_blockcount < len) - return EAGAIN; - return 0; -} - -/* - * Map file blocks to filesystem blocks, and allocate blocks or convert the - * extent state if necessary. Details behaviour is controlled by the flags - * parameter. Only allocates blocks from a single allocation group, to avoid - * locking problems. - * - * The returned value in "firstblock" from the first call in a transaction - * must be remembered and presented to subsequent calls in "firstblock". - * An upper bound for the number of blocks to be allocated is supplied to - * the first call in "total"; if no allocation group has that many free - * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). - */ -int -xfs_bmapi_write( - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_inode *ip, /* incore inode */ - xfs_fileoff_t bno, /* starting file offs. mapped */ - xfs_filblks_t len, /* length to map in file */ - int flags, /* XFS_BMAPI_... */ - xfs_fsblock_t *firstblock, /* first allocated block - controls a.g. for allocs */ - xfs_extlen_t total, /* total blocks needed */ - struct xfs_bmbt_irec *mval, /* output: map values */ - int *nmap, /* i/o: mval size/count */ - struct xfs_bmap_free *flist) /* i/o: list extents to free */ -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp; - struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */ - xfs_fileoff_t end; /* end of mapped file region */ - int eof; /* after the end of extents */ - int error; /* error return */ - int n; /* current extent index */ - xfs_fileoff_t obno; /* old block number (offset) */ - int whichfork; /* data or attr fork */ - char inhole; /* current location is hole in file */ - char wasdelay; /* old extent was delayed */ - -#ifdef DEBUG - xfs_fileoff_t orig_bno; /* original block number value */ - int orig_flags; /* original flags arg value */ - xfs_filblks_t orig_len; /* original value of len arg */ - struct xfs_bmbt_irec *orig_mval; /* original value of mval */ - int orig_nmap; /* original value of *nmap */ - - orig_bno = bno; - orig_len = len; - orig_flags = flags; - orig_mval = mval; - orig_nmap = *nmap; -#endif - whichfork = (flags & XFS_BMAPI_ATTRFORK) ? - XFS_ATTR_FORK : XFS_DATA_FORK; - - ASSERT(*nmap >= 1); - ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); - ASSERT(!(flags & XFS_BMAPI_IGSTATE)); - ASSERT(tp != NULL); - ASSERT(len > 0); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { - XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); - return EFSCORRUPTED; - } - - if (XFS_FORCED_SHUTDOWN(mp)) - return EIO; - - ifp = XFS_IFORK_PTR(ip, whichfork); - - XFS_STATS_INC(xs_blk_mapw); - - if (*firstblock == NULLFSBLOCK) { - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) - bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; - else - bma.minleft = 1; - } else { - bma.minleft = 0; - } - - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(tp, ip, whichfork); - if (error) - goto error0; - } - - xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got, - &bma.prev); - n = 0; - end = bno + len; - obno = bno; - - bma.tp = tp; - bma.ip = ip; - bma.total = total; - bma.userdata = 0; - bma.flist = flist; - bma.firstblock = firstblock; - - if (flags & XFS_BMAPI_STACK_SWITCH) - bma.stack_switch = 1; - - while (bno < end && n < *nmap) { - inhole = eof || bma.got.br_startoff > bno; - wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); - - /* - * First, deal with the hole before the allocated space - * that we found, if any. - */ - if (inhole || wasdelay) { - bma.eof = eof; - bma.conv = !!(flags & XFS_BMAPI_CONVERT); - bma.wasdel = wasdelay; - bma.offset = bno; - bma.flags = flags; - - /* - * There's a 32/64 bit type mismatch between the - * allocation length request (which can be 64 bits in - * length) and the bma length request, which is - * xfs_extlen_t and therefore 32 bits. Hence we have to - * check for 32-bit overflows and handle them here. - */ - if (len > (xfs_filblks_t)MAXEXTLEN) - bma.length = MAXEXTLEN; - else - bma.length = len; - - ASSERT(len > 0); - ASSERT(bma.length > 0); - error = xfs_bmapi_allocate(&bma); - if (error) - goto error0; - if (bma.blkno == NULLFSBLOCK) - break; - } - - /* Deal with the allocated space we found. */ - xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno, - end, n, flags); - - /* Execute unwritten extent conversion if necessary */ - error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags); - if (error == EAGAIN) - continue; - if (error) - goto error0; - - /* update the extent map to return */ - xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); - - /* - * If we're done, stop now. Stop when we've allocated - * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise - * the transaction may get too big. - */ - if (bno >= end || n >= *nmap || bma.nallocs >= *nmap) - break; - - /* Else go on to the next record. */ - bma.prev = bma.got; - if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx), - &bma.got); - } else - eof = 1; - } - *nmap = n; - - /* - * Transform from btree to extents, give it cur. - */ - if (xfs_bmap_wants_extents(ip, whichfork)) { - int tmp_logflags = 0; - - ASSERT(bma.cur); - error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, - &tmp_logflags, whichfork); - bma.logflags |= tmp_logflags; - if (error) - goto error0; - } - - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || - XFS_IFORK_NEXTENTS(ip, whichfork) > - XFS_IFORK_MAXEXT(ip, whichfork)); - error = 0; -error0: - /* - * Log everything. Do this after conversion, there's no point in - * logging the extent records if we've converted to btree format. - */ - if ((bma.logflags & xfs_ilog_fext(whichfork)) && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) - bma.logflags &= ~xfs_ilog_fext(whichfork); - else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) - bma.logflags &= ~xfs_ilog_fbroot(whichfork); - /* - * Log whatever the flags say, even if error. Otherwise we might miss - * detecting a case where the data is changed, there's an error, - * and it's not logged so we don't shutdown when we should. - */ - if (bma.logflags) - xfs_trans_log_inode(tp, ip, bma.logflags); - - if (bma.cur) { - if (!error) { - ASSERT(*firstblock == NULLFSBLOCK || - XFS_FSB_TO_AGNO(mp, *firstblock) == - XFS_FSB_TO_AGNO(mp, - bma.cur->bc_private.b.firstblock) || - (flist->xbf_low && - XFS_FSB_TO_AGNO(mp, *firstblock) < - XFS_FSB_TO_AGNO(mp, - bma.cur->bc_private.b.firstblock))); - *firstblock = bma.cur->bc_private.b.firstblock; - } - xfs_btree_del_cursor(bma.cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - } - if (!error) - xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, - orig_nmap, *nmap); - return error; -} - -/* - * Called by xfs_bmapi to update file extent records and the btree - * after removing space (or undoing a delayed allocation). - */ -STATIC int /* error */ -xfs_bmap_del_extent( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_trans_t *tp, /* current transaction pointer */ - xfs_extnum_t *idx, /* extent number to update/delete */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *del, /* data to remove from extents */ - int *logflagsp, /* inode logging flags */ - int whichfork) /* data or attr fork */ -{ - xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ - xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ - xfs_fsblock_t del_endblock=0; /* first block past del */ - xfs_fileoff_t del_endoff; /* first offset past del */ - int delay; /* current block is delayed allocated */ - int do_fx; /* free extent at end of routine */ - xfs_bmbt_rec_host_t *ep; /* current extent entry pointer */ - int error; /* error return value */ - int flags; /* inode logging flags */ - xfs_bmbt_irec_t got; /* current extent entry */ - xfs_fileoff_t got_endoff; /* first offset past got */ - int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_mount_t *mp; /* mount structure */ - xfs_filblks_t nblks; /* quota/sb block count */ - xfs_bmbt_irec_t new; /* new record to be inserted */ - /* REFERENCED */ - uint qfield; /* quota field to update */ - xfs_filblks_t temp; /* for indirect length calculations */ - xfs_filblks_t temp2; /* for indirect length calculations */ - int state = 0; - - XFS_STATS_INC(xs_del_exlist); - - if (whichfork == XFS_ATTR_FORK) - state |= BMAP_ATTRFORK; - - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / - (uint)sizeof(xfs_bmbt_rec_t))); - ASSERT(del->br_blockcount > 0); - ep = xfs_iext_get_ext(ifp, *idx); - xfs_bmbt_get_all(ep, &got); - ASSERT(got.br_startoff <= del->br_startoff); - del_endoff = del->br_startoff + del->br_blockcount; - got_endoff = got.br_startoff + got.br_blockcount; - ASSERT(got_endoff >= del_endoff); - delay = isnullstartblock(got.br_startblock); - ASSERT(isnullstartblock(del->br_startblock) == delay); - flags = 0; - qfield = 0; - error = 0; - /* - * If deleting a real allocation, must free up the disk space. - */ - if (!delay) { - flags = XFS_ILOG_CORE; - /* - * Realtime allocation. Free it and record di_nblocks update. - */ - if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { - xfs_fsblock_t bno; - xfs_filblks_t len; - - ASSERT(do_mod(del->br_blockcount, - mp->m_sb.sb_rextsize) == 0); - ASSERT(do_mod(del->br_startblock, - mp->m_sb.sb_rextsize) == 0); - bno = del->br_startblock; - len = del->br_blockcount; - do_div(bno, mp->m_sb.sb_rextsize); - do_div(len, mp->m_sb.sb_rextsize); - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); - if (error) - goto done; - do_fx = 0; - nblks = len * mp->m_sb.sb_rextsize; - qfield = XFS_TRANS_DQ_RTBCOUNT; - } - /* - * Ordinary allocation. - */ - else { - do_fx = 1; - nblks = del->br_blockcount; - qfield = XFS_TRANS_DQ_BCOUNT; - } - /* - * Set up del_endblock and cur for later. - */ - del_endblock = del->br_startblock + del->br_blockcount; - if (cur) { - if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, got.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - da_old = da_new = 0; - } else { - da_old = startblockval(got.br_startblock); - da_new = 0; - nblks = 0; - do_fx = 0; - } - /* - * Set flag value to use in switch statement. - * Left-contig is 2, right-contig is 1. - */ - switch (((got.br_startoff == del->br_startoff) << 1) | - (got_endoff == del_endoff)) { - case 3: - /* - * Matches the whole extent. Delete the entry. - */ - xfs_iext_remove(ip, *idx, 1, - whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); - --*idx; - if (delay) - break; - - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); - flags |= XFS_ILOG_CORE; - if (!cur) { - flags |= xfs_ilog_fext(whichfork); - break; - } - if ((error = xfs_btree_delete(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - break; - - case 2: - /* - * Deleting the first part of the extent. - */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_startoff(ep, del_endoff); - temp = got.br_blockcount - del->br_blockcount; - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - da_new = temp; - break; - } - xfs_bmbt_set_startblock(ep, del_endblock); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - if (!cur) { - flags |= xfs_ilog_fext(whichfork); - break; - } - if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock, - got.br_blockcount - del->br_blockcount, - got.br_state))) - goto done; - break; - - case 1: - /* - * Deleting the last part of the extent. - */ - temp = got.br_blockcount - del->br_blockcount; - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - da_new = temp; - break; - } - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - if (!cur) { - flags |= xfs_ilog_fext(whichfork); - break; - } - if ((error = xfs_bmbt_update(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount - del->br_blockcount, - got.br_state))) - goto done; - break; - - case 0: - /* - * Deleting the middle of the extent. - */ - temp = del->br_startoff - got.br_startoff; - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - new.br_startoff = del_endoff; - temp2 = got_endoff - del_endoff; - new.br_blockcount = temp2; - new.br_state = got.br_state; - if (!delay) { - new.br_startblock = del_endblock; - flags |= XFS_ILOG_CORE; - if (cur) { - if ((error = xfs_bmbt_update(cur, - got.br_startoff, - got.br_startblock, temp, - got.br_state))) - goto done; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto done; - cur->bc_rec.b = new; - error = xfs_btree_insert(cur, &i); - if (error && error != ENOSPC) - goto done; - /* - * If get no-space back from btree insert, - * it tried a split, and we have a zero - * block reservation. - * Fix up our state and return the error. - */ - if (error == ENOSPC) { - /* - * Reset the cursor, don't trust - * it after any insert operation. - */ - if ((error = xfs_bmbt_lookup_eq(cur, - got.br_startoff, - got.br_startblock, - temp, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - /* - * Update the btree record back - * to the original value. - */ - if ((error = xfs_bmbt_update(cur, - got.br_startoff, - got.br_startblock, - got.br_blockcount, - got.br_state))) - goto done; - /* - * Reset the extent record back - * to the original value. - */ - xfs_bmbt_set_blockcount(ep, - got.br_blockcount); - flags = 0; - error = ENOSPC; - goto done; - } - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } else - flags |= xfs_ilog_fext(whichfork); - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) + 1); - } else { - ASSERT(whichfork == XFS_DATA_FORK); - temp = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - temp2 = xfs_bmap_worst_indlen(ip, temp2); - new.br_startblock = nullstartblock((int)temp2); - da_new = temp + temp2; - while (da_new > da_old) { - if (temp) { - temp--; - da_new--; - xfs_bmbt_set_startblock(ep, - nullstartblock((int)temp)); - } - if (da_new == da_old) - break; - if (temp2) { - temp2--; - da_new--; - new.br_startblock = - nullstartblock((int)temp2); - } - } - } - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_insert(ip, *idx + 1, 1, &new, state); - ++*idx; - break; - } - /* - * If we need to, add to list of extents to delete. - */ - if (do_fx) - xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist, - mp); - /* - * Adjust inode # blocks in the file. - */ - if (nblks) - ip->i_d.di_nblocks -= nblks; - /* - * Adjust quota data. - */ - if (qfield) - xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); - - /* - * Account for change in delayed indirect blocks. - * Nothing to do for disk quota accounting here. - */ - ASSERT(da_old >= da_new); - if (da_old > da_new) { - xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)(da_old - da_new), 0); - } -done: - *logflagsp = flags; - return error; -} - -/* - * Unmap (remove) blocks from a file. - * If nexts is nonzero then the number of extents to remove is limited to - * that value. If not all extents in the block range can be removed then - * *done is set. - */ -int /* error */ -xfs_bunmapi( - xfs_trans_t *tp, /* transaction pointer */ - struct xfs_inode *ip, /* incore inode */ - xfs_fileoff_t bno, /* starting offset to unmap */ - xfs_filblks_t len, /* length to unmap in file */ - int flags, /* misc flags */ - xfs_extnum_t nexts, /* number of extents max */ - xfs_fsblock_t *firstblock, /* first allocated block - controls a.g. for allocs */ - xfs_bmap_free_t *flist, /* i/o: list extents to free */ - int *done) /* set if not done yet */ -{ - xfs_btree_cur_t *cur; /* bmap btree cursor */ - xfs_bmbt_irec_t del; /* extent being deleted */ - int eof; /* is deleting at eof */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - int error; /* error return value */ - xfs_extnum_t extno; /* extent number in list */ - xfs_bmbt_irec_t got; /* current extent record */ - xfs_ifork_t *ifp; /* inode fork pointer */ - int isrt; /* freeing in rt area */ - xfs_extnum_t lastx; /* last extent index used */ - int logflags; /* transaction logging flags */ - xfs_extlen_t mod; /* rt extent offset */ - xfs_mount_t *mp; /* mount structure */ - xfs_extnum_t nextents; /* number of file extents */ - xfs_bmbt_irec_t prev; /* previous extent record */ - xfs_fileoff_t start; /* first file offset deleted */ - int tmp_logflags; /* partial logging flags */ - int wasdel; /* was a delayed alloc extent */ - int whichfork; /* data or attribute fork */ - xfs_fsblock_t sum; - - trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); - - whichfork = (flags & XFS_BMAPI_ATTRFORK) ? - XFS_ATTR_FORK : XFS_DATA_FORK; - ifp = XFS_IFORK_PTR(ip, whichfork); - if (unlikely( - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { - XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW, - ip->i_mount); - return EFSCORRUPTED; - } - mp = ip->i_mount; - if (XFS_FORCED_SHUTDOWN(mp)) - return EIO; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(len > 0); - ASSERT(nexts >= 0); - - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) - return error; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (nextents == 0) { - *done = 1; - return 0; - } - XFS_STATS_INC(xs_blk_unmap); - isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); - start = bno; - bno = start + len - 1; - ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, - &prev); - - /* - * Check to see if the given block number is past the end of the - * file, back up to the last block if so... - */ - if (eof) { - ep = xfs_iext_get_ext(ifp, --lastx); - xfs_bmbt_get_all(ep, &got); - bno = got.br_startoff + got.br_blockcount - 1; - } - logflags = 0; - if (ifp->if_flags & XFS_IFBROOT) { - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); - cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.firstblock = *firstblock; - cur->bc_private.b.flist = flist; - cur->bc_private.b.flags = 0; - } else - cur = NULL; - - if (isrt) { - /* - * Synchronize by locking the bitmap inode. - */ - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); - } - - extno = 0; - while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && - (nexts == 0 || extno < nexts)) { - /* - * Is the found extent after a hole in which bno lives? - * Just back up to the previous extent, if so. - */ - if (got.br_startoff > bno) { - if (--lastx < 0) - break; - ep = xfs_iext_get_ext(ifp, lastx); - xfs_bmbt_get_all(ep, &got); - } - /* - * Is the last block of this extent before the range - * we're supposed to delete? If so, we're done. - */ - bno = XFS_FILEOFF_MIN(bno, - got.br_startoff + got.br_blockcount - 1); - if (bno < start) - break; - /* - * Then deal with the (possibly delayed) allocated space - * we found. - */ - ASSERT(ep != NULL); - del = got; - wasdel = isnullstartblock(del.br_startblock); - if (got.br_startoff < start) { - del.br_startoff = start; - del.br_blockcount -= start - got.br_startoff; - if (!wasdel) - del.br_startblock += start - got.br_startoff; - } - if (del.br_startoff + del.br_blockcount > bno + 1) - del.br_blockcount = bno + 1 - del.br_startoff; - sum = del.br_startblock + del.br_blockcount; - if (isrt && - (mod = do_mod(sum, mp->m_sb.sb_rextsize))) { - /* - * Realtime extent not lined up at the end. - * The extent could have been split into written - * and unwritten pieces, or we could just be - * unmapping part of it. But we can't really - * get rid of part of a realtime extent. - */ - if (del.br_state == XFS_EXT_UNWRITTEN || - !xfs_sb_version_hasextflgbit(&mp->m_sb)) { - /* - * This piece is unwritten, or we're not - * using unwritten extents. Skip over it. - */ - ASSERT(bno >= mod); - bno -= mod > del.br_blockcount ? - del.br_blockcount : mod; - if (bno < got.br_startoff) { - if (--lastx >= 0) - xfs_bmbt_get_all(xfs_iext_get_ext( - ifp, lastx), &got); - } - continue; - } - /* - * It's written, turn it unwritten. - * This is better than zeroing it. - */ - ASSERT(del.br_state == XFS_EXT_NORM); - ASSERT(xfs_trans_get_block_res(tp) > 0); - /* - * If this spans a realtime extent boundary, - * chop it back to the start of the one we end at. - */ - if (del.br_blockcount > mod) { - del.br_startoff += del.br_blockcount - mod; - del.br_startblock += del.br_blockcount - mod; - del.br_blockcount = mod; - } - del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent_unwritten_real(tp, ip, - &lastx, &cur, &del, firstblock, flist, - &logflags); - if (error) - goto error0; - goto nodelete; - } - if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) { - /* - * Realtime extent is lined up at the end but not - * at the front. We'll get rid of full extents if - * we can. - */ - mod = mp->m_sb.sb_rextsize - mod; - if (del.br_blockcount > mod) { - del.br_blockcount -= mod; - del.br_startoff += mod; - del.br_startblock += mod; - } else if ((del.br_startoff == start && - (del.br_state == XFS_EXT_UNWRITTEN || - xfs_trans_get_block_res(tp) == 0)) || - !xfs_sb_version_hasextflgbit(&mp->m_sb)) { - /* - * Can't make it unwritten. There isn't - * a full extent here so just skip it. - */ - ASSERT(bno >= del.br_blockcount); - bno -= del.br_blockcount; - if (got.br_startoff > bno) { - if (--lastx >= 0) { - ep = xfs_iext_get_ext(ifp, - lastx); - xfs_bmbt_get_all(ep, &got); - } - } - continue; - } else if (del.br_state == XFS_EXT_UNWRITTEN) { - /* - * This one is already unwritten. - * It must have a written left neighbor. - * Unwrite the killed part of that one and - * try again. - */ - ASSERT(lastx > 0); - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, - lastx - 1), &prev); - ASSERT(prev.br_state == XFS_EXT_NORM); - ASSERT(!isnullstartblock(prev.br_startblock)); - ASSERT(del.br_startblock == - prev.br_startblock + prev.br_blockcount); - if (prev.br_startoff < start) { - mod = start - prev.br_startoff; - prev.br_blockcount -= mod; - prev.br_startblock += mod; - prev.br_startoff = start; - } - prev.br_state = XFS_EXT_UNWRITTEN; - lastx--; - error = xfs_bmap_add_extent_unwritten_real(tp, - ip, &lastx, &cur, &prev, - firstblock, flist, &logflags); - if (error) - goto error0; - goto nodelete; - } else { - ASSERT(del.br_state == XFS_EXT_NORM); - del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent_unwritten_real(tp, - ip, &lastx, &cur, &del, - firstblock, flist, &logflags); - if (error) - goto error0; - goto nodelete; - } - } - if (wasdel) { - ASSERT(startblockval(del.br_startblock) > 0); - /* Update realtime/data freespace, unreserve quota */ - if (isrt) { - xfs_filblks_t rtexts; - - rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); - do_div(rtexts, mp->m_sb.sb_rextsize); - xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - (int64_t)rtexts, 0); - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, - XFS_QMOPT_RES_RTBLKS); - } else { - xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)del.br_blockcount, 0); - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, - XFS_QMOPT_RES_REGBLKS); - } - ip->i_delayed_blks -= del.br_blockcount; - if (cur) - cur->bc_private.b.flags |= - XFS_BTCUR_BPRV_WASDEL; - } else if (cur) - cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; - /* - * If it's the case where the directory code is running - * with no block reservation, and the deleted block is in - * the middle of its extent, and the resulting insert - * of an extent would cause transformation to btree format, - * then reject it. The calling code will then swap - * blocks around instead. - * We have to do this now, rather than waiting for the - * conversion to btree format, since the transaction - * will be dirty. - */ - if (!wasdel && xfs_trans_get_block_res(tp) == 0 && - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ - XFS_IFORK_MAXEXT(ip, whichfork) && - del.br_startoff > got.br_startoff && - del.br_startoff + del.br_blockcount < - got.br_startoff + got.br_blockcount) { - error = ENOSPC; - goto error0; - } - error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, - &tmp_logflags, whichfork); - logflags |= tmp_logflags; - if (error) - goto error0; - bno = del.br_startoff - 1; -nodelete: - /* - * If not done go on to the next (previous) record. - */ - if (bno != (xfs_fileoff_t)-1 && bno >= start) { - if (lastx >= 0) { - ep = xfs_iext_get_ext(ifp, lastx); - if (xfs_bmbt_get_startoff(ep) > bno) { - if (--lastx >= 0) - ep = xfs_iext_get_ext(ifp, - lastx); - } - xfs_bmbt_get_all(ep, &got); - } - extno++; - } - } - *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; - - /* - * Convert to a btree if necessary. - */ - if (xfs_bmap_needs_btree(ip, whichfork)) { - ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, - &cur, 0, &tmp_logflags, whichfork); - logflags |= tmp_logflags; - if (error) - goto error0; - } - /* - * transform from btree to extents, give it cur - */ - else if (xfs_bmap_wants_extents(ip, whichfork)) { - ASSERT(cur != NULL); - error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags, - whichfork); - logflags |= tmp_logflags; - if (error) - goto error0; - } - /* - * transform from extents to local? - */ - error = 0; -error0: - /* - * Log everything. Do this after conversion, there's no point in - * logging the extent records if we've converted to btree format. - */ - if ((logflags & xfs_ilog_fext(whichfork)) && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) - logflags &= ~xfs_ilog_fext(whichfork); - else if ((logflags & xfs_ilog_fbroot(whichfork)) && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) - logflags &= ~xfs_ilog_fbroot(whichfork); - /* - * Log inode even in the error case, if the transaction - * is dirty we'll need to shut down the filesystem. - */ - if (logflags) - xfs_trans_log_inode(tp, ip, logflags); - if (cur) { - if (!error) { - *firstblock = cur->bc_private.b.firstblock; - cur->bc_private.b.allocated = 0; - } - xfs_btree_del_cursor(cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - } - return error; -} - -/* - * Shift extent records to the left to cover a hole. - * - * The maximum number of extents to be shifted in a single operation - * is @num_exts, and @current_ext keeps track of the current extent - * index we have shifted. @offset_shift_fsb is the length by which each - * extent is shifted. If there is no hole to shift the extents - * into, this will be considered invalid operation and we abort immediately. - */ -int -xfs_bmap_shift_extents( - struct xfs_trans *tp, - struct xfs_inode *ip, - int *done, - xfs_fileoff_t start_fsb, - xfs_fileoff_t offset_shift_fsb, - xfs_extnum_t *current_ext, - xfs_fsblock_t *firstblock, - struct xfs_bmap_free *flist, - int num_exts) -{ - struct xfs_btree_cur *cur; - struct xfs_bmbt_rec_host *gotp; - struct xfs_bmbt_irec got; - struct xfs_bmbt_irec left; - struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp; - xfs_extnum_t nexts = 0; - xfs_fileoff_t startoff; - int error = 0; - int i; - int whichfork = XFS_DATA_FORK; - int logflags; - xfs_filblks_t blockcount = 0; - int total_extents; - - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { - XFS_ERROR_REPORT("xfs_bmap_shift_extents", - XFS_ERRLEVEL_LOW, mp); - return EFSCORRUPTED; - } - - if (XFS_FORCED_SHUTDOWN(mp)) - return EIO; - - ASSERT(current_ext != NULL); - - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - /* Read in all the extents */ - error = xfs_iread_extents(tp, ip, whichfork); - if (error) - return error; - } - - /* - * If *current_ext is 0, we would need to lookup the extent - * from where we would start shifting and store it in gotp. - */ - if (!*current_ext) { - gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); - /* - * gotp can be null in 2 cases: 1) if there are no extents - * or 2) start_fsb lies in a hole beyond which there are - * no extents. Either way, we are done. - */ - if (!gotp) { - *done = 1; - return 0; - } - } - - /* We are going to change core inode */ - logflags = XFS_ILOG_CORE; - if (ifp->if_flags & XFS_IFBROOT) { - cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.firstblock = *firstblock; - cur->bc_private.b.flist = flist; - cur->bc_private.b.flags = 0; - } else { - cur = NULL; - logflags |= XFS_ILOG_DEXT; - } - - /* - * There may be delalloc extents in the data fork before the range we - * are collapsing out, so we cannot - * use the count of real extents here. Instead we have to calculate it - * from the incore fork. - */ - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); - while (nexts++ < num_exts && *current_ext < total_extents) { - - gotp = xfs_iext_get_ext(ifp, *current_ext); - xfs_bmbt_get_all(gotp, &got); - startoff = got.br_startoff - offset_shift_fsb; - - /* - * Before shifting extent into hole, make sure that the hole - * is large enough to accomodate the shift. - */ - if (*current_ext) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, - *current_ext - 1), &left); - - if (startoff < left.br_startoff + left.br_blockcount) - error = EINVAL; - } else if (offset_shift_fsb > got.br_startoff) { - /* - * When first extent is shifted, offset_shift_fsb - * should be less than the stating offset of - * the first extent. - */ - error = EINVAL; - } - - if (error) - goto del_cursor; - - if (cur) { - error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - - /* Check if we can merge 2 adjacent extents */ - if (*current_ext && - left.br_startoff + left.br_blockcount == startoff && - left.br_startblock + left.br_blockcount == - got.br_startblock && - left.br_state == got.br_state && - left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { - blockcount = left.br_blockcount + - got.br_blockcount; - xfs_iext_remove(ip, *current_ext, 1, 0); - if (cur) { - error = xfs_btree_delete(cur, &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); - gotp = xfs_iext_get_ext(ifp, --*current_ext); - xfs_bmbt_get_all(gotp, &got); - - /* Make cursor point to the extent we will update */ - if (cur) { - error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - - xfs_bmbt_set_blockcount(gotp, blockcount); - got.br_blockcount = blockcount; - } else { - /* We have to update the startoff */ - xfs_bmbt_set_startoff(gotp, startoff); - got.br_startoff = startoff; - } - - if (cur) { - error = xfs_bmbt_update(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - got.br_state); - if (error) - goto del_cursor; - } - - (*current_ext)++; - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); - } - - /* Check if we are done */ - if (*current_ext == total_extents) - *done = 1; - -del_cursor: - if (cur) - xfs_btree_del_cursor(cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - - xfs_trans_log_inode(tp, ip, logflags); - return error; -} diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c deleted file mode 100644 index de65bb8bab04..000000000000 --- a/fs/xfs/xfs_bmap_btree.c +++ /dev/null @@ -1,967 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_alloc.h" -#include "xfs_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_bmap.h" -#include "xfs_error.h" -#include "xfs_quota.h" -#include "xfs_trace.h" -#include "xfs_cksum.h" -#include "xfs_dinode.h" - -/* - * Determine the extent state. - */ -/* ARGSUSED */ -STATIC xfs_exntst_t -xfs_extent_state( - xfs_filblks_t blks, - int extent_flag) -{ - if (extent_flag) { - ASSERT(blks != 0); /* saved for DMIG */ - return XFS_EXT_UNWRITTEN; - } - return XFS_EXT_NORM; -} - -/* - * Convert on-disk form of btree root to in-memory form. - */ -void -xfs_bmdr_to_bmbt( - struct xfs_inode *ip, - xfs_bmdr_block_t *dblock, - int dblocklen, - struct xfs_btree_block *rblock, - int rblocklen) -{ - struct xfs_mount *mp = ip->i_mount; - int dmxr; - xfs_bmbt_key_t *fkp; - __be64 *fpp; - xfs_bmbt_key_t *tkp; - __be64 *tpp; - - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, - XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, - XFS_BMAP_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS); - - rblock->bb_level = dblock->bb_level; - ASSERT(be16_to_cpu(rblock->bb_level) > 0); - rblock->bb_numrecs = dblock->bb_numrecs; - dmxr = xfs_bmdr_maxrecs(dblocklen, 0); - fkp = XFS_BMDR_KEY_ADDR(dblock, 1); - tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); - fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); - tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); - dmxr = be16_to_cpu(dblock->bb_numrecs); - memcpy(tkp, fkp, sizeof(*fkp) * dmxr); - memcpy(tpp, fpp, sizeof(*fpp) * dmxr); -} - -/* - * Convert a compressed bmap extent record to an uncompressed form. - * This code must be in sync with the routines xfs_bmbt_get_startoff, - * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. - */ -STATIC void -__xfs_bmbt_get_all( - __uint64_t l0, - __uint64_t l1, - xfs_bmbt_irec_t *s) -{ - int ext_flag; - xfs_exntst_t st; - - ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN)); - s->br_startoff = ((xfs_fileoff_t)l0 & - xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; -#if XFS_BIG_BLKNOS - s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) | - (((xfs_fsblock_t)l1) >> 21); -#else -#ifdef DEBUG - { - xfs_dfsbno_t b; - - b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) | - (((xfs_dfsbno_t)l1) >> 21); - ASSERT((b >> 32) == 0 || isnulldstartblock(b)); - s->br_startblock = (xfs_fsblock_t)b; - } -#else /* !DEBUG */ - s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21); -#endif /* DEBUG */ -#endif /* XFS_BIG_BLKNOS */ - s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21)); - /* This is xfs_extent_state() in-line */ - if (ext_flag) { - ASSERT(s->br_blockcount != 0); /* saved for DMIG */ - st = XFS_EXT_UNWRITTEN; - } else - st = XFS_EXT_NORM; - s->br_state = st; -} - -void -xfs_bmbt_get_all( - xfs_bmbt_rec_host_t *r, - xfs_bmbt_irec_t *s) -{ - __xfs_bmbt_get_all(r->l0, r->l1, s); -} - -/* - * Extract the blockcount field from an in memory bmap extent record. - */ -xfs_filblks_t -xfs_bmbt_get_blockcount( - xfs_bmbt_rec_host_t *r) -{ - return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21)); -} - -/* - * Extract the startblock field from an in memory bmap extent record. - */ -xfs_fsblock_t -xfs_bmbt_get_startblock( - xfs_bmbt_rec_host_t *r) -{ -#if XFS_BIG_BLKNOS - return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) | - (((xfs_fsblock_t)r->l1) >> 21); -#else -#ifdef DEBUG - xfs_dfsbno_t b; - - b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) | - (((xfs_dfsbno_t)r->l1) >> 21); - ASSERT((b >> 32) == 0 || isnulldstartblock(b)); - return (xfs_fsblock_t)b; -#else /* !DEBUG */ - return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21); -#endif /* DEBUG */ -#endif /* XFS_BIG_BLKNOS */ -} - -/* - * Extract the startoff field from an in memory bmap extent record. - */ -xfs_fileoff_t -xfs_bmbt_get_startoff( - xfs_bmbt_rec_host_t *r) -{ - return ((xfs_fileoff_t)r->l0 & - xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; -} - -xfs_exntst_t -xfs_bmbt_get_state( - xfs_bmbt_rec_host_t *r) -{ - int ext_flag; - - ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN)); - return xfs_extent_state(xfs_bmbt_get_blockcount(r), - ext_flag); -} - -/* - * Extract the blockcount field from an on disk bmap extent record. - */ -xfs_filblks_t -xfs_bmbt_disk_get_blockcount( - xfs_bmbt_rec_t *r) -{ - return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21)); -} - -/* - * Extract the startoff field from a disk format bmap extent record. - */ -xfs_fileoff_t -xfs_bmbt_disk_get_startoff( - xfs_bmbt_rec_t *r) -{ - return ((xfs_fileoff_t)be64_to_cpu(r->l0) & - xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; -} - - -/* - * Set all the fields in a bmap extent record from the arguments. - */ -void -xfs_bmbt_set_allf( - xfs_bmbt_rec_host_t *r, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t blockcount, - xfs_exntst_t state) -{ - int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1; - - ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); - ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); - ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); - -#if XFS_BIG_BLKNOS - ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); - - r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9) | - ((xfs_bmbt_rec_base_t)startblock >> 43); - r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); -#else /* !XFS_BIG_BLKNOS */ - if (isnullstartblock(startblock)) { - r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9) | - (xfs_bmbt_rec_base_t)xfs_mask64lo(9); - r->l1 = xfs_mask64hi(11) | - ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } else { - r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9); - r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } -#endif /* XFS_BIG_BLKNOS */ -} - -/* - * Set all the fields in a bmap extent record from the uncompressed form. - */ -void -xfs_bmbt_set_all( - xfs_bmbt_rec_host_t *r, - xfs_bmbt_irec_t *s) -{ - xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock, - s->br_blockcount, s->br_state); -} - - -/* - * Set all the fields in a disk format bmap extent record from the arguments. - */ -void -xfs_bmbt_disk_set_allf( - xfs_bmbt_rec_t *r, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t blockcount, - xfs_exntst_t state) -{ - int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1; - - ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); - ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); - ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); - -#if XFS_BIG_BLKNOS - ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); - - r->l0 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9) | - ((xfs_bmbt_rec_base_t)startblock >> 43)); - r->l1 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); -#else /* !XFS_BIG_BLKNOS */ - if (isnullstartblock(startblock)) { - r->l0 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9) | - (xfs_bmbt_rec_base_t)xfs_mask64lo(9)); - r->l1 = cpu_to_be64(xfs_mask64hi(11) | - ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); - } else { - r->l0 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9)); - r->l1 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); - } -#endif /* XFS_BIG_BLKNOS */ -} - -/* - * Set all the fields in a bmap extent record from the uncompressed form. - */ -STATIC void -xfs_bmbt_disk_set_all( - xfs_bmbt_rec_t *r, - xfs_bmbt_irec_t *s) -{ - xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock, - s->br_blockcount, s->br_state); -} - -/* - * Set the blockcount field in a bmap extent record. - */ -void -xfs_bmbt_set_blockcount( - xfs_bmbt_rec_host_t *r, - xfs_filblks_t v) -{ - ASSERT((v & xfs_mask64hi(43)) == 0); - r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) | - (xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21)); -} - -/* - * Set the startblock field in a bmap extent record. - */ -void -xfs_bmbt_set_startblock( - xfs_bmbt_rec_host_t *r, - xfs_fsblock_t v) -{ -#if XFS_BIG_BLKNOS - ASSERT((v & xfs_mask64hi(12)) == 0); - r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) | - (xfs_bmbt_rec_base_t)(v >> 43); - r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) | - (xfs_bmbt_rec_base_t)(v << 21); -#else /* !XFS_BIG_BLKNOS */ - if (isnullstartblock(v)) { - r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9); - r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) | - ((xfs_bmbt_rec_base_t)v << 21) | - (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } else { - r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9); - r->l1 = ((xfs_bmbt_rec_base_t)v << 21) | - (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } -#endif /* XFS_BIG_BLKNOS */ -} - -/* - * Set the startoff field in a bmap extent record. - */ -void -xfs_bmbt_set_startoff( - xfs_bmbt_rec_host_t *r, - xfs_fileoff_t v) -{ - ASSERT((v & xfs_mask64hi(9)) == 0); - r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) | - ((xfs_bmbt_rec_base_t)v << 9) | - (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9)); -} - -/* - * Set the extent state field in a bmap extent record. - */ -void -xfs_bmbt_set_state( - xfs_bmbt_rec_host_t *r, - xfs_exntst_t v) -{ - ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN); - if (v == XFS_EXT_NORM) - r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN); - else - r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN); -} - -/* - * Convert in-memory form of btree root to on-disk form. - */ -void -xfs_bmbt_to_bmdr( - struct xfs_mount *mp, - struct xfs_btree_block *rblock, - int rblocklen, - xfs_bmdr_block_t *dblock, - int dblocklen) -{ - int dmxr; - xfs_bmbt_key_t *fkp; - __be64 *fpp; - xfs_bmbt_key_t *tkp; - __be64 *tpp; - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC)); - ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid)); - ASSERT(rblock->bb_u.l.bb_blkno == - cpu_to_be64(XFS_BUF_DADDR_NULL)); - } else - ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); - ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); - ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); - ASSERT(rblock->bb_level != 0); - dblock->bb_level = rblock->bb_level; - dblock->bb_numrecs = rblock->bb_numrecs; - dmxr = xfs_bmdr_maxrecs(dblocklen, 0); - fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); - tkp = XFS_BMDR_KEY_ADDR(dblock, 1); - fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); - tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); - dmxr = be16_to_cpu(dblock->bb_numrecs); - memcpy(tkp, fkp, sizeof(*fkp) * dmxr); - memcpy(tpp, fpp, sizeof(*fpp) * dmxr); -} - -/* - * Check extent records, which have just been read, for - * any bit in the extent flag field. ASSERT on debug - * kernels, as this condition should not occur. - * Return an error condition (1) if any flags found, - * otherwise return 0. - */ - -int -xfs_check_nostate_extents( - xfs_ifork_t *ifp, - xfs_extnum_t idx, - xfs_extnum_t num) -{ - for (; num > 0; num--, idx++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); - if ((ep->l0 >> - (64 - BMBT_EXNTFLAG_BITLEN)) != 0) { - ASSERT(0); - return 1; - } - } - return 0; -} - - -STATIC struct xfs_btree_cur * -xfs_bmbt_dup_cursor( - struct xfs_btree_cur *cur) -{ - struct xfs_btree_cur *new; - - new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.b.ip, cur->bc_private.b.whichfork); - - /* - * Copy the firstblock, flist, and flags values, - * since init cursor doesn't get them. - */ - new->bc_private.b.firstblock = cur->bc_private.b.firstblock; - new->bc_private.b.flist = cur->bc_private.b.flist; - new->bc_private.b.flags = cur->bc_private.b.flags; - - return new; -} - -STATIC void -xfs_bmbt_update_cursor( - struct xfs_btree_cur *src, - struct xfs_btree_cur *dst) -{ - ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) || - (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); - ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist); - - dst->bc_private.b.allocated += src->bc_private.b.allocated; - dst->bc_private.b.firstblock = src->bc_private.b.firstblock; - - src->bc_private.b.allocated = 0; -} - -STATIC int -xfs_bmbt_alloc_block( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *start, - union xfs_btree_ptr *new, - int *stat) -{ - xfs_alloc_arg_t args; /* block allocation args */ - int error; /* error return value */ - - memset(&args, 0, sizeof(args)); - args.tp = cur->bc_tp; - args.mp = cur->bc_mp; - args.fsbno = cur->bc_private.b.firstblock; - args.firstblock = args.fsbno; - - if (args.fsbno == NULLFSBLOCK) { - args.fsbno = be64_to_cpu(start->l); - args.type = XFS_ALLOCTYPE_START_BNO; - /* - * Make sure there is sufficient room left in the AG to - * complete a full tree split for an extent insert. If - * we are converting the middle part of an extent then - * we may need space for two tree splits. - * - * We are relying on the caller to make the correct block - * reservation for this operation to succeed. If the - * reservation amount is insufficient then we may fail a - * block allocation here and corrupt the filesystem. - */ - args.minleft = xfs_trans_get_block_res(args.tp); - } else if (cur->bc_private.b.flist->xbf_low) { - args.type = XFS_ALLOCTYPE_START_BNO; - } else { - args.type = XFS_ALLOCTYPE_NEAR_BNO; - } - - args.minlen = args.maxlen = args.prod = 1; - args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; - if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) { - error = ENOSPC; - goto error0; - } - error = xfs_alloc_vextent(&args); - if (error) - goto error0; - - if (args.fsbno == NULLFSBLOCK && args.minleft) { - /* - * Could not find an AG with enough free space to satisfy - * a full btree split. Try again without minleft and if - * successful activate the lowspace algorithm. - */ - args.fsbno = 0; - args.type = XFS_ALLOCTYPE_FIRST_AG; - args.minleft = 0; - error = xfs_alloc_vextent(&args); - if (error) - goto error0; - cur->bc_private.b.flist->xbf_low = 1; - } - if (args.fsbno == NULLFSBLOCK) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - } - ASSERT(args.len == 1); - cur->bc_private.b.firstblock = args.fsbno; - cur->bc_private.b.allocated++; - cur->bc_private.b.ip->i_d.di_nblocks++; - xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); - xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip, - XFS_TRANS_DQ_BCOUNT, 1L); - - new->l = cpu_to_be64(args.fsbno); - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; - - error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -} - -STATIC int -xfs_bmbt_free_block( - struct xfs_btree_cur *cur, - struct xfs_buf *bp) -{ - struct xfs_mount *mp = cur->bc_mp; - struct xfs_inode *ip = cur->bc_private.b.ip; - struct xfs_trans *tp = cur->bc_tp; - xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); - - xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp); - ip->i_d.di_nblocks--; - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); - xfs_trans_binval(tp, bp); - return 0; -} - -STATIC int -xfs_bmbt_get_minrecs( - struct xfs_btree_cur *cur, - int level) -{ - if (level == cur->bc_nlevels - 1) { - struct xfs_ifork *ifp; - - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, - cur->bc_private.b.whichfork); - - return xfs_bmbt_maxrecs(cur->bc_mp, - ifp->if_broot_bytes, level == 0) / 2; - } - - return cur->bc_mp->m_bmap_dmnr[level != 0]; -} - -int -xfs_bmbt_get_maxrecs( - struct xfs_btree_cur *cur, - int level) -{ - if (level == cur->bc_nlevels - 1) { - struct xfs_ifork *ifp; - - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, - cur->bc_private.b.whichfork); - - return xfs_bmbt_maxrecs(cur->bc_mp, - ifp->if_broot_bytes, level == 0); - } - - return cur->bc_mp->m_bmap_dmxr[level != 0]; - -} - -/* - * Get the maximum records we could store in the on-disk format. - * - * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but - * for the root node this checks the available space in the dinode fork - * so that we can resize the in-memory buffer to match it. After a - * resize to the maximum size this function returns the same value - * as xfs_bmbt_get_maxrecs for the root node, too. - */ -STATIC int -xfs_bmbt_get_dmaxrecs( - struct xfs_btree_cur *cur, - int level) -{ - if (level != cur->bc_nlevels - 1) - return cur->bc_mp->m_bmap_dmxr[level != 0]; - return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0); -} - -STATIC void -xfs_bmbt_init_key_from_rec( - union xfs_btree_key *key, - union xfs_btree_rec *rec) -{ - key->bmbt.br_startoff = - cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt)); -} - -STATIC void -xfs_bmbt_init_rec_from_key( - union xfs_btree_key *key, - union xfs_btree_rec *rec) -{ - ASSERT(key->bmbt.br_startoff != 0); - - xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff), - 0, 0, XFS_EXT_NORM); -} - -STATIC void -xfs_bmbt_init_rec_from_cur( - struct xfs_btree_cur *cur, - union xfs_btree_rec *rec) -{ - xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b); -} - -STATIC void -xfs_bmbt_init_ptr_from_cur( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr) -{ - ptr->l = 0; -} - -STATIC __int64_t -xfs_bmbt_key_diff( - struct xfs_btree_cur *cur, - union xfs_btree_key *key) -{ - return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) - - cur->bc_rec.b.br_startoff; -} - -static bool -xfs_bmbt_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - unsigned int level; - - switch (block->bb_magic) { - case cpu_to_be32(XFS_BMAP_CRC_MAGIC): - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid)) - return false; - if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn) - return false; - /* - * XXX: need a better way of verifying the owner here. Right now - * just make sure there has been one set. - */ - if (be64_to_cpu(block->bb_u.l.bb_owner) == 0) - return false; - /* fall through */ - case cpu_to_be32(XFS_BMAP_MAGIC): - break; - default: - return false; - } - - /* - * numrecs and level verification. - * - * We don't know what fork we belong to, so just verify that the level - * is less than the maximum of the two. Later checks will be more - * precise. - */ - level = be16_to_cpu(block->bb_level); - if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) - return false; - if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) - return false; - - /* sibling pointer verification */ - if (!block->bb_u.l.bb_leftsib || - (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) && - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) - return false; - if (!block->bb_u.l.bb_rightsib || - (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) && - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) - return false; - - return true; -} - -static void -xfs_bmbt_read_verify( - struct xfs_buf *bp) -{ - if (!xfs_btree_lblock_verify_crc(bp)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (!xfs_bmbt_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) { - trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } -} - -static void -xfs_bmbt_write_verify( - struct xfs_buf *bp) -{ - if (!xfs_bmbt_verify(bp)) { - trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - xfs_btree_lblock_calc_crc(bp); -} - -const struct xfs_buf_ops xfs_bmbt_buf_ops = { - .verify_read = xfs_bmbt_read_verify, - .verify_write = xfs_bmbt_write_verify, -}; - - -#if defined(DEBUG) || defined(XFS_WARN) -STATIC int -xfs_bmbt_keys_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_key *k1, - union xfs_btree_key *k2) -{ - return be64_to_cpu(k1->bmbt.br_startoff) < - be64_to_cpu(k2->bmbt.br_startoff); -} - -STATIC int -xfs_bmbt_recs_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_rec *r1, - union xfs_btree_rec *r2) -{ - return xfs_bmbt_disk_get_startoff(&r1->bmbt) + - xfs_bmbt_disk_get_blockcount(&r1->bmbt) <= - xfs_bmbt_disk_get_startoff(&r2->bmbt); -} -#endif /* DEBUG */ - -static const struct xfs_btree_ops xfs_bmbt_ops = { - .rec_len = sizeof(xfs_bmbt_rec_t), - .key_len = sizeof(xfs_bmbt_key_t), - - .dup_cursor = xfs_bmbt_dup_cursor, - .update_cursor = xfs_bmbt_update_cursor, - .alloc_block = xfs_bmbt_alloc_block, - .free_block = xfs_bmbt_free_block, - .get_maxrecs = xfs_bmbt_get_maxrecs, - .get_minrecs = xfs_bmbt_get_minrecs, - .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, - .init_key_from_rec = xfs_bmbt_init_key_from_rec, - .init_rec_from_key = xfs_bmbt_init_rec_from_key, - .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, - .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, - .key_diff = xfs_bmbt_key_diff, - .buf_ops = &xfs_bmbt_buf_ops, -#if defined(DEBUG) || defined(XFS_WARN) - .keys_inorder = xfs_bmbt_keys_inorder, - .recs_inorder = xfs_bmbt_recs_inorder, -#endif -}; - -/* - * Allocate a new bmap btree cursor. - */ -struct xfs_btree_cur * /* new bmap btree cursor */ -xfs_bmbt_init_cursor( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_inode *ip, /* inode owning the btree */ - int whichfork) /* data or attr fork */ -{ - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); - struct xfs_btree_cur *cur; - - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); - - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; - cur->bc_btnum = XFS_BTNUM_BMAP; - cur->bc_blocklog = mp->m_sb.sb_blocklog; - - cur->bc_ops = &xfs_bmbt_ops; - cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE; - if (xfs_sb_version_hascrc(&mp->m_sb)) - cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - - cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); - cur->bc_private.b.ip = ip; - cur->bc_private.b.firstblock = NULLFSBLOCK; - cur->bc_private.b.flist = NULL; - cur->bc_private.b.allocated = 0; - cur->bc_private.b.flags = 0; - cur->bc_private.b.whichfork = whichfork; - - return cur; -} - -/* - * Calculate number of records in a bmap btree block. - */ -int -xfs_bmbt_maxrecs( - struct xfs_mount *mp, - int blocklen, - int leaf) -{ - blocklen -= XFS_BMBT_BLOCK_LEN(mp); - - if (leaf) - return blocklen / sizeof(xfs_bmbt_rec_t); - return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); -} - -/* - * Calculate number of records in a bmap btree inode root. - */ -int -xfs_bmdr_maxrecs( - int blocklen, - int leaf) -{ - blocklen -= sizeof(xfs_bmdr_block_t); - - if (leaf) - return blocklen / sizeof(xfs_bmdr_rec_t); - return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t)); -} - -/* - * Change the owner of a btree format fork fo the inode passed in. Change it to - * the owner of that is passed in so that we can change owners before or after - * we switch forks between inodes. The operation that the caller is doing will - * determine whether is needs to change owner before or after the switch. - * - * For demand paged transactional modification, the fork switch should be done - * after reading in all the blocks, modifying them and pinning them in the - * transaction. For modification when the buffers are already pinned in memory, - * the fork switch can be done before changing the owner as we won't need to - * validate the owner until the btree buffers are unpinned and writes can occur - * again. - * - * For recovery based ownership change, there is no transactional context and - * so a buffer list must be supplied so that we can record the buffers that we - * modified for the caller to issue IO on. - */ -int -xfs_bmbt_change_owner( - struct xfs_trans *tp, - struct xfs_inode *ip, - int whichfork, - xfs_ino_t new_owner, - struct list_head *buffer_list) -{ - struct xfs_btree_cur *cur; - int error; - - ASSERT(tp || buffer_list); - ASSERT(!(tp && buffer_list)); - if (whichfork == XFS_DATA_FORK) - ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE); - else - ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE); - - cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); - if (!cur) - return ENOMEM; - - error = xfs_btree_change_owner(cur, new_owner, buffer_list); - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - return error; -} diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c deleted file mode 100644 index 036b4fd34bf7..000000000000 --- a/fs/xfs/xfs_btree.c +++ /dev/null @@ -1,3989 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_buf_item.h" -#include "xfs_btree.h" -#include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_cksum.h" - -/* - * Cursor allocation zone. - */ -kmem_zone_t *xfs_btree_cur_zone; - -/* - * Btree magic numbers. - */ -static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { - { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, - XFS_FIBT_MAGIC }, - { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, - XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } -}; -#define xfs_btree_magic(cur) \ - xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] - - -STATIC int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lblock( - struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_block *block, /* btree long form block pointer */ - int level, /* level of the btree block */ - struct xfs_buf *bp) /* buffer for block, if any */ -{ - int lblock_ok = 1; /* block passes checks */ - struct xfs_mount *mp; /* file system mount point */ - - mp = cur->bc_mp; - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - lblock_ok = lblock_ok && - uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) && - block->bb_u.l.bb_blkno == cpu_to_be64( - bp ? bp->b_bn : XFS_BUF_DADDR_NULL); - } - - lblock_ok = lblock_ok && - be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && - be16_to_cpu(block->bb_level) == level && - be16_to_cpu(block->bb_numrecs) <= - cur->bc_ops->get_maxrecs(cur, level) && - block->bb_u.l.bb_leftsib && - (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || - XFS_FSB_SANITY_CHECK(mp, - be64_to_cpu(block->bb_u.l.bb_leftsib))) && - block->bb_u.l.bb_rightsib && - (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || - XFS_FSB_SANITY_CHECK(mp, - be64_to_cpu(block->bb_u.l.bb_rightsib))); - - if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, - XFS_ERRTAG_BTREE_CHECK_LBLOCK, - XFS_RANDOM_BTREE_CHECK_LBLOCK))) { - if (bp) - trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - return EFSCORRUPTED; - } - return 0; -} - -STATIC int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sblock( - struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_block *block, /* btree short form block pointer */ - int level, /* level of the btree block */ - struct xfs_buf *bp) /* buffer containing block */ -{ - struct xfs_mount *mp; /* file system mount point */ - struct xfs_buf *agbp; /* buffer for ag. freespace struct */ - struct xfs_agf *agf; /* ag. freespace structure */ - xfs_agblock_t agflen; /* native ag. freespace length */ - int sblock_ok = 1; /* block passes checks */ - - mp = cur->bc_mp; - agbp = cur->bc_private.a.agbp; - agf = XFS_BUF_TO_AGF(agbp); - agflen = be32_to_cpu(agf->agf_length); - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - sblock_ok = sblock_ok && - uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) && - block->bb_u.s.bb_blkno == cpu_to_be64( - bp ? bp->b_bn : XFS_BUF_DADDR_NULL); - } - - sblock_ok = sblock_ok && - be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && - be16_to_cpu(block->bb_level) == level && - be16_to_cpu(block->bb_numrecs) <= - cur->bc_ops->get_maxrecs(cur, level) && - (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || - be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && - block->bb_u.s.bb_leftsib && - (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || - be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && - block->bb_u.s.bb_rightsib; - - if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp, - XFS_ERRTAG_BTREE_CHECK_SBLOCK, - XFS_RANDOM_BTREE_CHECK_SBLOCK))) { - if (bp) - trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - return EFSCORRUPTED; - } - return 0; -} - -/* - * Debug routine: check that block header is ok. - */ -int -xfs_btree_check_block( - struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_block *block, /* generic btree block pointer */ - int level, /* level of the btree block */ - struct xfs_buf *bp) /* buffer containing block, if any */ -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return xfs_btree_check_lblock(cur, block, level, bp); - else - return xfs_btree_check_sblock(cur, block, level, bp); -} - -/* - * Check that (long) pointer is ok. - */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lptr( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_dfsbno_t bno, /* btree block disk address */ - int level) /* btree block level */ -{ - XFS_WANT_CORRUPTED_RETURN( - level > 0 && - bno != NULLDFSBNO && - XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); - return 0; -} - -#ifdef DEBUG -/* - * Check that (short) pointer is ok. - */ -STATIC int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sptr( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agblock_t bno, /* btree block disk address */ - int level) /* btree block level */ -{ - xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; - - XFS_WANT_CORRUPTED_RETURN( - level > 0 && - bno != NULLAGBLOCK && - bno != 0 && - bno < agblocks); - return 0; -} - -/* - * Check that block ptr is ok. - */ -STATIC int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_ptr( - struct xfs_btree_cur *cur, /* btree cursor */ - union xfs_btree_ptr *ptr, /* btree block disk address */ - int index, /* offset from ptr to check */ - int level) /* btree block level */ -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - return xfs_btree_check_lptr(cur, - be64_to_cpu((&ptr->l)[index]), level); - } else { - return xfs_btree_check_sptr(cur, - be32_to_cpu((&ptr->s)[index]), level); - } -} -#endif - -/* - * Calculate CRC on the whole btree block and stuff it into the - * long-form btree header. - * - * Prior to calculting the CRC, pull the LSN out of the buffer log item and put - * it into the buffer so recovery knows what the last modifcation was that made - * it to disk. - */ -void -xfs_btree_lblock_calc_crc( - struct xfs_buf *bp) -{ - struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_buf_log_item *bip = bp->b_fspriv; - - if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) - return; - if (bip) - block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); -} - -bool -xfs_btree_lblock_verify_crc( - struct xfs_buf *bp) -{ - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) - return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); - - return true; -} - -/* - * Calculate CRC on the whole btree block and stuff it into the - * short-form btree header. - * - * Prior to calculting the CRC, pull the LSN out of the buffer log item and put - * it into the buffer so recovery knows what the last modifcation was that made - * it to disk. - */ -void -xfs_btree_sblock_calc_crc( - struct xfs_buf *bp) -{ - struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_buf_log_item *bip = bp->b_fspriv; - - if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) - return; - if (bip) - block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); -} - -bool -xfs_btree_sblock_verify_crc( - struct xfs_buf *bp) -{ - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) - return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); - - return true; -} - -/* - * Delete the btree cursor. - */ -void -xfs_btree_del_cursor( - xfs_btree_cur_t *cur, /* btree cursor */ - int error) /* del because of error */ -{ - int i; /* btree level */ - - /* - * Clear the buffer pointers, and release the buffers. - * If we're doing this in the face of an error, we - * need to make sure to inspect all of the entries - * in the bc_bufs array for buffers to be unlocked. - * This is because some of the btree code works from - * level n down to 0, and if we get an error along - * the way we won't have initialized all the entries - * down to 0. - */ - for (i = 0; i < cur->bc_nlevels; i++) { - if (cur->bc_bufs[i]) - xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); - else if (!error) - break; - } - /* - * Can't free a bmap cursor without having dealt with the - * allocated indirect blocks' accounting. - */ - ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || - cur->bc_private.b.allocated == 0); - /* - * Free the cursor. - */ - kmem_zone_free(xfs_btree_cur_zone, cur); -} - -/* - * Duplicate the btree cursor. - * Allocate a new one, copy the record, re-get the buffers. - */ -int /* error */ -xfs_btree_dup_cursor( - xfs_btree_cur_t *cur, /* input cursor */ - xfs_btree_cur_t **ncur) /* output cursor */ -{ - xfs_buf_t *bp; /* btree block's buffer pointer */ - int error; /* error return value */ - int i; /* level number of btree block */ - xfs_mount_t *mp; /* mount structure for filesystem */ - xfs_btree_cur_t *new; /* new cursor value */ - xfs_trans_t *tp; /* transaction pointer, can be NULL */ - - tp = cur->bc_tp; - mp = cur->bc_mp; - - /* - * Allocate a new cursor like the old one. - */ - new = cur->bc_ops->dup_cursor(cur); - - /* - * Copy the record currently in the cursor. - */ - new->bc_rec = cur->bc_rec; - - /* - * For each level current, re-get the buffer and copy the ptr value. - */ - for (i = 0; i < new->bc_nlevels; i++) { - new->bc_ptrs[i] = cur->bc_ptrs[i]; - new->bc_ra[i] = cur->bc_ra[i]; - bp = cur->bc_bufs[i]; - if (bp) { - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_BUF_ADDR(bp), mp->m_bsize, - 0, &bp, - cur->bc_ops->buf_ops); - if (error) { - xfs_btree_del_cursor(new, error); - *ncur = NULL; - return error; - } - } - new->bc_bufs[i] = bp; - } - *ncur = new; - return 0; -} - -/* - * XFS btree block layout and addressing: - * - * There are two types of blocks in the btree: leaf and non-leaf blocks. - * - * The leaf record start with a header then followed by records containing - * the values. A non-leaf block also starts with the same header, and - * then first contains lookup keys followed by an equal number of pointers - * to the btree blocks at the previous level. - * - * +--------+-------+-------+-------+-------+-------+-------+ - * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | - * +--------+-------+-------+-------+-------+-------+-------+ - * - * +--------+-------+-------+-------+-------+-------+-------+ - * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | - * +--------+-------+-------+-------+-------+-------+-------+ - * - * The header is called struct xfs_btree_block for reasons better left unknown - * and comes in different versions for short (32bit) and long (64bit) block - * pointers. The record and key structures are defined by the btree instances - * and opaque to the btree core. The block pointers are simple disk endian - * integers, available in a short (32bit) and long (64bit) variant. - * - * The helpers below calculate the offset of a given record, key or pointer - * into a btree block (xfs_btree_*_offset) or return a pointer to the given - * record, key or pointer (xfs_btree_*_addr). Note that all addressing - * inside the btree block is done using indices starting at one, not zero! - */ - -/* - * Return size of the btree block header for this btree instance. - */ -static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) - return XFS_BTREE_LBLOCK_CRC_LEN; - return XFS_BTREE_LBLOCK_LEN; - } - if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) - return XFS_BTREE_SBLOCK_CRC_LEN; - return XFS_BTREE_SBLOCK_LEN; -} - -/* - * Return size of btree block pointers for this btree instance. - */ -static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) -{ - return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? - sizeof(__be64) : sizeof(__be32); -} - -/* - * Calculate offset of the n-th record in a btree block. - */ -STATIC size_t -xfs_btree_rec_offset( - struct xfs_btree_cur *cur, - int n) -{ - return xfs_btree_block_len(cur) + - (n - 1) * cur->bc_ops->rec_len; -} - -/* - * Calculate offset of the n-th key in a btree block. - */ -STATIC size_t -xfs_btree_key_offset( - struct xfs_btree_cur *cur, - int n) -{ - return xfs_btree_block_len(cur) + - (n - 1) * cur->bc_ops->key_len; -} - -/* - * Calculate offset of the n-th block pointer in a btree block. - */ -STATIC size_t -xfs_btree_ptr_offset( - struct xfs_btree_cur *cur, - int n, - int level) -{ - return xfs_btree_block_len(cur) + - cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + - (n - 1) * xfs_btree_ptr_len(cur); -} - -/* - * Return a pointer to the n-th record in the btree block. - */ -STATIC union xfs_btree_rec * -xfs_btree_rec_addr( - struct xfs_btree_cur *cur, - int n, - struct xfs_btree_block *block) -{ - return (union xfs_btree_rec *) - ((char *)block + xfs_btree_rec_offset(cur, n)); -} - -/* - * Return a pointer to the n-th key in the btree block. - */ -STATIC union xfs_btree_key * -xfs_btree_key_addr( - struct xfs_btree_cur *cur, - int n, - struct xfs_btree_block *block) -{ - return (union xfs_btree_key *) - ((char *)block + xfs_btree_key_offset(cur, n)); -} - -/* - * Return a pointer to the n-th block pointer in the btree block. - */ -STATIC union xfs_btree_ptr * -xfs_btree_ptr_addr( - struct xfs_btree_cur *cur, - int n, - struct xfs_btree_block *block) -{ - int level = xfs_btree_get_level(block); - - ASSERT(block->bb_level != 0); - - return (union xfs_btree_ptr *) - ((char *)block + xfs_btree_ptr_offset(cur, n, level)); -} - -/* - * Get the root block which is stored in the inode. - * - * For now this btree implementation assumes the btree root is always - * stored in the if_broot field of an inode fork. - */ -STATIC struct xfs_btree_block * -xfs_btree_get_iroot( - struct xfs_btree_cur *cur) -{ - struct xfs_ifork *ifp; - - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); - return (struct xfs_btree_block *)ifp->if_broot; -} - -/* - * Retrieve the block pointer from the cursor at the given level. - * This may be an inode btree root or from a buffer. - */ -STATIC struct xfs_btree_block * /* generic btree block pointer */ -xfs_btree_get_block( - struct xfs_btree_cur *cur, /* btree cursor */ - int level, /* level in btree */ - struct xfs_buf **bpp) /* buffer containing the block */ -{ - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - (level == cur->bc_nlevels - 1)) { - *bpp = NULL; - return xfs_btree_get_iroot(cur); - } - - *bpp = cur->bc_bufs[level]; - return XFS_BUF_TO_BLOCK(*bpp); -} - -/* - * Get a buffer for the block, return it with no data read. - * Long-form addressing. - */ -xfs_buf_t * /* buffer for fsbno */ -xfs_btree_get_bufl( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_fsblock_t fsbno, /* file system block number */ - uint lock) /* lock flags for get_buf */ -{ - xfs_daddr_t d; /* real disk block address */ - - ASSERT(fsbno != NULLFSBLOCK); - d = XFS_FSB_TO_DADDR(mp, fsbno); - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); -} - -/* - * Get a buffer for the block, return it with no data read. - * Short-form addressing. - */ -xfs_buf_t * /* buffer for agno/agbno */ -xfs_btree_get_bufs( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - uint lock) /* lock flags for get_buf */ -{ - xfs_daddr_t d; /* real disk block address */ - - ASSERT(agno != NULLAGNUMBER); - ASSERT(agbno != NULLAGBLOCK); - d = XFS_AGB_TO_DADDR(mp, agno, agbno); - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); -} - -/* - * Check for the cursor referring to the last block at the given level. - */ -int /* 1=is last block, 0=not last block */ -xfs_btree_islastblock( - xfs_btree_cur_t *cur, /* btree cursor */ - int level) /* level to check */ -{ - struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ - - block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO); - else - return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); -} - -/* - * Change the cursor to point to the first record at the given level. - * Other levels are unaffected. - */ -STATIC int /* success=1, failure=0 */ -xfs_btree_firstrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level) /* level to change */ -{ - struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ - - /* - * Get the block pointer for this level. - */ - block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); - /* - * It's empty, there is no such record. - */ - if (!block->bb_numrecs) - return 0; - /* - * Set the ptr value to 1, that's the first record/key. - */ - cur->bc_ptrs[level] = 1; - return 1; -} - -/* - * Change the cursor to point to the last record in the current block - * at the given level. Other levels are unaffected. - */ -STATIC int /* success=1, failure=0 */ -xfs_btree_lastrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level) /* level to change */ -{ - struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ - - /* - * Get the block pointer for this level. - */ - block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); - /* - * It's empty, there is no such record. - */ - if (!block->bb_numrecs) - return 0; - /* - * Set the ptr value to numrecs, that's the last record/key. - */ - cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); - return 1; -} - -/* - * Compute first and last byte offsets for the fields given. - * Interprets the offsets table, which contains struct field offsets. - */ -void -xfs_btree_offsets( - __int64_t fields, /* bitmask of fields */ - const short *offsets, /* table of field offsets */ - int nbits, /* number of bits to inspect */ - int *first, /* output: first byte offset */ - int *last) /* output: last byte offset */ -{ - int i; /* current bit number */ - __int64_t imask; /* mask for current bit number */ - - ASSERT(fields != 0); - /* - * Find the lowest bit, so the first byte offset. - */ - for (i = 0, imask = 1LL; ; i++, imask <<= 1) { - if (imask & fields) { - *first = offsets[i]; - break; - } - } - /* - * Find the highest bit, so the last byte offset. - */ - for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) { - if (imask & fields) { - *last = offsets[i + 1] - 1; - break; - } - } -} - -/* - * Get a buffer for the block, return it read in. - * Long-form addressing. - */ -int -xfs_btree_read_bufl( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_fsblock_t fsbno, /* file system block number */ - uint lock, /* lock flags for read_buf */ - struct xfs_buf **bpp, /* buffer for fsbno */ - int refval, /* ref count value for buffer */ - const struct xfs_buf_ops *ops) -{ - struct xfs_buf *bp; /* return value */ - xfs_daddr_t d; /* real disk block address */ - int error; - - ASSERT(fsbno != NULLFSBLOCK); - d = XFS_FSB_TO_DADDR(mp, fsbno); - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, - mp->m_bsize, lock, &bp, ops); - if (error) - return error; - if (bp) - xfs_buf_set_ref(bp, refval); - *bpp = bp; - return 0; -} - -/* - * Read-ahead the block, don't wait for it, don't return a buffer. - * Long-form addressing. - */ -/* ARGSUSED */ -void -xfs_btree_reada_bufl( - struct xfs_mount *mp, /* file system mount point */ - xfs_fsblock_t fsbno, /* file system block number */ - xfs_extlen_t count, /* count of filesystem blocks */ - const struct xfs_buf_ops *ops) -{ - xfs_daddr_t d; - - ASSERT(fsbno != NULLFSBLOCK); - d = XFS_FSB_TO_DADDR(mp, fsbno); - xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); -} - -/* - * Read-ahead the block, don't wait for it, don't return a buffer. - * Short-form addressing. - */ -/* ARGSUSED */ -void -xfs_btree_reada_bufs( - struct xfs_mount *mp, /* file system mount point */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - xfs_extlen_t count, /* count of filesystem blocks */ - const struct xfs_buf_ops *ops) -{ - xfs_daddr_t d; - - ASSERT(agno != NULLAGNUMBER); - ASSERT(agbno != NULLAGBLOCK); - d = XFS_AGB_TO_DADDR(mp, agno, agbno); - xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); -} - -STATIC int -xfs_btree_readahead_lblock( - struct xfs_btree_cur *cur, - int lr, - struct xfs_btree_block *block) -{ - int rval = 0; - xfs_dfsbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); - xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); - - if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { - xfs_btree_reada_bufl(cur->bc_mp, left, 1, - cur->bc_ops->buf_ops); - rval++; - } - - if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { - xfs_btree_reada_bufl(cur->bc_mp, right, 1, - cur->bc_ops->buf_ops); - rval++; - } - - return rval; -} - -STATIC int -xfs_btree_readahead_sblock( - struct xfs_btree_cur *cur, - int lr, - struct xfs_btree_block *block) -{ - int rval = 0; - xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); - xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); - - - if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - left, 1, cur->bc_ops->buf_ops); - rval++; - } - - if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - right, 1, cur->bc_ops->buf_ops); - rval++; - } - - return rval; -} - -/* - * Read-ahead btree blocks, at the given level. - * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. - */ -STATIC int -xfs_btree_readahead( - struct xfs_btree_cur *cur, /* btree cursor */ - int lev, /* level in btree */ - int lr) /* left/right bits */ -{ - struct xfs_btree_block *block; - - /* - * No readahead needed if we are at the root level and the - * btree root is stored in the inode. - */ - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - (lev == cur->bc_nlevels - 1)) - return 0; - - if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) - return 0; - - cur->bc_ra[lev] |= lr; - block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); - - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return xfs_btree_readahead_lblock(cur, lr, block); - return xfs_btree_readahead_sblock(cur, lr, block); -} - -STATIC xfs_daddr_t -xfs_btree_ptr_to_daddr( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr) -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); - - return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); - } else { - ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); - ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); - - return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, - be32_to_cpu(ptr->s)); - } -} - -/* - * Readahead @count btree blocks at the given @ptr location. - * - * We don't need to care about long or short form btrees here as we have a - * method of converting the ptr directly to a daddr available to us. - */ -STATIC void -xfs_btree_readahead_ptr( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr, - xfs_extlen_t count) -{ - xfs_buf_readahead(cur->bc_mp->m_ddev_targp, - xfs_btree_ptr_to_daddr(cur, ptr), - cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); -} - -/* - * Set the buffer for level "lev" in the cursor to bp, releasing - * any previous buffer. - */ -STATIC void -xfs_btree_setbuf( - xfs_btree_cur_t *cur, /* btree cursor */ - int lev, /* level in btree */ - xfs_buf_t *bp) /* new buffer to set */ -{ - struct xfs_btree_block *b; /* btree block */ - - if (cur->bc_bufs[lev]) - xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); - cur->bc_bufs[lev] = bp; - cur->bc_ra[lev] = 0; - - b = XFS_BUF_TO_BLOCK(bp); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)) - cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)) - cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; - } else { - if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; - } -} - -STATIC int -xfs_btree_ptr_is_null( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr) -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return ptr->l == cpu_to_be64(NULLDFSBNO); - else - return ptr->s == cpu_to_be32(NULLAGBLOCK); -} - -STATIC void -xfs_btree_set_ptr_null( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr) -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - ptr->l = cpu_to_be64(NULLDFSBNO); - else - ptr->s = cpu_to_be32(NULLAGBLOCK); -} - -/* - * Get/set/init sibling pointers - */ -STATIC void -xfs_btree_get_sibling( - struct xfs_btree_cur *cur, - struct xfs_btree_block *block, - union xfs_btree_ptr *ptr, - int lr) -{ - ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); - - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (lr == XFS_BB_RIGHTSIB) - ptr->l = block->bb_u.l.bb_rightsib; - else - ptr->l = block->bb_u.l.bb_leftsib; - } else { - if (lr == XFS_BB_RIGHTSIB) - ptr->s = block->bb_u.s.bb_rightsib; - else - ptr->s = block->bb_u.s.bb_leftsib; - } -} - -STATIC void -xfs_btree_set_sibling( - struct xfs_btree_cur *cur, - struct xfs_btree_block *block, - union xfs_btree_ptr *ptr, - int lr) -{ - ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); - - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (lr == XFS_BB_RIGHTSIB) - block->bb_u.l.bb_rightsib = ptr->l; - else - block->bb_u.l.bb_leftsib = ptr->l; - } else { - if (lr == XFS_BB_RIGHTSIB) - block->bb_u.s.bb_rightsib = ptr->s; - else - block->bb_u.s.bb_leftsib = ptr->s; - } -} - -void -xfs_btree_init_block_int( - struct xfs_mount *mp, - struct xfs_btree_block *buf, - xfs_daddr_t blkno, - __u32 magic, - __u16 level, - __u16 numrecs, - __u64 owner, - unsigned int flags) -{ - buf->bb_magic = cpu_to_be32(magic); - buf->bb_level = cpu_to_be16(level); - buf->bb_numrecs = cpu_to_be16(numrecs); - - if (flags & XFS_BTREE_LONG_PTRS) { - buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); - buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); - if (flags & XFS_BTREE_CRC_BLOCKS) { - buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); - buf->bb_u.l.bb_owner = cpu_to_be64(owner); - uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid); - buf->bb_u.l.bb_pad = 0; - buf->bb_u.l.bb_lsn = 0; - } - } else { - /* owner is a 32 bit value on short blocks */ - __u32 __owner = (__u32)owner; - - buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); - buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); - if (flags & XFS_BTREE_CRC_BLOCKS) { - buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); - buf->bb_u.s.bb_owner = cpu_to_be32(__owner); - uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid); - buf->bb_u.s.bb_lsn = 0; - } - } -} - -void -xfs_btree_init_block( - struct xfs_mount *mp, - struct xfs_buf *bp, - __u32 magic, - __u16 level, - __u16 numrecs, - __u64 owner, - unsigned int flags) -{ - xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, - magic, level, numrecs, owner, flags); -} - -STATIC void -xfs_btree_init_block_cur( - struct xfs_btree_cur *cur, - struct xfs_buf *bp, - int level, - int numrecs) -{ - __u64 owner; - - /* - * we can pull the owner from the cursor right now as the different - * owners align directly with the pointer size of the btree. This may - * change in future, but is safe for current users of the generic btree - * code. - */ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - owner = cur->bc_private.b.ip->i_ino; - else - owner = cur->bc_private.a.agno; - - xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, - xfs_btree_magic(cur), level, numrecs, - owner, cur->bc_flags); -} - -/* - * Return true if ptr is the last record in the btree and - * we need to track updates to this record. The decision - * will be further refined in the update_lastrec method. - */ -STATIC int -xfs_btree_is_lastrec( - struct xfs_btree_cur *cur, - struct xfs_btree_block *block, - int level) -{ - union xfs_btree_ptr ptr; - - if (level > 0) - return 0; - if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) - return 0; - - xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); - if (!xfs_btree_ptr_is_null(cur, &ptr)) - return 0; - return 1; -} - -STATIC void -xfs_btree_buf_to_ptr( - struct xfs_btree_cur *cur, - struct xfs_buf *bp, - union xfs_btree_ptr *ptr) -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, - XFS_BUF_ADDR(bp))); - else { - ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp, - XFS_BUF_ADDR(bp))); - } -} - -STATIC void -xfs_btree_set_refs( - struct xfs_btree_cur *cur, - struct xfs_buf *bp) -{ - switch (cur->bc_btnum) { - case XFS_BTNUM_BNO: - case XFS_BTNUM_CNT: - xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); - break; - case XFS_BTNUM_INO: - case XFS_BTNUM_FINO: - xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); - break; - case XFS_BTNUM_BMAP: - xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); - break; - default: - ASSERT(0); - } -} - -STATIC int -xfs_btree_get_buf_block( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr, - int flags, - struct xfs_btree_block **block, - struct xfs_buf **bpp) -{ - struct xfs_mount *mp = cur->bc_mp; - xfs_daddr_t d; - - /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XBF_TRYLOCK)); - - d = xfs_btree_ptr_to_daddr(cur, ptr); - *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, - mp->m_bsize, flags); - - if (!*bpp) - return ENOMEM; - - (*bpp)->b_ops = cur->bc_ops->buf_ops; - *block = XFS_BUF_TO_BLOCK(*bpp); - return 0; -} - -/* - * Read in the buffer at the given ptr and return the buffer and - * the block pointer within the buffer. - */ -STATIC int -xfs_btree_read_buf_block( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr, - int flags, - struct xfs_btree_block **block, - struct xfs_buf **bpp) -{ - struct xfs_mount *mp = cur->bc_mp; - xfs_daddr_t d; - int error; - - /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XBF_TRYLOCK)); - - d = xfs_btree_ptr_to_daddr(cur, ptr); - error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, - mp->m_bsize, flags, bpp, - cur->bc_ops->buf_ops); - if (error) - return error; - - xfs_btree_set_refs(cur, *bpp); - *block = XFS_BUF_TO_BLOCK(*bpp); - return 0; -} - -/* - * Copy keys from one btree block to another. - */ -STATIC void -xfs_btree_copy_keys( - struct xfs_btree_cur *cur, - union xfs_btree_key *dst_key, - union xfs_btree_key *src_key, - int numkeys) -{ - ASSERT(numkeys >= 0); - memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); -} - -/* - * Copy records from one btree block to another. - */ -STATIC void -xfs_btree_copy_recs( - struct xfs_btree_cur *cur, - union xfs_btree_rec *dst_rec, - union xfs_btree_rec *src_rec, - int numrecs) -{ - ASSERT(numrecs >= 0); - memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); -} - -/* - * Copy block pointers from one btree block to another. - */ -STATIC void -xfs_btree_copy_ptrs( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *dst_ptr, - union xfs_btree_ptr *src_ptr, - int numptrs) -{ - ASSERT(numptrs >= 0); - memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); -} - -/* - * Shift keys one index left/right inside a single btree block. - */ -STATIC void -xfs_btree_shift_keys( - struct xfs_btree_cur *cur, - union xfs_btree_key *key, - int dir, - int numkeys) -{ - char *dst_key; - - ASSERT(numkeys >= 0); - ASSERT(dir == 1 || dir == -1); - - dst_key = (char *)key + (dir * cur->bc_ops->key_len); - memmove(dst_key, key, numkeys * cur->bc_ops->key_len); -} - -/* - * Shift records one index left/right inside a single btree block. - */ -STATIC void -xfs_btree_shift_recs( - struct xfs_btree_cur *cur, - union xfs_btree_rec *rec, - int dir, - int numrecs) -{ - char *dst_rec; - - ASSERT(numrecs >= 0); - ASSERT(dir == 1 || dir == -1); - - dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); - memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); -} - -/* - * Shift block pointers one index left/right inside a single btree block. - */ -STATIC void -xfs_btree_shift_ptrs( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr, - int dir, - int numptrs) -{ - char *dst_ptr; - - ASSERT(numptrs >= 0); - ASSERT(dir == 1 || dir == -1); - - dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); - memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); -} - -/* - * Log key values from the btree block. - */ -STATIC void -xfs_btree_log_keys( - struct xfs_btree_cur *cur, - struct xfs_buf *bp, - int first, - int last) -{ - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); - - if (bp) { - xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); - xfs_trans_log_buf(cur->bc_tp, bp, - xfs_btree_key_offset(cur, first), - xfs_btree_key_offset(cur, last + 1) - 1); - } else { - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, - xfs_ilog_fbroot(cur->bc_private.b.whichfork)); - } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); -} - -/* - * Log record values from the btree block. - */ -void -xfs_btree_log_recs( - struct xfs_btree_cur *cur, - struct xfs_buf *bp, - int first, - int last) -{ - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); - - xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); - xfs_trans_log_buf(cur->bc_tp, bp, - xfs_btree_rec_offset(cur, first), - xfs_btree_rec_offset(cur, last + 1) - 1); - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); -} - -/* - * Log block pointer fields from a btree block (nonleaf). - */ -STATIC void -xfs_btree_log_ptrs( - struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_buf *bp, /* buffer containing btree block */ - int first, /* index of first pointer to log */ - int last) /* index of last pointer to log */ -{ - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); - - if (bp) { - struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - int level = xfs_btree_get_level(block); - - xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); - xfs_trans_log_buf(cur->bc_tp, bp, - xfs_btree_ptr_offset(cur, first, level), - xfs_btree_ptr_offset(cur, last + 1, level) - 1); - } else { - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, - xfs_ilog_fbroot(cur->bc_private.b.whichfork)); - } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); -} - -/* - * Log fields from a btree block header. - */ -void -xfs_btree_log_block( - struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_buf *bp, /* buffer containing btree block */ - int fields) /* mask of fields: XFS_BB_... */ -{ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - static const short soffsets[] = { /* table of offsets (short) */ - offsetof(struct xfs_btree_block, bb_magic), - offsetof(struct xfs_btree_block, bb_level), - offsetof(struct xfs_btree_block, bb_numrecs), - offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), - offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), - offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), - offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), - offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), - offsetof(struct xfs_btree_block, bb_u.s.bb_owner), - offsetof(struct xfs_btree_block, bb_u.s.bb_crc), - XFS_BTREE_SBLOCK_CRC_LEN - }; - static const short loffsets[] = { /* table of offsets (long) */ - offsetof(struct xfs_btree_block, bb_magic), - offsetof(struct xfs_btree_block, bb_level), - offsetof(struct xfs_btree_block, bb_numrecs), - offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), - offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), - offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), - offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), - offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), - offsetof(struct xfs_btree_block, bb_u.l.bb_owner), - offsetof(struct xfs_btree_block, bb_u.l.bb_crc), - offsetof(struct xfs_btree_block, bb_u.l.bb_pad), - XFS_BTREE_LBLOCK_CRC_LEN - }; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBI(cur, bp, fields); - - if (bp) { - int nbits; - - if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { - /* - * We don't log the CRC when updating a btree - * block but instead recreate it during log - * recovery. As the log buffers have checksums - * of their own this is safe and avoids logging a crc - * update in a lot of places. - */ - if (fields == XFS_BB_ALL_BITS) - fields = XFS_BB_ALL_BITS_CRC; - nbits = XFS_BB_NUM_BITS_CRC; - } else { - nbits = XFS_BB_NUM_BITS; - } - xfs_btree_offsets(fields, - (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? - loffsets : soffsets, - nbits, &first, &last); - xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); - } else { - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, - xfs_ilog_fbroot(cur->bc_private.b.whichfork)); - } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); -} - -/* - * Increment cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_btree_increment( - struct xfs_btree_cur *cur, - int level, - int *stat) /* success/failure */ -{ - struct xfs_btree_block *block; - union xfs_btree_ptr ptr; - struct xfs_buf *bp; - int error; /* error return value */ - int lev; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - - ASSERT(level < cur->bc_nlevels); - - /* Read-ahead to the right at this level. */ - xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); - - /* Get a pointer to the btree block. */ - block = xfs_btree_get_block(cur, level, &bp); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, block, level, bp); - if (error) - goto error0; -#endif - - /* We're done if we remain in the block after the increment. */ - if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) - goto out1; - - /* Fail if we just went off the right edge of the tree. */ - xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); - if (xfs_btree_ptr_is_null(cur, &ptr)) - goto out0; - - XFS_BTREE_STATS_INC(cur, increment); - - /* - * March up the tree incrementing pointers. - * Stop when we don't go off the right edge of a block. - */ - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - block = xfs_btree_get_block(cur, lev, &bp); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, block, lev, bp); - if (error) - goto error0; -#endif - - if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) - break; - - /* Read-ahead the right block for the next loop. */ - xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); - } - - /* - * If we went off the root then we are either seriously - * confused or have the tree root in an inode. - */ - if (lev == cur->bc_nlevels) { - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) - goto out0; - ASSERT(0); - error = EFSCORRUPTED; - goto error0; - } - ASSERT(lev < cur->bc_nlevels); - - /* - * Now walk back down the tree, fixing up the cursor's buffer - * pointers and key numbers. - */ - for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { - union xfs_btree_ptr *ptrp; - - ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); - --lev; - error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); - if (error) - goto error0; - - xfs_btree_setbuf(cur, lev, bp); - cur->bc_ptrs[lev] = 1; - } -out1: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; - -out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -} - -/* - * Decrement cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_btree_decrement( - struct xfs_btree_cur *cur, - int level, - int *stat) /* success/failure */ -{ - struct xfs_btree_block *block; - xfs_buf_t *bp; - int error; /* error return value */ - int lev; - union xfs_btree_ptr ptr; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - - ASSERT(level < cur->bc_nlevels); - - /* Read-ahead to the left at this level. */ - xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); - - /* We're done if we remain in the block after the decrement. */ - if (--cur->bc_ptrs[level] > 0) - goto out1; - - /* Get a pointer to the btree block. */ - block = xfs_btree_get_block(cur, level, &bp); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, block, level, bp); - if (error) - goto error0; -#endif - - /* Fail if we just went off the left edge of the tree. */ - xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); - if (xfs_btree_ptr_is_null(cur, &ptr)) - goto out0; - - XFS_BTREE_STATS_INC(cur, decrement); - - /* - * March up the tree decrementing pointers. - * Stop when we don't go off the left edge of a block. - */ - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - if (--cur->bc_ptrs[lev] > 0) - break; - /* Read-ahead the left block for the next loop. */ - xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); - } - - /* - * If we went off the root then we are seriously confused. - * or the root of the tree is in an inode. - */ - if (lev == cur->bc_nlevels) { - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) - goto out0; - ASSERT(0); - error = EFSCORRUPTED; - goto error0; - } - ASSERT(lev < cur->bc_nlevels); - - /* - * Now walk back down the tree, fixing up the cursor's buffer - * pointers and key numbers. - */ - for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { - union xfs_btree_ptr *ptrp; - - ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); - --lev; - error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); - if (error) - goto error0; - xfs_btree_setbuf(cur, lev, bp); - cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); - } -out1: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; - -out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -} - -STATIC int -xfs_btree_lookup_get_block( - struct xfs_btree_cur *cur, /* btree cursor */ - int level, /* level in the btree */ - union xfs_btree_ptr *pp, /* ptr to btree block */ - struct xfs_btree_block **blkp) /* return btree block */ -{ - struct xfs_buf *bp; /* buffer pointer for btree block */ - int error = 0; - - /* special case the root block if in an inode */ - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - (level == cur->bc_nlevels - 1)) { - *blkp = xfs_btree_get_iroot(cur); - return 0; - } - - /* - * If the old buffer at this level for the disk address we are - * looking for re-use it. - * - * Otherwise throw it away and get a new one. - */ - bp = cur->bc_bufs[level]; - if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) { - *blkp = XFS_BUF_TO_BLOCK(bp); - return 0; - } - - error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); - if (error) - return error; - - xfs_btree_setbuf(cur, level, bp); - return 0; -} - -/* - * Get current search key. For level 0 we don't actually have a key - * structure so we make one up from the record. For all other levels - * we just return the right key. - */ -STATIC union xfs_btree_key * -xfs_lookup_get_search_key( - struct xfs_btree_cur *cur, - int level, - int keyno, - struct xfs_btree_block *block, - union xfs_btree_key *kp) -{ - if (level == 0) { - cur->bc_ops->init_key_from_rec(kp, - xfs_btree_rec_addr(cur, keyno, block)); - return kp; - } - - return xfs_btree_key_addr(cur, keyno, block); -} - -/* - * Lookup the record. The cursor is made to point to it, based on dir. - * stat is set to 0 if can't find any such record, 1 for success. - */ -int /* error */ -xfs_btree_lookup( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_lookup_t dir, /* <=, ==, or >= */ - int *stat) /* success/failure */ -{ - struct xfs_btree_block *block; /* current btree block */ - __int64_t diff; /* difference for the current key */ - int error; /* error return value */ - int keyno; /* current key number */ - int level; /* level in the btree */ - union xfs_btree_ptr *pp; /* ptr to btree block */ - union xfs_btree_ptr ptr; /* ptr to btree block */ - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, dir); - - XFS_BTREE_STATS_INC(cur, lookup); - - block = NULL; - keyno = 0; - - /* initialise start pointer from cursor */ - cur->bc_ops->init_ptr_from_cur(cur, &ptr); - pp = &ptr; - - /* - * Iterate over each level in the btree, starting at the root. - * For each level above the leaves, find the key we need, based - * on the lookup record, then follow the corresponding block - * pointer down to the next level. - */ - for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { - /* Get the block we need to do the lookup on. */ - error = xfs_btree_lookup_get_block(cur, level, pp, &block); - if (error) - goto error0; - - if (diff == 0) { - /* - * If we already had a key match at a higher level, we - * know we need to use the first entry in this block. - */ - keyno = 1; - } else { - /* Otherwise search this block. Do a binary search. */ - - int high; /* high entry number */ - int low; /* low entry number */ - - /* Set low and high entry numbers, 1-based. */ - low = 1; - high = xfs_btree_get_numrecs(block); - if (!high) { - /* Block is empty, must be an empty leaf. */ - ASSERT(level == 0 && cur->bc_nlevels == 1); - - cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - } - - /* Binary search the block. */ - while (low <= high) { - union xfs_btree_key key; - union xfs_btree_key *kp; - - XFS_BTREE_STATS_INC(cur, compare); - - /* keyno is average of low and high. */ - keyno = (low + high) >> 1; - - /* Get current search key */ - kp = xfs_lookup_get_search_key(cur, level, - keyno, block, &key); - - /* - * Compute difference to get next direction: - * - less than, move right - * - greater than, move left - * - equal, we're done - */ - diff = cur->bc_ops->key_diff(cur, kp); - if (diff < 0) - low = keyno + 1; - else if (diff > 0) - high = keyno - 1; - else - break; - } - } - - /* - * If there are more levels, set up for the next level - * by getting the block number and filling in the cursor. - */ - if (level > 0) { - /* - * If we moved left, need the previous key number, - * unless there isn't one. - */ - if (diff > 0 && --keyno < 1) - keyno = 1; - pp = xfs_btree_ptr_addr(cur, keyno, block); - -#ifdef DEBUG - error = xfs_btree_check_ptr(cur, pp, 0, level); - if (error) - goto error0; -#endif - cur->bc_ptrs[level] = keyno; - } - } - - /* Done with the search. See if we need to adjust the results. */ - if (dir != XFS_LOOKUP_LE && diff < 0) { - keyno++; - /* - * If ge search and we went off the end of the block, but it's - * not the last block, we're in the wrong block. - */ - xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); - if (dir == XFS_LOOKUP_GE && - keyno > xfs_btree_get_numrecs(block) && - !xfs_btree_ptr_is_null(cur, &ptr)) { - int i; - - cur->bc_ptrs[0] = keyno; - error = xfs_btree_increment(cur, 0, &i); - if (error) - goto error0; - XFS_WANT_CORRUPTED_RETURN(i == 1); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; - } - } else if (dir == XFS_LOOKUP_LE && diff > 0) - keyno--; - cur->bc_ptrs[0] = keyno; - - /* Return if we succeeded or not. */ - if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) - *stat = 0; - else if (dir != XFS_LOOKUP_EQ || diff == 0) - *stat = 1; - else - *stat = 0; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return 0; - -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -} - -/* - * Update keys at all levels from here to the root along the cursor's path. - */ -STATIC int -xfs_btree_updkey( - struct xfs_btree_cur *cur, - union xfs_btree_key *keyp, - int level) -{ - struct xfs_btree_block *block; - struct xfs_buf *bp; - union xfs_btree_key *kp; - int ptr; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGIK(cur, level, keyp); - - ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1); - - /* - * Go up the tree from this level toward the root. - * At each level, update the key value to the value input. - * Stop when we reach a level where the cursor isn't pointing - * at the first entry in the block. - */ - for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { -#ifdef DEBUG - int error; -#endif - block = xfs_btree_get_block(cur, level, &bp); -#ifdef DEBUG - error = xfs_btree_check_block(cur, block, level, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; - } -#endif - ptr = cur->bc_ptrs[level]; - kp = xfs_btree_key_addr(cur, ptr, block); - xfs_btree_copy_keys(cur, kp, keyp, 1); - xfs_btree_log_keys(cur, bp, ptr, ptr); - } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return 0; -} - -/* - * Update the record referred to by cur to the value in the - * given record. This either works (return 0) or gets an - * EFSCORRUPTED error. - */ -int -xfs_btree_update( - struct xfs_btree_cur *cur, - union xfs_btree_rec *rec) -{ - struct xfs_btree_block *block; - struct xfs_buf *bp; - int error; - int ptr; - union xfs_btree_rec *rp; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGR(cur, rec); - - /* Pick up the current block. */ - block = xfs_btree_get_block(cur, 0, &bp); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, block, 0, bp); - if (error) - goto error0; -#endif - /* Get the address of the rec to be updated. */ - ptr = cur->bc_ptrs[0]; - rp = xfs_btree_rec_addr(cur, ptr, block); - - /* Fill in the new contents and log them. */ - xfs_btree_copy_recs(cur, rp, rec, 1); - xfs_btree_log_recs(cur, bp, ptr, ptr); - - /* - * If we are tracking the last record in the tree and - * we are at the far right edge of the tree, update it. - */ - if (xfs_btree_is_lastrec(cur, block, 0)) { - cur->bc_ops->update_lastrec(cur, block, rec, - ptr, LASTREC_UPDATE); - } - - /* Updating first rec in leaf. Pass new key value up to our parent. */ - if (ptr == 1) { - union xfs_btree_key key; - - cur->bc_ops->init_key_from_rec(&key, rec); - error = xfs_btree_updkey(cur, &key, 1); - if (error) - goto error0; - } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return 0; - -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -} - -/* - * Move 1 record left from cur/level if possible. - * Update cur to reflect the new path. - */ -STATIC int /* error */ -xfs_btree_lshift( - struct xfs_btree_cur *cur, - int level, - int *stat) /* success/failure */ -{ - union xfs_btree_key key; /* btree key */ - struct xfs_buf *lbp; /* left buffer pointer */ - struct xfs_btree_block *left; /* left btree block */ - int lrecs; /* left record count */ - struct xfs_buf *rbp; /* right buffer pointer */ - struct xfs_btree_block *right; /* right btree block */ - int rrecs; /* right record count */ - union xfs_btree_ptr lptr; /* left btree pointer */ - union xfs_btree_key *rkp = NULL; /* right btree key */ - union xfs_btree_ptr *rpp = NULL; /* right address pointer */ - union xfs_btree_rec *rrp = NULL; /* right record pointer */ - int error; /* error return value */ - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - level == cur->bc_nlevels - 1) - goto out0; - - /* Set up variables for this block as "right". */ - right = xfs_btree_get_block(cur, level, &rbp); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, right, level, rbp); - if (error) - goto error0; -#endif - - /* If we've got no left sibling then we can't shift an entry left. */ - xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); - if (xfs_btree_ptr_is_null(cur, &lptr)) - goto out0; - - /* - * If the cursor entry is the one that would be moved, don't - * do it... it's too complicated. - */ - if (cur->bc_ptrs[level] <= 1) - goto out0; - - /* Set up the left neighbor as "left". */ - error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); - if (error) - goto error0; - - /* If it's full, it can't take another entry. */ - lrecs = xfs_btree_get_numrecs(left); - if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) - goto out0; - - rrecs = xfs_btree_get_numrecs(right); - - /* - * We add one entry to the left side and remove one for the right side. - * Account for it here, the changes will be updated on disk and logged - * later. - */ - lrecs++; - rrecs--; - - XFS_BTREE_STATS_INC(cur, lshift); - XFS_BTREE_STATS_ADD(cur, moves, 1); - - /* - * If non-leaf, copy a key and a ptr to the left block. - * Log the changes to the left block. - */ - if (level > 0) { - /* It's a non-leaf. Move keys and pointers. */ - union xfs_btree_key *lkp; /* left btree key */ - union xfs_btree_ptr *lpp; /* left address pointer */ - - lkp = xfs_btree_key_addr(cur, lrecs, left); - rkp = xfs_btree_key_addr(cur, 1, right); - - lpp = xfs_btree_ptr_addr(cur, lrecs, left); - rpp = xfs_btree_ptr_addr(cur, 1, right); -#ifdef DEBUG - error = xfs_btree_check_ptr(cur, rpp, 0, level); - if (error) - goto error0; -#endif - xfs_btree_copy_keys(cur, lkp, rkp, 1); - xfs_btree_copy_ptrs(cur, lpp, rpp, 1); - - xfs_btree_log_keys(cur, lbp, lrecs, lrecs); - xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); - - ASSERT(cur->bc_ops->keys_inorder(cur, - xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); - } else { - /* It's a leaf. Move records. */ - union xfs_btree_rec *lrp; /* left record pointer */ - - lrp = xfs_btree_rec_addr(cur, lrecs, left); - rrp = xfs_btree_rec_addr(cur, 1, right); - - xfs_btree_copy_recs(cur, lrp, rrp, 1); - xfs_btree_log_recs(cur, lbp, lrecs, lrecs); - - ASSERT(cur->bc_ops->recs_inorder(cur, - xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); - } - - xfs_btree_set_numrecs(left, lrecs); - xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); - - xfs_btree_set_numrecs(right, rrecs); - xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); - - /* - * Slide the contents of right down one entry. - */ - XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); - if (level > 0) { - /* It's a nonleaf. operate on keys and ptrs */ -#ifdef DEBUG - int i; /* loop index */ - - for (i = 0; i < rrecs; i++) { - error = xfs_btree_check_ptr(cur, rpp, i + 1, level); - if (error) - goto error0; - } -#endif - xfs_btree_shift_keys(cur, - xfs_btree_key_addr(cur, 2, right), - -1, rrecs); - xfs_btree_shift_ptrs(cur, - xfs_btree_ptr_addr(cur, 2, right), - -1, rrecs); - - xfs_btree_log_keys(cur, rbp, 1, rrecs); - xfs_btree_log_ptrs(cur, rbp, 1, rrecs); - } else { - /* It's a leaf. operate on records */ - xfs_btree_shift_recs(cur, - xfs_btree_rec_addr(cur, 2, right), - -1, rrecs); - xfs_btree_log_recs(cur, rbp, 1, rrecs); - - /* - * If it's the first record in the block, we'll need a key - * structure to pass up to the next level (updkey). - */ - cur->bc_ops->init_key_from_rec(&key, - xfs_btree_rec_addr(cur, 1, right)); - rkp = &key; - } - - /* Update the parent key values of right. */ - error = xfs_btree_updkey(cur, rkp, level + 1); - if (error) - goto error0; - - /* Slide the cursor value left one. */ - cur->bc_ptrs[level]--; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; - -out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -} - -/* - * Move 1 record right from cur/level if possible. - * Update cur to reflect the new path. - */ -STATIC int /* error */ -xfs_btree_rshift( - struct xfs_btree_cur *cur, - int level, - int *stat) /* success/failure */ -{ - union xfs_btree_key key; /* btree key */ - struct xfs_buf *lbp; /* left buffer pointer */ - struct xfs_btree_block *left; /* left btree block */ - struct xfs_buf *rbp; /* right buffer pointer */ - struct xfs_btree_block *right; /* right btree block */ - struct xfs_btree_cur *tcur; /* temporary btree cursor */ - union xfs_btree_ptr rptr; /* right block pointer */ - union xfs_btree_key *rkp; /* right btree key */ - int rrecs; /* right record count */ - int lrecs; /* left record count */ - int error; /* error return value */ - int i; /* loop counter */ - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - (level == cur->bc_nlevels - 1)) - goto out0; - - /* Set up variables for this block as "left". */ - left = xfs_btree_get_block(cur, level, &lbp); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, left, level, lbp); - if (error) - goto error0; -#endif - - /* If we've got no right sibling then we can't shift an entry right. */ - xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); - if (xfs_btree_ptr_is_null(cur, &rptr)) - goto out0; - - /* - * If the cursor entry is the one that would be moved, don't - * do it... it's too complicated. - */ - lrecs = xfs_btree_get_numrecs(left); - if (cur->bc_ptrs[level] >= lrecs) - goto out0; - - /* Set up the right neighbor as "right". */ - error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); - if (error) - goto error0; - - /* If it's full, it can't take another entry. */ - rrecs = xfs_btree_get_numrecs(right); - if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) - goto out0; - - XFS_BTREE_STATS_INC(cur, rshift); - XFS_BTREE_STATS_ADD(cur, moves, rrecs); - - /* - * Make a hole at the start of the right neighbor block, then - * copy the last left block entry to the hole. - */ - if (level > 0) { - /* It's a nonleaf. make a hole in the keys and ptrs */ - union xfs_btree_key *lkp; - union xfs_btree_ptr *lpp; - union xfs_btree_ptr *rpp; - - lkp = xfs_btree_key_addr(cur, lrecs, left); - lpp = xfs_btree_ptr_addr(cur, lrecs, left); - rkp = xfs_btree_key_addr(cur, 1, right); - rpp = xfs_btree_ptr_addr(cur, 1, right); - -#ifdef DEBUG - for (i = rrecs - 1; i >= 0; i--) { - error = xfs_btree_check_ptr(cur, rpp, i, level); - if (error) - goto error0; - } -#endif - - xfs_btree_shift_keys(cur, rkp, 1, rrecs); - xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); - -#ifdef DEBUG - error = xfs_btree_check_ptr(cur, lpp, 0, level); - if (error) - goto error0; -#endif - - /* Now put the new data in, and log it. */ - xfs_btree_copy_keys(cur, rkp, lkp, 1); - xfs_btree_copy_ptrs(cur, rpp, lpp, 1); - - xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); - xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); - - ASSERT(cur->bc_ops->keys_inorder(cur, rkp, - xfs_btree_key_addr(cur, 2, right))); - } else { - /* It's a leaf. make a hole in the records */ - union xfs_btree_rec *lrp; - union xfs_btree_rec *rrp; - - lrp = xfs_btree_rec_addr(cur, lrecs, left); - rrp = xfs_btree_rec_addr(cur, 1, right); - - xfs_btree_shift_recs(cur, rrp, 1, rrecs); - - /* Now put the new data in, and log it. */ - xfs_btree_copy_recs(cur, rrp, lrp, 1); - xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); - - cur->bc_ops->init_key_from_rec(&key, rrp); - rkp = &key; - - ASSERT(cur->bc_ops->recs_inorder(cur, rrp, - xfs_btree_rec_addr(cur, 2, right))); - } - - /* - * Decrement and log left's numrecs, bump and log right's numrecs. - */ - xfs_btree_set_numrecs(left, --lrecs); - xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); - - xfs_btree_set_numrecs(right, ++rrecs); - xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); - - /* - * Using a temporary cursor, update the parent key values of the - * block on the right. - */ - error = xfs_btree_dup_cursor(cur, &tcur); - if (error) - goto error0; - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - - error = xfs_btree_increment(tcur, level, &i); - if (error) - goto error1; - - error = xfs_btree_updkey(tcur, rkp, level + 1); - if (error) - goto error1; - - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; - -out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; - -error1: - XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - -/* - * Split cur/level block in half. - * Return new block number and the key to its first - * record (to be inserted into parent). - */ -STATIC int /* error */ -xfs_btree_split( - struct xfs_btree_cur *cur, - int level, - union xfs_btree_ptr *ptrp, - union xfs_btree_key *key, - struct xfs_btree_cur **curp, - int *stat) /* success/failure */ -{ - union xfs_btree_ptr lptr; /* left sibling block ptr */ - struct xfs_buf *lbp; /* left buffer pointer */ - struct xfs_btree_block *left; /* left btree block */ - union xfs_btree_ptr rptr; /* right sibling block ptr */ - struct xfs_buf *rbp; /* right buffer pointer */ - struct xfs_btree_block *right; /* right btree block */ - union xfs_btree_ptr rrptr; /* right-right sibling ptr */ - struct xfs_buf *rrbp; /* right-right buffer pointer */ - struct xfs_btree_block *rrblock; /* right-right btree block */ - int lrecs; - int rrecs; - int src_index; - int error; /* error return value */ -#ifdef DEBUG - int i; -#endif - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key); - - XFS_BTREE_STATS_INC(cur, split); - - /* Set up left block (current one). */ - left = xfs_btree_get_block(cur, level, &lbp); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, left, level, lbp); - if (error) - goto error0; -#endif - - xfs_btree_buf_to_ptr(cur, lbp, &lptr); - - /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); - if (error) - goto error0; - if (*stat == 0) - goto out0; - XFS_BTREE_STATS_INC(cur, alloc); - - /* Set up the new block as "right". */ - error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); - if (error) - goto error0; - - /* Fill in the btree header for the new right block. */ - xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0); - - /* - * Split the entries between the old and the new block evenly. - * Make sure that if there's an odd number of entries now, that - * each new block will have the same number of entries. - */ - lrecs = xfs_btree_get_numrecs(left); - rrecs = lrecs / 2; - if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) - rrecs++; - src_index = (lrecs - rrecs + 1); - - XFS_BTREE_STATS_ADD(cur, moves, rrecs); - - /* - * Copy btree block entries from the left block over to the - * new block, the right. Update the right block and log the - * changes. - */ - if (level > 0) { - /* It's a non-leaf. Move keys and pointers. */ - union xfs_btree_key *lkp; /* left btree key */ - union xfs_btree_ptr *lpp; /* left address pointer */ - union xfs_btree_key *rkp; /* right btree key */ - union xfs_btree_ptr *rpp; /* right address pointer */ - - lkp = xfs_btree_key_addr(cur, src_index, left); - lpp = xfs_btree_ptr_addr(cur, src_index, left); - rkp = xfs_btree_key_addr(cur, 1, right); - rpp = xfs_btree_ptr_addr(cur, 1, right); - -#ifdef DEBUG - for (i = src_index; i < rrecs; i++) { - error = xfs_btree_check_ptr(cur, lpp, i, level); - if (error) - goto error0; - } -#endif - - xfs_btree_copy_keys(cur, rkp, lkp, rrecs); - xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); - - xfs_btree_log_keys(cur, rbp, 1, rrecs); - xfs_btree_log_ptrs(cur, rbp, 1, rrecs); - - /* Grab the keys to the entries moved to the right block */ - xfs_btree_copy_keys(cur, key, rkp, 1); - } else { - /* It's a leaf. Move records. */ - union xfs_btree_rec *lrp; /* left record pointer */ - union xfs_btree_rec *rrp; /* right record pointer */ - - lrp = xfs_btree_rec_addr(cur, src_index, left); - rrp = xfs_btree_rec_addr(cur, 1, right); - - xfs_btree_copy_recs(cur, rrp, lrp, rrecs); - xfs_btree_log_recs(cur, rbp, 1, rrecs); - - cur->bc_ops->init_key_from_rec(key, - xfs_btree_rec_addr(cur, 1, right)); - } - - - /* - * Find the left block number by looking in the buffer. - * Adjust numrecs, sibling pointers. - */ - xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); - xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); - xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); - xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); - - lrecs -= rrecs; - xfs_btree_set_numrecs(left, lrecs); - xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); - - xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); - xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - - /* - * If there's a block to the new block's right, make that block - * point back to right instead of to left. - */ - if (!xfs_btree_ptr_is_null(cur, &rrptr)) { - error = xfs_btree_read_buf_block(cur, &rrptr, - 0, &rrblock, &rrbp); - if (error) - goto error0; - xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); - xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); - } - /* - * If the cursor is really in the right block, move it there. - * If it's just pointing past the last entry in left, then we'll - * insert there, so don't change anything in that case. - */ - if (cur->bc_ptrs[level] > lrecs + 1) { - xfs_btree_setbuf(cur, level, rbp); - cur->bc_ptrs[level] -= lrecs; - } - /* - * If there are more levels, we'll need another cursor which refers - * the right block, no matter where this cursor was. - */ - if (level + 1 < cur->bc_nlevels) { - error = xfs_btree_dup_cursor(cur, curp); - if (error) - goto error0; - (*curp)->bc_ptrs[level + 1]++; - } - *ptrp = rptr; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; -out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -} - -/* - * Copy the old inode root contents into a real block and make the - * broot point to it. - */ -int /* error */ -xfs_btree_new_iroot( - struct xfs_btree_cur *cur, /* btree cursor */ - int *logflags, /* logging flags for inode */ - int *stat) /* return status - 0 fail */ -{ - struct xfs_buf *cbp; /* buffer for cblock */ - struct xfs_btree_block *block; /* btree block */ - struct xfs_btree_block *cblock; /* child btree block */ - union xfs_btree_key *ckp; /* child key pointer */ - union xfs_btree_ptr *cpp; /* child ptr pointer */ - union xfs_btree_key *kp; /* pointer to btree key */ - union xfs_btree_ptr *pp; /* pointer to block addr */ - union xfs_btree_ptr nptr; /* new block addr */ - int level; /* btree level */ - int error; /* error return code */ -#ifdef DEBUG - int i; /* loop counter */ -#endif - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_STATS_INC(cur, newroot); - - ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); - - level = cur->bc_nlevels - 1; - - block = xfs_btree_get_iroot(cur); - pp = xfs_btree_ptr_addr(cur, 1, block); - - /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); - if (error) - goto error0; - if (*stat == 0) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return 0; - } - XFS_BTREE_STATS_INC(cur, alloc); - - /* Copy the root into a real block. */ - error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); - if (error) - goto error0; - - /* - * we can't just memcpy() the root in for CRC enabled btree blocks. - * In that case have to also ensure the blkno remains correct - */ - memcpy(cblock, block, xfs_btree_block_len(cur)); - if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn); - else - cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn); - } - - be16_add_cpu(&block->bb_level, 1); - xfs_btree_set_numrecs(block, 1); - cur->bc_nlevels++; - cur->bc_ptrs[level + 1] = 1; - - kp = xfs_btree_key_addr(cur, 1, block); - ckp = xfs_btree_key_addr(cur, 1, cblock); - xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock)); - - cpp = xfs_btree_ptr_addr(cur, 1, cblock); -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { - error = xfs_btree_check_ptr(cur, pp, i, level); - if (error) - goto error0; - } -#endif - xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock)); - -#ifdef DEBUG - error = xfs_btree_check_ptr(cur, &nptr, 0, level); - if (error) - goto error0; -#endif - xfs_btree_copy_ptrs(cur, pp, &nptr, 1); - - xfs_iroot_realloc(cur->bc_private.b.ip, - 1 - xfs_btree_get_numrecs(cblock), - cur->bc_private.b.whichfork); - - xfs_btree_setbuf(cur, level, cbp); - - /* - * Do all this logging at the end so that - * the root is at the right level. - */ - xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); - xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); - xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); - - *logflags |= - XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); - *stat = 1; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return 0; -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -} - -/* - * Allocate a new root block, fill it in. - */ -STATIC int /* error */ -xfs_btree_new_root( - struct xfs_btree_cur *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - struct xfs_btree_block *block; /* one half of the old root block */ - struct xfs_buf *bp; /* buffer containing block */ - int error; /* error return value */ - struct xfs_buf *lbp; /* left buffer pointer */ - struct xfs_btree_block *left; /* left btree block */ - struct xfs_buf *nbp; /* new (root) buffer */ - struct xfs_btree_block *new; /* new (root) btree block */ - int nptr; /* new value for key index, 1 or 2 */ - struct xfs_buf *rbp; /* right buffer pointer */ - struct xfs_btree_block *right; /* right btree block */ - union xfs_btree_ptr rptr; - union xfs_btree_ptr lptr; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_STATS_INC(cur, newroot); - - /* initialise our start point from the cursor */ - cur->bc_ops->init_ptr_from_cur(cur, &rptr); - - /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); - if (error) - goto error0; - if (*stat == 0) - goto out0; - XFS_BTREE_STATS_INC(cur, alloc); - - /* Set up the new block. */ - error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); - if (error) - goto error0; - - /* Set the root in the holding structure increasing the level by 1. */ - cur->bc_ops->set_root(cur, &lptr, 1); - - /* - * At the previous root level there are now two blocks: the old root, - * and the new block generated when it was split. We don't know which - * one the cursor is pointing at, so we set up variables "left" and - * "right" for each case. - */ - block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); - if (error) - goto error0; -#endif - - xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); - if (!xfs_btree_ptr_is_null(cur, &rptr)) { - /* Our block is left, pick up the right block. */ - lbp = bp; - xfs_btree_buf_to_ptr(cur, lbp, &lptr); - left = block; - error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); - if (error) - goto error0; - bp = rbp; - nptr = 1; - } else { - /* Our block is right, pick up the left block. */ - rbp = bp; - xfs_btree_buf_to_ptr(cur, rbp, &rptr); - right = block; - xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); - error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); - if (error) - goto error0; - bp = lbp; - nptr = 2; - } - /* Fill in the new block's btree header and log it. */ - xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); - xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); - ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && - !xfs_btree_ptr_is_null(cur, &rptr)); - - /* Fill in the key data in the new root. */ - if (xfs_btree_get_level(left) > 0) { - xfs_btree_copy_keys(cur, - xfs_btree_key_addr(cur, 1, new), - xfs_btree_key_addr(cur, 1, left), 1); - xfs_btree_copy_keys(cur, - xfs_btree_key_addr(cur, 2, new), - xfs_btree_key_addr(cur, 1, right), 1); - } else { - cur->bc_ops->init_key_from_rec( - xfs_btree_key_addr(cur, 1, new), - xfs_btree_rec_addr(cur, 1, left)); - cur->bc_ops->init_key_from_rec( - xfs_btree_key_addr(cur, 2, new), - xfs_btree_rec_addr(cur, 1, right)); - } - xfs_btree_log_keys(cur, nbp, 1, 2); - - /* Fill in the pointer data in the new root. */ - xfs_btree_copy_ptrs(cur, - xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); - xfs_btree_copy_ptrs(cur, - xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); - xfs_btree_log_ptrs(cur, nbp, 1, 2); - - /* Fix up the cursor. */ - xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); - cur->bc_ptrs[cur->bc_nlevels] = nptr; - cur->bc_nlevels++; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; -} - -STATIC int -xfs_btree_make_block_unfull( - struct xfs_btree_cur *cur, /* btree cursor */ - int level, /* btree level */ - int numrecs,/* # of recs in block */ - int *oindex,/* old tree index */ - int *index, /* new tree index */ - union xfs_btree_ptr *nptr, /* new btree ptr */ - struct xfs_btree_cur **ncur, /* new btree cursor */ - union xfs_btree_rec *nrec, /* new record */ - int *stat) -{ - union xfs_btree_key key; /* new btree key value */ - int error = 0; - - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - level == cur->bc_nlevels - 1) { - struct xfs_inode *ip = cur->bc_private.b.ip; - - if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { - /* A root block that can be made bigger. */ - xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); - } else { - /* A root block that needs replacing */ - int logflags = 0; - - error = xfs_btree_new_iroot(cur, &logflags, stat); - if (error || *stat == 0) - return error; - - xfs_trans_log_inode(cur->bc_tp, ip, logflags); - } - - return 0; - } - - /* First, try shifting an entry to the right neighbor. */ - error = xfs_btree_rshift(cur, level, stat); - if (error || *stat) - return error; - - /* Next, try shifting an entry to the left neighbor. */ - error = xfs_btree_lshift(cur, level, stat); - if (error) - return error; - - if (*stat) { - *oindex = *index = cur->bc_ptrs[level]; - return 0; - } - - /* - * Next, try splitting the current block in half. - * - * If this works we have to re-set our variables because we - * could be in a different block now. - */ - error = xfs_btree_split(cur, level, nptr, &key, ncur, stat); - if (error || *stat == 0) - return error; - - - *index = cur->bc_ptrs[level]; - cur->bc_ops->init_rec_from_key(&key, nrec); - return 0; -} - -/* - * Insert one record/level. Return information to the caller - * allowing the next level up to proceed if necessary. - */ -STATIC int -xfs_btree_insrec( - struct xfs_btree_cur *cur, /* btree cursor */ - int level, /* level to insert record at */ - union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ - union xfs_btree_rec *recp, /* i/o: record data inserted */ - struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ - int *stat) /* success/failure */ -{ - struct xfs_btree_block *block; /* btree block */ - struct xfs_buf *bp; /* buffer for block */ - union xfs_btree_key key; /* btree key */ - union xfs_btree_ptr nptr; /* new block ptr */ - struct xfs_btree_cur *ncur; /* new btree cursor */ - union xfs_btree_rec nrec; /* new record count */ - int optr; /* old key/record index */ - int ptr; /* key/record index */ - int numrecs;/* number of records */ - int error; /* error return value */ -#ifdef DEBUG - int i; -#endif - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp); - - ncur = NULL; - - /* - * If we have an external root pointer, and we've made it to the - * root level, allocate a new root block and we're done. - */ - if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - (level >= cur->bc_nlevels)) { - error = xfs_btree_new_root(cur, stat); - xfs_btree_set_ptr_null(cur, ptrp); - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return error; - } - - /* If we're off the left edge, return failure. */ - ptr = cur->bc_ptrs[level]; - if (ptr == 0) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - } - - /* Make a key out of the record data to be inserted, and save it. */ - cur->bc_ops->init_key_from_rec(&key, recp); - - optr = ptr; - - XFS_BTREE_STATS_INC(cur, insrec); - - /* Get pointers to the btree buffer and block. */ - block = xfs_btree_get_block(cur, level, &bp); - numrecs = xfs_btree_get_numrecs(block); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, block, level, bp); - if (error) - goto error0; - - /* Check that the new entry is being inserted in the right place. */ - if (ptr <= numrecs) { - if (level == 0) { - ASSERT(cur->bc_ops->recs_inorder(cur, recp, - xfs_btree_rec_addr(cur, ptr, block))); - } else { - ASSERT(cur->bc_ops->keys_inorder(cur, &key, - xfs_btree_key_addr(cur, ptr, block))); - } - } -#endif - - /* - * If the block is full, we can't insert the new entry until we - * make the block un-full. - */ - xfs_btree_set_ptr_null(cur, &nptr); - if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { - error = xfs_btree_make_block_unfull(cur, level, numrecs, - &optr, &ptr, &nptr, &ncur, &nrec, stat); - if (error || *stat == 0) - goto error0; - } - - /* - * The current block may have changed if the block was - * previously full and we have just made space in it. - */ - block = xfs_btree_get_block(cur, level, &bp); - numrecs = xfs_btree_get_numrecs(block); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, block, level, bp); - if (error) - return error; -#endif - - /* - * At this point we know there's room for our new entry in the block - * we're pointing at. - */ - XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); - - if (level > 0) { - /* It's a nonleaf. make a hole in the keys and ptrs */ - union xfs_btree_key *kp; - union xfs_btree_ptr *pp; - - kp = xfs_btree_key_addr(cur, ptr, block); - pp = xfs_btree_ptr_addr(cur, ptr, block); - -#ifdef DEBUG - for (i = numrecs - ptr; i >= 0; i--) { - error = xfs_btree_check_ptr(cur, pp, i, level); - if (error) - return error; - } -#endif - - xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); - xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); - -#ifdef DEBUG - error = xfs_btree_check_ptr(cur, ptrp, 0, level); - if (error) - goto error0; -#endif - - /* Now put the new data in, bump numrecs and log it. */ - xfs_btree_copy_keys(cur, kp, &key, 1); - xfs_btree_copy_ptrs(cur, pp, ptrp, 1); - numrecs++; - xfs_btree_set_numrecs(block, numrecs); - xfs_btree_log_ptrs(cur, bp, ptr, numrecs); - xfs_btree_log_keys(cur, bp, ptr, numrecs); -#ifdef DEBUG - if (ptr < numrecs) { - ASSERT(cur->bc_ops->keys_inorder(cur, kp, - xfs_btree_key_addr(cur, ptr + 1, block))); - } -#endif - } else { - /* It's a leaf. make a hole in the records */ - union xfs_btree_rec *rp; - - rp = xfs_btree_rec_addr(cur, ptr, block); - - xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); - - /* Now put the new data in, bump numrecs and log it. */ - xfs_btree_copy_recs(cur, rp, recp, 1); - xfs_btree_set_numrecs(block, ++numrecs); - xfs_btree_log_recs(cur, bp, ptr, numrecs); -#ifdef DEBUG - if (ptr < numrecs) { - ASSERT(cur->bc_ops->recs_inorder(cur, rp, - xfs_btree_rec_addr(cur, ptr + 1, block))); - } -#endif - } - - /* Log the new number of records in the btree header. */ - xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); - - /* If we inserted at the start of a block, update the parents' keys. */ - if (optr == 1) { - error = xfs_btree_updkey(cur, &key, level + 1); - if (error) - goto error0; - } - - /* - * If we are tracking the last record in the tree and - * we are at the far right edge of the tree, update it. - */ - if (xfs_btree_is_lastrec(cur, block, level)) { - cur->bc_ops->update_lastrec(cur, block, recp, - ptr, LASTREC_INSREC); - } - - /* - * Return the new block number, if any. - * If there is one, give back a record value and a cursor too. - */ - *ptrp = nptr; - if (!xfs_btree_ptr_is_null(cur, &nptr)) { - *recp = nrec; - *curp = ncur; - } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; - -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -} - -/* - * Insert the record at the point referenced by cur. - * - * A multi-level split of the tree on insert will invalidate the original - * cursor. All callers of this function should assume that the cursor is - * no longer valid and revalidate it. - */ -int -xfs_btree_insert( - struct xfs_btree_cur *cur, - int *stat) -{ - int error; /* error return value */ - int i; /* result value, 0 for failure */ - int level; /* current level number in btree */ - union xfs_btree_ptr nptr; /* new block number (split result) */ - struct xfs_btree_cur *ncur; /* new cursor (split result) */ - struct xfs_btree_cur *pcur; /* previous level's cursor */ - union xfs_btree_rec rec; /* record to insert */ - - level = 0; - ncur = NULL; - pcur = cur; - - xfs_btree_set_ptr_null(cur, &nptr); - cur->bc_ops->init_rec_from_cur(cur, &rec); - - /* - * Loop going up the tree, starting at the leaf level. - * Stop when we don't get a split block, that must mean that - * the insert is finished with this level. - */ - do { - /* - * Insert nrec/nptr into this level of the tree. - * Note if we fail, nptr will be null. - */ - error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i); - if (error) { - if (pcur != cur) - xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); - goto error0; - } - - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - level++; - - /* - * See if the cursor we just used is trash. - * Can't trash the caller's cursor, but otherwise we should - * if ncur is a new cursor or we're about to be done. - */ - if (pcur != cur && - (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { - /* Save the state from the cursor before we trash it */ - if (cur->bc_ops->update_cursor) - cur->bc_ops->update_cursor(pcur, cur); - cur->bc_nlevels = pcur->bc_nlevels; - xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); - } - /* If we got a new cursor, switch to it. */ - if (ncur) { - pcur = ncur; - ncur = NULL; - } - } while (!xfs_btree_ptr_is_null(cur, &nptr)); - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = i; - return 0; -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -} - -/* - * Try to merge a non-leaf block back into the inode root. - * - * Note: the killroot names comes from the fact that we're effectively - * killing the old root block. But because we can't just delete the - * inode we have to copy the single block it was pointing to into the - * inode. - */ -STATIC int -xfs_btree_kill_iroot( - struct xfs_btree_cur *cur) -{ - int whichfork = cur->bc_private.b.whichfork; - struct xfs_inode *ip = cur->bc_private.b.ip; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); - struct xfs_btree_block *block; - struct xfs_btree_block *cblock; - union xfs_btree_key *kp; - union xfs_btree_key *ckp; - union xfs_btree_ptr *pp; - union xfs_btree_ptr *cpp; - struct xfs_buf *cbp; - int level; - int index; - int numrecs; -#ifdef DEBUG - union xfs_btree_ptr ptr; - int i; -#endif - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - - ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); - ASSERT(cur->bc_nlevels > 1); - - /* - * Don't deal with the root block needs to be a leaf case. - * We're just going to turn the thing back into extents anyway. - */ - level = cur->bc_nlevels - 1; - if (level == 1) - goto out0; - - /* - * Give up if the root has multiple children. - */ - block = xfs_btree_get_iroot(cur); - if (xfs_btree_get_numrecs(block) != 1) - goto out0; - - cblock = xfs_btree_get_block(cur, level - 1, &cbp); - numrecs = xfs_btree_get_numrecs(cblock); - - /* - * Only do this if the next level will fit. - * Then the data must be copied up to the inode, - * instead of freeing the root you free the next level. - */ - if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) - goto out0; - - XFS_BTREE_STATS_INC(cur, killroot); - -#ifdef DEBUG - xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); - ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); - xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); - ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); -#endif - - index = numrecs - cur->bc_ops->get_maxrecs(cur, level); - if (index) { - xfs_iroot_realloc(cur->bc_private.b.ip, index, - cur->bc_private.b.whichfork); - block = ifp->if_broot; - } - - be16_add_cpu(&block->bb_numrecs, index); - ASSERT(block->bb_numrecs == cblock->bb_numrecs); - - kp = xfs_btree_key_addr(cur, 1, block); - ckp = xfs_btree_key_addr(cur, 1, cblock); - xfs_btree_copy_keys(cur, kp, ckp, numrecs); - - pp = xfs_btree_ptr_addr(cur, 1, block); - cpp = xfs_btree_ptr_addr(cur, 1, cblock); -#ifdef DEBUG - for (i = 0; i < numrecs; i++) { - int error; - - error = xfs_btree_check_ptr(cur, cpp, i, level - 1); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; - } - } -#endif - xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); - - cur->bc_ops->free_block(cur, cbp); - XFS_BTREE_STATS_INC(cur, free); - - cur->bc_bufs[level - 1] = NULL; - be16_add_cpu(&block->bb_level, -1); - xfs_trans_log_inode(cur->bc_tp, ip, - XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); - cur->bc_nlevels--; -out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return 0; -} - -/* - * Kill the current root node, and replace it with it's only child node. - */ -STATIC int -xfs_btree_kill_root( - struct xfs_btree_cur *cur, - struct xfs_buf *bp, - int level, - union xfs_btree_ptr *newroot) -{ - int error; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_STATS_INC(cur, killroot); - - /* - * Update the root pointer, decreasing the level by 1 and then - * free the old root. - */ - cur->bc_ops->set_root(cur, newroot, -1); - - error = cur->bc_ops->free_block(cur, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; - } - - XFS_BTREE_STATS_INC(cur, free); - - cur->bc_bufs[level] = NULL; - cur->bc_ra[level] = 0; - cur->bc_nlevels--; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return 0; -} - -STATIC int -xfs_btree_dec_cursor( - struct xfs_btree_cur *cur, - int level, - int *stat) -{ - int error; - int i; - - if (level > 0) { - error = xfs_btree_decrement(cur, level, &i); - if (error) - return error; - } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; -} - -/* - * Single level of the btree record deletion routine. - * Delete record pointed to by cur/level. - * Remove the record from its block then rebalance the tree. - * Return 0 for error, 1 for done, 2 to go on to the next level. - */ -STATIC int /* error */ -xfs_btree_delrec( - struct xfs_btree_cur *cur, /* btree cursor */ - int level, /* level removing record from */ - int *stat) /* fail/done/go-on */ -{ - struct xfs_btree_block *block; /* btree block */ - union xfs_btree_ptr cptr; /* current block ptr */ - struct xfs_buf *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop counter */ - union xfs_btree_key key; /* storage for keyp */ - union xfs_btree_key *keyp = &key; /* passed to the next level */ - union xfs_btree_ptr lptr; /* left sibling block ptr */ - struct xfs_buf *lbp; /* left buffer pointer */ - struct xfs_btree_block *left; /* left btree block */ - int lrecs = 0; /* left record count */ - int ptr; /* key/record index */ - union xfs_btree_ptr rptr; /* right sibling block ptr */ - struct xfs_buf *rbp; /* right buffer pointer */ - struct xfs_btree_block *right; /* right btree block */ - struct xfs_btree_block *rrblock; /* right-right btree block */ - struct xfs_buf *rrbp; /* right-right buffer pointer */ - int rrecs = 0; /* right record count */ - struct xfs_btree_cur *tcur; /* temporary btree cursor */ - int numrecs; /* temporary numrec count */ - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - - tcur = NULL; - - /* Get the index of the entry being deleted, check for nothing there. */ - ptr = cur->bc_ptrs[level]; - if (ptr == 0) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - } - - /* Get the buffer & block containing the record or key/ptr. */ - block = xfs_btree_get_block(cur, level, &bp); - numrecs = xfs_btree_get_numrecs(block); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, block, level, bp); - if (error) - goto error0; -#endif - - /* Fail if we're off the end of the block. */ - if (ptr > numrecs) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - } - - XFS_BTREE_STATS_INC(cur, delrec); - XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); - - /* Excise the entries being deleted. */ - if (level > 0) { - /* It's a nonleaf. operate on keys and ptrs */ - union xfs_btree_key *lkp; - union xfs_btree_ptr *lpp; - - lkp = xfs_btree_key_addr(cur, ptr + 1, block); - lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); - -#ifdef DEBUG - for (i = 0; i < numrecs - ptr; i++) { - error = xfs_btree_check_ptr(cur, lpp, i, level); - if (error) - goto error0; - } -#endif - - if (ptr < numrecs) { - xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); - xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); - xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); - xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); - } - - /* - * If it's the first record in the block, we'll need to pass a - * key up to the next level (updkey). - */ - if (ptr == 1) - keyp = xfs_btree_key_addr(cur, 1, block); - } else { - /* It's a leaf. operate on records */ - if (ptr < numrecs) { - xfs_btree_shift_recs(cur, - xfs_btree_rec_addr(cur, ptr + 1, block), - -1, numrecs - ptr); - xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); - } - - /* - * If it's the first record in the block, we'll need a key - * structure to pass up to the next level (updkey). - */ - if (ptr == 1) { - cur->bc_ops->init_key_from_rec(&key, - xfs_btree_rec_addr(cur, 1, block)); - keyp = &key; - } - } - - /* - * Decrement and log the number of entries in the block. - */ - xfs_btree_set_numrecs(block, --numrecs); - xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); - - /* - * If we are tracking the last record in the tree and - * we are at the far right edge of the tree, update it. - */ - if (xfs_btree_is_lastrec(cur, block, level)) { - cur->bc_ops->update_lastrec(cur, block, NULL, - ptr, LASTREC_DELREC); - } - - /* - * We're at the root level. First, shrink the root block in-memory. - * Try to get rid of the next level down. If we can't then there's - * nothing left to do. - */ - if (level == cur->bc_nlevels - 1) { - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { - xfs_iroot_realloc(cur->bc_private.b.ip, -1, - cur->bc_private.b.whichfork); - - error = xfs_btree_kill_iroot(cur); - if (error) - goto error0; - - error = xfs_btree_dec_cursor(cur, level, stat); - if (error) - goto error0; - *stat = 1; - return 0; - } - - /* - * If this is the root level, and there's only one entry left, - * and it's NOT the leaf level, then we can get rid of this - * level. - */ - if (numrecs == 1 && level > 0) { - union xfs_btree_ptr *pp; - /* - * pp is still set to the first pointer in the block. - * Make it the new root of the btree. - */ - pp = xfs_btree_ptr_addr(cur, 1, block); - error = xfs_btree_kill_root(cur, bp, level, pp); - if (error) - goto error0; - } else if (level > 0) { - error = xfs_btree_dec_cursor(cur, level, stat); - if (error) - goto error0; - } - *stat = 1; - return 0; - } - - /* - * If we deleted the leftmost entry in the block, update the - * key values above us in the tree. - */ - if (ptr == 1) { - error = xfs_btree_updkey(cur, keyp, level + 1); - if (error) - goto error0; - } - - /* - * If the number of records remaining in the block is at least - * the minimum, we're done. - */ - if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { - error = xfs_btree_dec_cursor(cur, level, stat); - if (error) - goto error0; - return 0; - } - - /* - * Otherwise, we have to move some records around to keep the - * tree balanced. Look at the left and right sibling blocks to - * see if we can re-balance by moving only one record. - */ - xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); - xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); - - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { - /* - * One child of root, need to get a chance to copy its contents - * into the root and delete it. Can't go up to next level, - * there's nothing to delete there. - */ - if (xfs_btree_ptr_is_null(cur, &rptr) && - xfs_btree_ptr_is_null(cur, &lptr) && - level == cur->bc_nlevels - 2) { - error = xfs_btree_kill_iroot(cur); - if (!error) - error = xfs_btree_dec_cursor(cur, level, stat); - if (error) - goto error0; - return 0; - } - } - - ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || - !xfs_btree_ptr_is_null(cur, &lptr)); - - /* - * Duplicate the cursor so our btree manipulations here won't - * disrupt the next level up. - */ - error = xfs_btree_dup_cursor(cur, &tcur); - if (error) - goto error0; - - /* - * If there's a right sibling, see if it's ok to shift an entry - * out of it. - */ - if (!xfs_btree_ptr_is_null(cur, &rptr)) { - /* - * Move the temp cursor to the last entry in the next block. - * Actually any entry but the first would suffice. - */ - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - - error = xfs_btree_increment(tcur, level, &i); - if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - - /* Grab a pointer to the block. */ - right = xfs_btree_get_block(tcur, level, &rbp); -#ifdef DEBUG - error = xfs_btree_check_block(tcur, right, level, rbp); - if (error) - goto error0; -#endif - /* Grab the current block number, for future use. */ - xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); - - /* - * If right block is full enough so that removing one entry - * won't make it too empty, and left-shifting an entry out - * of right to us works, we're done. - */ - if (xfs_btree_get_numrecs(right) - 1 >= - cur->bc_ops->get_minrecs(tcur, level)) { - error = xfs_btree_lshift(tcur, level, &i); - if (error) - goto error0; - if (i) { - ASSERT(xfs_btree_get_numrecs(block) >= - cur->bc_ops->get_minrecs(tcur, level)); - - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - tcur = NULL; - - error = xfs_btree_dec_cursor(cur, level, stat); - if (error) - goto error0; - return 0; - } - } - - /* - * Otherwise, grab the number of records in right for - * future reference, and fix up the temp cursor to point - * to our block again (last record). - */ - rrecs = xfs_btree_get_numrecs(right); - if (!xfs_btree_ptr_is_null(cur, &lptr)) { - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - - error = xfs_btree_decrement(tcur, level, &i); - if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } - } - - /* - * If there's a left sibling, see if it's ok to shift an entry - * out of it. - */ - if (!xfs_btree_ptr_is_null(cur, &lptr)) { - /* - * Move the temp cursor to the first entry in the - * previous block. - */ - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - - error = xfs_btree_decrement(tcur, level, &i); - if (error) - goto error0; - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - - /* Grab a pointer to the block. */ - left = xfs_btree_get_block(tcur, level, &lbp); -#ifdef DEBUG - error = xfs_btree_check_block(cur, left, level, lbp); - if (error) - goto error0; -#endif - /* Grab the current block number, for future use. */ - xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); - - /* - * If left block is full enough so that removing one entry - * won't make it too empty, and right-shifting an entry out - * of left to us works, we're done. - */ - if (xfs_btree_get_numrecs(left) - 1 >= - cur->bc_ops->get_minrecs(tcur, level)) { - error = xfs_btree_rshift(tcur, level, &i); - if (error) - goto error0; - if (i) { - ASSERT(xfs_btree_get_numrecs(block) >= - cur->bc_ops->get_minrecs(tcur, level)); - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - tcur = NULL; - if (level == 0) - cur->bc_ptrs[0]++; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 1; - return 0; - } - } - - /* - * Otherwise, grab the number of records in right for - * future reference. - */ - lrecs = xfs_btree_get_numrecs(left); - } - - /* Delete the temp cursor, we're done with it. */ - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - tcur = NULL; - - /* If here, we need to do a join to keep the tree balanced. */ - ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); - - if (!xfs_btree_ptr_is_null(cur, &lptr) && - lrecs + xfs_btree_get_numrecs(block) <= - cur->bc_ops->get_maxrecs(cur, level)) { - /* - * Set "right" to be the starting block, - * "left" to be the left neighbor. - */ - rptr = cptr; - right = block; - rbp = bp; - error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); - if (error) - goto error0; - - /* - * If that won't work, see if we can join with the right neighbor block. - */ - } else if (!xfs_btree_ptr_is_null(cur, &rptr) && - rrecs + xfs_btree_get_numrecs(block) <= - cur->bc_ops->get_maxrecs(cur, level)) { - /* - * Set "left" to be the starting block, - * "right" to be the right neighbor. - */ - lptr = cptr; - left = block; - lbp = bp; - error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); - if (error) - goto error0; - - /* - * Otherwise, we can't fix the imbalance. - * Just return. This is probably a logic error, but it's not fatal. - */ - } else { - error = xfs_btree_dec_cursor(cur, level, stat); - if (error) - goto error0; - return 0; - } - - rrecs = xfs_btree_get_numrecs(right); - lrecs = xfs_btree_get_numrecs(left); - - /* - * We're now going to join "left" and "right" by moving all the stuff - * in "right" to "left" and deleting "right". - */ - XFS_BTREE_STATS_ADD(cur, moves, rrecs); - if (level > 0) { - /* It's a non-leaf. Move keys and pointers. */ - union xfs_btree_key *lkp; /* left btree key */ - union xfs_btree_ptr *lpp; /* left address pointer */ - union xfs_btree_key *rkp; /* right btree key */ - union xfs_btree_ptr *rpp; /* right address pointer */ - - lkp = xfs_btree_key_addr(cur, lrecs + 1, left); - lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); - rkp = xfs_btree_key_addr(cur, 1, right); - rpp = xfs_btree_ptr_addr(cur, 1, right); -#ifdef DEBUG - for (i = 1; i < rrecs; i++) { - error = xfs_btree_check_ptr(cur, rpp, i, level); - if (error) - goto error0; - } -#endif - xfs_btree_copy_keys(cur, lkp, rkp, rrecs); - xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); - - xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); - xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); - } else { - /* It's a leaf. Move records. */ - union xfs_btree_rec *lrp; /* left record pointer */ - union xfs_btree_rec *rrp; /* right record pointer */ - - lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); - rrp = xfs_btree_rec_addr(cur, 1, right); - - xfs_btree_copy_recs(cur, lrp, rrp, rrecs); - xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); - } - - XFS_BTREE_STATS_INC(cur, join); - - /* - * Fix up the number of records and right block pointer in the - * surviving block, and log it. - */ - xfs_btree_set_numrecs(left, lrecs + rrecs); - xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), - xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); - xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - - /* If there is a right sibling, point it to the remaining block. */ - xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); - if (!xfs_btree_ptr_is_null(cur, &cptr)) { - error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); - if (error) - goto error0; - xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); - xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); - } - - /* Free the deleted block. */ - error = cur->bc_ops->free_block(cur, rbp); - if (error) - goto error0; - XFS_BTREE_STATS_INC(cur, free); - - /* - * If we joined with the left neighbor, set the buffer in the - * cursor to the left block, and fix up the index. - */ - if (bp != lbp) { - cur->bc_bufs[level] = lbp; - cur->bc_ptrs[level] += lrecs; - cur->bc_ra[level] = 0; - } - /* - * If we joined with the right neighbor and there's a level above - * us, increment the cursor at that level. - */ - else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || - (level + 1 < cur->bc_nlevels)) { - error = xfs_btree_increment(cur, level + 1, &i); - if (error) - goto error0; - } - - /* - * Readjust the ptr at this level if it's not a leaf, since it's - * still pointing at the deletion point, which makes the cursor - * inconsistent. If this makes the ptr 0, the caller fixes it up. - * We can't use decrement because it would change the next level up. - */ - if (level > 0) - cur->bc_ptrs[level]--; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - /* Return value means the next level up has something to do. */ - *stat = 2; - return 0; - -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - if (tcur) - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - -/* - * Delete the record pointed to by cur. - * The cursor refers to the place where the record was (could be inserted) - * when the operation returns. - */ -int /* error */ -xfs_btree_delete( - struct xfs_btree_cur *cur, - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int level; - int i; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - - /* - * Go up the tree, starting at leaf level. - * - * If 2 is returned then a join was done; go to the next level. - * Otherwise we are done. - */ - for (level = 0, i = 2; i == 2; level++) { - error = xfs_btree_delrec(cur, level, &i); - if (error) - goto error0; - } - - if (i == 0) { - for (level = 1; level < cur->bc_nlevels; level++) { - if (cur->bc_ptrs[level] == 0) { - error = xfs_btree_decrement(cur, level, &i); - if (error) - goto error0; - break; - } - } - } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = i; - return 0; -error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; -} - -/* - * Get the data from the pointed-to record. - */ -int /* error */ -xfs_btree_get_rec( - struct xfs_btree_cur *cur, /* btree cursor */ - union xfs_btree_rec **recp, /* output: btree record */ - int *stat) /* output: success/failure */ -{ - struct xfs_btree_block *block; /* btree block */ - struct xfs_buf *bp; /* buffer pointer */ - int ptr; /* record number */ -#ifdef DEBUG - int error; /* error return value */ -#endif - - ptr = cur->bc_ptrs[0]; - block = xfs_btree_get_block(cur, 0, &bp); - -#ifdef DEBUG - error = xfs_btree_check_block(cur, block, 0, bp); - if (error) - return error; -#endif - - /* - * Off the right end or left end, return failure. - */ - if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { - *stat = 0; - return 0; - } - - /* - * Point to the record and extract its data. - */ - *recp = xfs_btree_rec_addr(cur, ptr, block); - *stat = 1; - return 0; -} - -/* - * Change the owner of a btree. - * - * The mechanism we use here is ordered buffer logging. Because we don't know - * how many buffers were are going to need to modify, we don't really want to - * have to make transaction reservations for the worst case of every buffer in a - * full size btree as that may be more space that we can fit in the log.... - * - * We do the btree walk in the most optimal manner possible - we have sibling - * pointers so we can just walk all the blocks on each level from left to right - * in a single pass, and then move to the next level and do the same. We can - * also do readahead on the sibling pointers to get IO moving more quickly, - * though for slow disks this is unlikely to make much difference to performance - * as the amount of CPU work we have to do before moving to the next block is - * relatively small. - * - * For each btree block that we load, modify the owner appropriately, set the - * buffer as an ordered buffer and log it appropriately. We need to ensure that - * we mark the region we change dirty so that if the buffer is relogged in - * a subsequent transaction the changes we make here as an ordered buffer are - * correctly relogged in that transaction. If we are in recovery context, then - * just queue the modified buffer as delayed write buffer so the transaction - * recovery completion writes the changes to disk. - */ -static int -xfs_btree_block_change_owner( - struct xfs_btree_cur *cur, - int level, - __uint64_t new_owner, - struct list_head *buffer_list) -{ - struct xfs_btree_block *block; - struct xfs_buf *bp; - union xfs_btree_ptr rptr; - - /* do right sibling readahead */ - xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); - - /* modify the owner */ - block = xfs_btree_get_block(cur, level, &bp); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - block->bb_u.l.bb_owner = cpu_to_be64(new_owner); - else - block->bb_u.s.bb_owner = cpu_to_be32(new_owner); - - /* - * If the block is a root block hosted in an inode, we might not have a - * buffer pointer here and we shouldn't attempt to log the change as the - * information is already held in the inode and discarded when the root - * block is formatted into the on-disk inode fork. We still change it, - * though, so everything is consistent in memory. - */ - if (bp) { - if (cur->bc_tp) { - xfs_trans_ordered_buf(cur->bc_tp, bp); - xfs_btree_log_block(cur, bp, XFS_BB_OWNER); - } else { - xfs_buf_delwri_queue(bp, buffer_list); - } - } else { - ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); - ASSERT(level == cur->bc_nlevels - 1); - } - - /* now read rh sibling block for next iteration */ - xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); - if (xfs_btree_ptr_is_null(cur, &rptr)) - return ENOENT; - - return xfs_btree_lookup_get_block(cur, level, &rptr, &block); -} - -int -xfs_btree_change_owner( - struct xfs_btree_cur *cur, - __uint64_t new_owner, - struct list_head *buffer_list) -{ - union xfs_btree_ptr lptr; - int level; - struct xfs_btree_block *block = NULL; - int error = 0; - - cur->bc_ops->init_ptr_from_cur(cur, &lptr); - - /* for each level */ - for (level = cur->bc_nlevels - 1; level >= 0; level--) { - /* grab the left hand block */ - error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); - if (error) - return error; - - /* readahead the left most block for the next level down */ - if (level > 0) { - union xfs_btree_ptr *ptr; - - ptr = xfs_btree_ptr_addr(cur, 1, block); - xfs_btree_readahead_ptr(cur, ptr, 1); - - /* save for the next iteration of the loop */ - lptr = *ptr; - } - - /* for each buffer in the level */ - do { - error = xfs_btree_block_change_owner(cur, level, - new_owner, - buffer_list); - } while (!error); - - if (error != ENOENT) - return error; - } - - return 0; -} diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c deleted file mode 100644 index a1a4e3e47a1e..000000000000 --- a/fs/xfs/xfs_da_btree.c +++ /dev/null @@ -1,2665 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_dir2.h" -#include "xfs_dir2_priv.h" -#include "xfs_inode.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_alloc.h" -#include "xfs_bmap.h" -#include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_cksum.h" -#include "xfs_buf_item.h" - -/* - * xfs_da_btree.c - * - * Routines to implement directories as Btrees of hashed names. - */ - -/*======================================================================== - * Function prototypes for the kernel. - *========================================================================*/ - -/* - * Routines used for growing the Btree. - */ -STATIC int xfs_da3_root_split(xfs_da_state_t *state, - xfs_da_state_blk_t *existing_root, - xfs_da_state_blk_t *new_child); -STATIC int xfs_da3_node_split(xfs_da_state_t *state, - xfs_da_state_blk_t *existing_blk, - xfs_da_state_blk_t *split_blk, - xfs_da_state_blk_t *blk_to_add, - int treelevel, - int *result); -STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state, - xfs_da_state_blk_t *node_blk_1, - xfs_da_state_blk_t *node_blk_2); -STATIC void xfs_da3_node_add(xfs_da_state_t *state, - xfs_da_state_blk_t *old_node_blk, - xfs_da_state_blk_t *new_node_blk); - -/* - * Routines used for shrinking the Btree. - */ -STATIC int xfs_da3_root_join(xfs_da_state_t *state, - xfs_da_state_blk_t *root_blk); -STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval); -STATIC void xfs_da3_node_remove(xfs_da_state_t *state, - xfs_da_state_blk_t *drop_blk); -STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state, - xfs_da_state_blk_t *src_node_blk, - xfs_da_state_blk_t *dst_node_blk); - -/* - * Utility routines. - */ -STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state, - xfs_da_state_blk_t *drop_blk, - xfs_da_state_blk_t *save_blk); - - -kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ - -/* - * Allocate a dir-state structure. - * We don't put them on the stack since they're large. - */ -xfs_da_state_t * -xfs_da_state_alloc(void) -{ - return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS); -} - -/* - * Kill the altpath contents of a da-state structure. - */ -STATIC void -xfs_da_state_kill_altpath(xfs_da_state_t *state) -{ - int i; - - for (i = 0; i < state->altpath.active; i++) - state->altpath.blk[i].bp = NULL; - state->altpath.active = 0; -} - -/* - * Free a da-state structure. - */ -void -xfs_da_state_free(xfs_da_state_t *state) -{ - xfs_da_state_kill_altpath(state); -#ifdef DEBUG - memset((char *)state, 0, sizeof(*state)); -#endif /* DEBUG */ - kmem_zone_free(xfs_da_state_zone, state); -} - -static bool -xfs_da3_node_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_da_intnode *hdr = bp->b_addr; - struct xfs_da3_icnode_hdr ichdr; - const struct xfs_dir_ops *ops; - - ops = xfs_dir_get_ops(mp, NULL); - - ops->node_hdr_from_disk(&ichdr, hdr); - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - struct xfs_da3_node_hdr *hdr3 = bp->b_addr; - - if (ichdr.magic != XFS_DA3_NODE_MAGIC) - return false; - - if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid)) - return false; - if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) - return false; - } else { - if (ichdr.magic != XFS_DA_NODE_MAGIC) - return false; - } - if (ichdr.level == 0) - return false; - if (ichdr.level > XFS_DA_NODE_MAXDEPTH) - return false; - if (ichdr.count == 0) - return false; - - /* - * we don't know if the node is for and attribute or directory tree, - * so only fail if the count is outside both bounds - */ - if (ichdr.count > mp->m_dir_geo->node_ents && - ichdr.count > mp->m_attr_geo->node_ents) - return false; - - /* XXX: hash order check? */ - - return true; -} - -static void -xfs_da3_node_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - struct xfs_da3_node_hdr *hdr3 = bp->b_addr; - - if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (bip) - hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - - xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF); -} - -/* - * leaf/node format detection on trees is sketchy, so a node read can be done on - * leaf level blocks when detection identifies the tree as a node format tree - * incorrectly. In this case, we need to swap the verifier to match the correct - * format of the block being read. - */ -static void -xfs_da3_node_read_verify( - struct xfs_buf *bp) -{ - struct xfs_da_blkinfo *info = bp->b_addr; - - switch (be16_to_cpu(info->magic)) { - case XFS_DA3_NODE_MAGIC: - if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { - xfs_buf_ioerror(bp, EFSBADCRC); - break; - } - /* fall through */ - case XFS_DA_NODE_MAGIC: - if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - break; - } - return; - case XFS_ATTR_LEAF_MAGIC: - case XFS_ATTR3_LEAF_MAGIC: - bp->b_ops = &xfs_attr3_leaf_buf_ops; - bp->b_ops->verify_read(bp); - return; - case XFS_DIR2_LEAFN_MAGIC: - case XFS_DIR3_LEAFN_MAGIC: - bp->b_ops = &xfs_dir3_leafn_buf_ops; - bp->b_ops->verify_read(bp); - return; - default: - break; - } - - /* corrupt block */ - xfs_verifier_error(bp); -} - -const struct xfs_buf_ops xfs_da3_node_buf_ops = { - .verify_read = xfs_da3_node_read_verify, - .verify_write = xfs_da3_node_write_verify, -}; - -int -xfs_da3_node_read( - struct xfs_trans *tp, - struct xfs_inode *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - struct xfs_buf **bpp, - int which_fork) -{ - int err; - - err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, - which_fork, &xfs_da3_node_buf_ops); - if (!err && tp) { - struct xfs_da_blkinfo *info = (*bpp)->b_addr; - int type; - - switch (be16_to_cpu(info->magic)) { - case XFS_DA_NODE_MAGIC: - case XFS_DA3_NODE_MAGIC: - type = XFS_BLFT_DA_NODE_BUF; - break; - case XFS_ATTR_LEAF_MAGIC: - case XFS_ATTR3_LEAF_MAGIC: - type = XFS_BLFT_ATTR_LEAF_BUF; - break; - case XFS_DIR2_LEAFN_MAGIC: - case XFS_DIR3_LEAFN_MAGIC: - type = XFS_BLFT_DIR_LEAFN_BUF; - break; - default: - type = 0; - ASSERT(0); - break; - } - xfs_trans_buf_set_type(tp, *bpp, type); - } - return err; -} - -/*======================================================================== - * Routines used for growing the Btree. - *========================================================================*/ - -/* - * Create the initial contents of an intermediate node. - */ -int -xfs_da3_node_create( - struct xfs_da_args *args, - xfs_dablk_t blkno, - int level, - struct xfs_buf **bpp, - int whichfork) -{ - struct xfs_da_intnode *node; - struct xfs_trans *tp = args->trans; - struct xfs_mount *mp = tp->t_mountp; - struct xfs_da3_icnode_hdr ichdr = {0}; - struct xfs_buf *bp; - int error; - struct xfs_inode *dp = args->dp; - - trace_xfs_da_node_create(args); - ASSERT(level <= XFS_DA_NODE_MAXDEPTH); - - error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork); - if (error) - return error; - bp->b_ops = &xfs_da3_node_buf_ops; - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); - node = bp->b_addr; - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - struct xfs_da3_node_hdr *hdr3 = bp->b_addr; - - ichdr.magic = XFS_DA3_NODE_MAGIC; - hdr3->info.blkno = cpu_to_be64(bp->b_bn); - hdr3->info.owner = cpu_to_be64(args->dp->i_ino); - uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_uuid); - } else { - ichdr.magic = XFS_DA_NODE_MAGIC; - } - ichdr.level = level; - - dp->d_ops->node_hdr_to_disk(node, &ichdr); - xfs_trans_log_buf(tp, bp, - XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); - - *bpp = bp; - return 0; -} - -/* - * Split a leaf node, rebalance, then possibly split - * intermediate nodes, rebalance, etc. - */ -int /* error */ -xfs_da3_split( - struct xfs_da_state *state) -{ - struct xfs_da_state_blk *oldblk; - struct xfs_da_state_blk *newblk; - struct xfs_da_state_blk *addblk; - struct xfs_da_intnode *node; - struct xfs_buf *bp; - int max; - int action = 0; - int error; - int i; - - trace_xfs_da_split(state->args); - - /* - * Walk back up the tree splitting/inserting/adjusting as necessary. - * If we need to insert and there isn't room, split the node, then - * decide which fragment to insert the new block from below into. - * Note that we may split the root this way, but we need more fixup. - */ - max = state->path.active - 1; - ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH)); - ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC || - state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC); - - addblk = &state->path.blk[max]; /* initial dummy value */ - for (i = max; (i >= 0) && addblk; state->path.active--, i--) { - oldblk = &state->path.blk[i]; - newblk = &state->altpath.blk[i]; - - /* - * If a leaf node then - * Allocate a new leaf node, then rebalance across them. - * else if an intermediate node then - * We split on the last layer, must we split the node? - */ - switch (oldblk->magic) { - case XFS_ATTR_LEAF_MAGIC: - error = xfs_attr3_leaf_split(state, oldblk, newblk); - if ((error != 0) && (error != ENOSPC)) { - return error; /* GROT: attr is inconsistent */ - } - if (!error) { - addblk = newblk; - break; - } - /* - * Entry wouldn't fit, split the leaf again. - */ - state->extravalid = 1; - if (state->inleaf) { - state->extraafter = 0; /* before newblk */ - trace_xfs_attr_leaf_split_before(state->args); - error = xfs_attr3_leaf_split(state, oldblk, - &state->extrablk); - } else { - state->extraafter = 1; /* after newblk */ - trace_xfs_attr_leaf_split_after(state->args); - error = xfs_attr3_leaf_split(state, newblk, - &state->extrablk); - } - if (error) - return error; /* GROT: attr inconsistent */ - addblk = newblk; - break; - case XFS_DIR2_LEAFN_MAGIC: - error = xfs_dir2_leafn_split(state, oldblk, newblk); - if (error) - return error; - addblk = newblk; - break; - case XFS_DA_NODE_MAGIC: - error = xfs_da3_node_split(state, oldblk, newblk, addblk, - max - i, &action); - addblk->bp = NULL; - if (error) - return error; /* GROT: dir is inconsistent */ - /* - * Record the newly split block for the next time thru? - */ - if (action) - addblk = newblk; - else - addblk = NULL; - break; - } - - /* - * Update the btree to show the new hashval for this child. - */ - xfs_da3_fixhashpath(state, &state->path); - } - if (!addblk) - return 0; - - /* - * Split the root node. - */ - ASSERT(state->path.active == 0); - oldblk = &state->path.blk[0]; - error = xfs_da3_root_split(state, oldblk, addblk); - if (error) { - addblk->bp = NULL; - return error; /* GROT: dir is inconsistent */ - } - - /* - * Update pointers to the node which used to be block 0 and - * just got bumped because of the addition of a new root node. - * There might be three blocks involved if a double split occurred, - * and the original block 0 could be at any position in the list. - * - * Note: the magic numbers and sibling pointers are in the same - * physical place for both v2 and v3 headers (by design). Hence it - * doesn't matter which version of the xfs_da_intnode structure we use - * here as the result will be the same using either structure. - */ - node = oldblk->bp->b_addr; - if (node->hdr.info.forw) { - if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) { - bp = addblk->bp; - } else { - ASSERT(state->extravalid); - bp = state->extrablk.bp; - } - node = bp->b_addr; - node->hdr.info.back = cpu_to_be32(oldblk->blkno); - xfs_trans_log_buf(state->args->trans, bp, - XFS_DA_LOGRANGE(node, &node->hdr.info, - sizeof(node->hdr.info))); - } - node = oldblk->bp->b_addr; - if (node->hdr.info.back) { - if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) { - bp = addblk->bp; - } else { - ASSERT(state->extravalid); - bp = state->extrablk.bp; - } - node = bp->b_addr; - node->hdr.info.forw = cpu_to_be32(oldblk->blkno); - xfs_trans_log_buf(state->args->trans, bp, - XFS_DA_LOGRANGE(node, &node->hdr.info, - sizeof(node->hdr.info))); - } - addblk->bp = NULL; - return 0; -} - -/* - * Split the root. We have to create a new root and point to the two - * parts (the split old root) that we just created. Copy block zero to - * the EOF, extending the inode in process. - */ -STATIC int /* error */ -xfs_da3_root_split( - struct xfs_da_state *state, - struct xfs_da_state_blk *blk1, - struct xfs_da_state_blk *blk2) -{ - struct xfs_da_intnode *node; - struct xfs_da_intnode *oldroot; - struct xfs_da_node_entry *btree; - struct xfs_da3_icnode_hdr nodehdr; - struct xfs_da_args *args; - struct xfs_buf *bp; - struct xfs_inode *dp; - struct xfs_trans *tp; - struct xfs_mount *mp; - struct xfs_dir2_leaf *leaf; - xfs_dablk_t blkno; - int level; - int error; - int size; - - trace_xfs_da_root_split(state->args); - - /* - * Copy the existing (incorrect) block from the root node position - * to a free space somewhere. - */ - args = state->args; - error = xfs_da_grow_inode(args, &blkno); - if (error) - return error; - - dp = args->dp; - tp = args->trans; - mp = state->mp; - error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork); - if (error) - return error; - node = bp->b_addr; - oldroot = blk1->bp->b_addr; - if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || - oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { - struct xfs_da3_icnode_hdr nodehdr; - - dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); - btree = dp->d_ops->node_tree_p(oldroot); - size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); - level = nodehdr.level; - - /* - * we are about to copy oldroot to bp, so set up the type - * of bp while we know exactly what it will be. - */ - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); - } else { - struct xfs_dir3_icleaf_hdr leafhdr; - struct xfs_dir2_leaf_entry *ents; - - leaf = (xfs_dir2_leaf_t *)oldroot; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - - ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || - leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); - size = (int)((char *)&ents[leafhdr.count] - (char *)leaf); - level = 0; - - /* - * we are about to copy oldroot to bp, so set up the type - * of bp while we know exactly what it will be. - */ - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF); - } - - /* - * we can copy most of the information in the node from one block to - * another, but for CRC enabled headers we have to make sure that the - * block specific identifiers are kept intact. We update the buffer - * directly for this. - */ - memcpy(node, oldroot, size); - if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || - oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { - struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node; - - node3->hdr.info.blkno = cpu_to_be64(bp->b_bn); - } - xfs_trans_log_buf(tp, bp, 0, size - 1); - - bp->b_ops = blk1->bp->b_ops; - xfs_trans_buf_copy_type(bp, blk1->bp); - blk1->bp = bp; - blk1->blkno = blkno; - - /* - * Set up the new root node. - */ - error = xfs_da3_node_create(args, - (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0, - level + 1, &bp, args->whichfork); - if (error) - return error; - - node = bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); - btree[0].hashval = cpu_to_be32(blk1->hashval); - btree[0].before = cpu_to_be32(blk1->blkno); - btree[1].hashval = cpu_to_be32(blk2->hashval); - btree[1].before = cpu_to_be32(blk2->blkno); - nodehdr.count = 2; - dp->d_ops->node_hdr_to_disk(node, &nodehdr); - -#ifdef DEBUG - if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { - ASSERT(blk1->blkno >= args->geo->leafblk && - blk1->blkno < args->geo->freeblk); - ASSERT(blk2->blkno >= args->geo->leafblk && - blk2->blkno < args->geo->freeblk); - } -#endif - - /* Header is already logged by xfs_da_node_create */ - xfs_trans_log_buf(tp, bp, - XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2)); - - return 0; -} - -/* - * Split the node, rebalance, then add the new entry. - */ -STATIC int /* error */ -xfs_da3_node_split( - struct xfs_da_state *state, - struct xfs_da_state_blk *oldblk, - struct xfs_da_state_blk *newblk, - struct xfs_da_state_blk *addblk, - int treelevel, - int *result) -{ - struct xfs_da_intnode *node; - struct xfs_da3_icnode_hdr nodehdr; - xfs_dablk_t blkno; - int newcount; - int error; - int useextra; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_node_split(state->args); - - node = oldblk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - - /* - * With V2 dirs the extra block is data or freespace. - */ - useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK; - newcount = 1 + useextra; - /* - * Do we have to split the node? - */ - if (nodehdr.count + newcount > state->args->geo->node_ents) { - /* - * Allocate a new node, add to the doubly linked chain of - * nodes, then move some of our excess entries into it. - */ - error = xfs_da_grow_inode(state->args, &blkno); - if (error) - return error; /* GROT: dir is inconsistent */ - - error = xfs_da3_node_create(state->args, blkno, treelevel, - &newblk->bp, state->args->whichfork); - if (error) - return error; /* GROT: dir is inconsistent */ - newblk->blkno = blkno; - newblk->magic = XFS_DA_NODE_MAGIC; - xfs_da3_node_rebalance(state, oldblk, newblk); - error = xfs_da3_blk_link(state, oldblk, newblk); - if (error) - return error; - *result = 1; - } else { - *result = 0; - } - - /* - * Insert the new entry(s) into the correct block - * (updating last hashval in the process). - * - * xfs_da3_node_add() inserts BEFORE the given index, - * and as a result of using node_lookup_int() we always - * point to a valid entry (not after one), but a split - * operation always results in a new block whose hashvals - * FOLLOW the current block. - * - * If we had double-split op below us, then add the extra block too. - */ - node = oldblk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - if (oldblk->index <= nodehdr.count) { - oldblk->index++; - xfs_da3_node_add(state, oldblk, addblk); - if (useextra) { - if (state->extraafter) - oldblk->index++; - xfs_da3_node_add(state, oldblk, &state->extrablk); - state->extravalid = 0; - } - } else { - newblk->index++; - xfs_da3_node_add(state, newblk, addblk); - if (useextra) { - if (state->extraafter) - newblk->index++; - xfs_da3_node_add(state, newblk, &state->extrablk); - state->extravalid = 0; - } - } - - return 0; -} - -/* - * Balance the btree elements between two intermediate nodes, - * usually one full and one empty. - * - * NOTE: if blk2 is empty, then it will get the upper half of blk1. - */ -STATIC void -xfs_da3_node_rebalance( - struct xfs_da_state *state, - struct xfs_da_state_blk *blk1, - struct xfs_da_state_blk *blk2) -{ - struct xfs_da_intnode *node1; - struct xfs_da_intnode *node2; - struct xfs_da_intnode *tmpnode; - struct xfs_da_node_entry *btree1; - struct xfs_da_node_entry *btree2; - struct xfs_da_node_entry *btree_s; - struct xfs_da_node_entry *btree_d; - struct xfs_da3_icnode_hdr nodehdr1; - struct xfs_da3_icnode_hdr nodehdr2; - struct xfs_trans *tp; - int count; - int tmp; - int swap = 0; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_node_rebalance(state->args); - - node1 = blk1->bp->b_addr; - node2 = blk2->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); - dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); - - /* - * Figure out how many entries need to move, and in which direction. - * Swap the nodes around if that makes it simpler. - */ - if (nodehdr1.count > 0 && nodehdr2.count > 0 && - ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || - (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) < - be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) { - tmpnode = node1; - node1 = node2; - node2 = tmpnode; - dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); - dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); - swap = 1; - } - - count = (nodehdr1.count - nodehdr2.count) / 2; - if (count == 0) - return; - tp = state->args->trans; - /* - * Two cases: high-to-low and low-to-high. - */ - if (count > 0) { - /* - * Move elements in node2 up to make a hole. - */ - tmp = nodehdr2.count; - if (tmp > 0) { - tmp *= (uint)sizeof(xfs_da_node_entry_t); - btree_s = &btree2[0]; - btree_d = &btree2[count]; - memmove(btree_d, btree_s, tmp); - } - - /* - * Move the req'd B-tree elements from high in node1 to - * low in node2. - */ - nodehdr2.count += count; - tmp = count * (uint)sizeof(xfs_da_node_entry_t); - btree_s = &btree1[nodehdr1.count - count]; - btree_d = &btree2[0]; - memcpy(btree_d, btree_s, tmp); - nodehdr1.count -= count; - } else { - /* - * Move the req'd B-tree elements from low in node2 to - * high in node1. - */ - count = -count; - tmp = count * (uint)sizeof(xfs_da_node_entry_t); - btree_s = &btree2[0]; - btree_d = &btree1[nodehdr1.count]; - memcpy(btree_d, btree_s, tmp); - nodehdr1.count += count; - - xfs_trans_log_buf(tp, blk1->bp, - XFS_DA_LOGRANGE(node1, btree_d, tmp)); - - /* - * Move elements in node2 down to fill the hole. - */ - tmp = nodehdr2.count - count; - tmp *= (uint)sizeof(xfs_da_node_entry_t); - btree_s = &btree2[count]; - btree_d = &btree2[0]; - memmove(btree_d, btree_s, tmp); - nodehdr2.count -= count; - } - - /* - * Log header of node 1 and all current bits of node 2. - */ - dp->d_ops->node_hdr_to_disk(node1, &nodehdr1); - xfs_trans_log_buf(tp, blk1->bp, - XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size)); - - dp->d_ops->node_hdr_to_disk(node2, &nodehdr2); - xfs_trans_log_buf(tp, blk2->bp, - XFS_DA_LOGRANGE(node2, &node2->hdr, - dp->d_ops->node_hdr_size + - (sizeof(btree2[0]) * nodehdr2.count))); - - /* - * Record the last hashval from each block for upward propagation. - * (note: don't use the swapped node pointers) - */ - if (swap) { - node1 = blk1->bp->b_addr; - node2 = blk2->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); - dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); - } - blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval); - blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval); - - /* - * Adjust the expected index for insertion. - */ - if (blk1->index >= nodehdr1.count) { - blk2->index = blk1->index - nodehdr1.count; - blk1->index = nodehdr1.count + 1; /* make it invalid */ - } -} - -/* - * Add a new entry to an intermediate node. - */ -STATIC void -xfs_da3_node_add( - struct xfs_da_state *state, - struct xfs_da_state_blk *oldblk, - struct xfs_da_state_blk *newblk) -{ - struct xfs_da_intnode *node; - struct xfs_da3_icnode_hdr nodehdr; - struct xfs_da_node_entry *btree; - int tmp; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_node_add(state->args); - - node = oldblk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); - - ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count); - ASSERT(newblk->blkno != 0); - if (state->args->whichfork == XFS_DATA_FORK) - ASSERT(newblk->blkno >= state->args->geo->leafblk && - newblk->blkno < state->args->geo->freeblk); - - /* - * We may need to make some room before we insert the new node. - */ - tmp = 0; - if (oldblk->index < nodehdr.count) { - tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree); - memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp); - } - btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval); - btree[oldblk->index].before = cpu_to_be32(newblk->blkno); - xfs_trans_log_buf(state->args->trans, oldblk->bp, - XFS_DA_LOGRANGE(node, &btree[oldblk->index], - tmp + sizeof(*btree))); - - nodehdr.count += 1; - dp->d_ops->node_hdr_to_disk(node, &nodehdr); - xfs_trans_log_buf(state->args->trans, oldblk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); - - /* - * Copy the last hash value from the oldblk to propagate upwards. - */ - oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); -} - -/*======================================================================== - * Routines used for shrinking the Btree. - *========================================================================*/ - -/* - * Deallocate an empty leaf node, remove it from its parent, - * possibly deallocating that block, etc... - */ -int -xfs_da3_join( - struct xfs_da_state *state) -{ - struct xfs_da_state_blk *drop_blk; - struct xfs_da_state_blk *save_blk; - int action = 0; - int error; - - trace_xfs_da_join(state->args); - - drop_blk = &state->path.blk[ state->path.active-1 ]; - save_blk = &state->altpath.blk[ state->path.active-1 ]; - ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC); - ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC || - drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); - - /* - * Walk back up the tree joining/deallocating as necessary. - * When we stop dropping blocks, break out. - */ - for ( ; state->path.active >= 2; drop_blk--, save_blk--, - state->path.active--) { - /* - * See if we can combine the block with a neighbor. - * (action == 0) => no options, just leave - * (action == 1) => coalesce, then unlink - * (action == 2) => block empty, unlink it - */ - switch (drop_blk->magic) { - case XFS_ATTR_LEAF_MAGIC: - error = xfs_attr3_leaf_toosmall(state, &action); - if (error) - return error; - if (action == 0) - return 0; - xfs_attr3_leaf_unbalance(state, drop_blk, save_blk); - break; - case XFS_DIR2_LEAFN_MAGIC: - error = xfs_dir2_leafn_toosmall(state, &action); - if (error) - return error; - if (action == 0) - return 0; - xfs_dir2_leafn_unbalance(state, drop_blk, save_blk); - break; - case XFS_DA_NODE_MAGIC: - /* - * Remove the offending node, fixup hashvals, - * check for a toosmall neighbor. - */ - xfs_da3_node_remove(state, drop_blk); - xfs_da3_fixhashpath(state, &state->path); - error = xfs_da3_node_toosmall(state, &action); - if (error) - return error; - if (action == 0) - return 0; - xfs_da3_node_unbalance(state, drop_blk, save_blk); - break; - } - xfs_da3_fixhashpath(state, &state->altpath); - error = xfs_da3_blk_unlink(state, drop_blk, save_blk); - xfs_da_state_kill_altpath(state); - if (error) - return error; - error = xfs_da_shrink_inode(state->args, drop_blk->blkno, - drop_blk->bp); - drop_blk->bp = NULL; - if (error) - return error; - } - /* - * We joined all the way to the top. If it turns out that - * we only have one entry in the root, make the child block - * the new root. - */ - xfs_da3_node_remove(state, drop_blk); - xfs_da3_fixhashpath(state, &state->path); - error = xfs_da3_root_join(state, &state->path.blk[0]); - return error; -} - -#ifdef DEBUG -static void -xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level) -{ - __be16 magic = blkinfo->magic; - - if (level == 1) { - ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) || - magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || - magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); - } else { - ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || - magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); - } - ASSERT(!blkinfo->forw); - ASSERT(!blkinfo->back); -} -#else /* !DEBUG */ -#define xfs_da_blkinfo_onlychild_validate(blkinfo, level) -#endif /* !DEBUG */ - -/* - * We have only one entry in the root. Copy the only remaining child of - * the old root to block 0 as the new root node. - */ -STATIC int -xfs_da3_root_join( - struct xfs_da_state *state, - struct xfs_da_state_blk *root_blk) -{ - struct xfs_da_intnode *oldroot; - struct xfs_da_args *args; - xfs_dablk_t child; - struct xfs_buf *bp; - struct xfs_da3_icnode_hdr oldroothdr; - struct xfs_da_node_entry *btree; - int error; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_root_join(state->args); - - ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); - - args = state->args; - oldroot = root_blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot); - ASSERT(oldroothdr.forw == 0); - ASSERT(oldroothdr.back == 0); - - /* - * If the root has more than one child, then don't do anything. - */ - if (oldroothdr.count > 1) - return 0; - - /* - * Read in the (only) child block, then copy those bytes into - * the root block's buffer and free the original child block. - */ - btree = dp->d_ops->node_tree_p(oldroot); - child = be32_to_cpu(btree[0].before); - ASSERT(child != 0); - error = xfs_da3_node_read(args->trans, dp, child, -1, &bp, - args->whichfork); - if (error) - return error; - xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level); - - /* - * This could be copying a leaf back into the root block in the case of - * there only being a single leaf block left in the tree. Hence we have - * to update the b_ops pointer as well to match the buffer type change - * that could occur. For dir3 blocks we also need to update the block - * number in the buffer header. - */ - memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize); - root_blk->bp->b_ops = bp->b_ops; - xfs_trans_buf_copy_type(root_blk->bp, bp); - if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) { - struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr; - da3->blkno = cpu_to_be64(root_blk->bp->b_bn); - } - xfs_trans_log_buf(args->trans, root_blk->bp, 0, - args->geo->blksize - 1); - error = xfs_da_shrink_inode(args, child, bp); - return error; -} - -/* - * Check a node block and its neighbors to see if the block should be - * collapsed into one or the other neighbor. Always keep the block - * with the smaller block number. - * If the current block is over 50% full, don't try to join it, return 0. - * If the block is empty, fill in the state structure and return 2. - * If it can be collapsed, fill in the state structure and return 1. - * If nothing can be done, return 0. - */ -STATIC int -xfs_da3_node_toosmall( - struct xfs_da_state *state, - int *action) -{ - struct xfs_da_intnode *node; - struct xfs_da_state_blk *blk; - struct xfs_da_blkinfo *info; - xfs_dablk_t blkno; - struct xfs_buf *bp; - struct xfs_da3_icnode_hdr nodehdr; - int count; - int forward; - int error; - int retval; - int i; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_node_toosmall(state->args); - - /* - * Check for the degenerate case of the block being over 50% full. - * If so, it's not worth even looking to see if we might be able - * to coalesce with a sibling. - */ - blk = &state->path.blk[ state->path.active-1 ]; - info = blk->bp->b_addr; - node = (xfs_da_intnode_t *)info; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - if (nodehdr.count > (state->args->geo->node_ents >> 1)) { - *action = 0; /* blk over 50%, don't try to join */ - return 0; /* blk over 50%, don't try to join */ - } - - /* - * Check for the degenerate case of the block being empty. - * If the block is empty, we'll simply delete it, no need to - * coalesce it with a sibling block. We choose (arbitrarily) - * to merge with the forward block unless it is NULL. - */ - if (nodehdr.count == 0) { - /* - * Make altpath point to the block we want to keep and - * path point to the block we want to drop (this one). - */ - forward = (info->forw != 0); - memcpy(&state->altpath, &state->path, sizeof(state->path)); - error = xfs_da3_path_shift(state, &state->altpath, forward, - 0, &retval); - if (error) - return error; - if (retval) { - *action = 0; - } else { - *action = 2; - } - return 0; - } - - /* - * Examine each sibling block to see if we can coalesce with - * at least 25% free space to spare. We need to figure out - * whether to merge with the forward or the backward block. - * We prefer coalescing with the lower numbered sibling so as - * to shrink a directory over time. - */ - count = state->args->geo->node_ents; - count -= state->args->geo->node_ents >> 2; - count -= nodehdr.count; - - /* start with smaller blk num */ - forward = nodehdr.forw < nodehdr.back; - for (i = 0; i < 2; forward = !forward, i++) { - struct xfs_da3_icnode_hdr thdr; - if (forward) - blkno = nodehdr.forw; - else - blkno = nodehdr.back; - if (blkno == 0) - continue; - error = xfs_da3_node_read(state->args->trans, dp, - blkno, -1, &bp, state->args->whichfork); - if (error) - return error; - - node = bp->b_addr; - dp->d_ops->node_hdr_from_disk(&thdr, node); - xfs_trans_brelse(state->args->trans, bp); - - if (count - thdr.count >= 0) - break; /* fits with at least 25% to spare */ - } - if (i >= 2) { - *action = 0; - return 0; - } - - /* - * Make altpath point to the block we want to keep (the lower - * numbered block) and path point to the block we want to drop. - */ - memcpy(&state->altpath, &state->path, sizeof(state->path)); - if (blkno < blk->blkno) { - error = xfs_da3_path_shift(state, &state->altpath, forward, - 0, &retval); - } else { - error = xfs_da3_path_shift(state, &state->path, forward, - 0, &retval); - } - if (error) - return error; - if (retval) { - *action = 0; - return 0; - } - *action = 1; - return 0; -} - -/* - * Pick up the last hashvalue from an intermediate node. - */ -STATIC uint -xfs_da3_node_lasthash( - struct xfs_inode *dp, - struct xfs_buf *bp, - int *count) -{ - struct xfs_da_intnode *node; - struct xfs_da_node_entry *btree; - struct xfs_da3_icnode_hdr nodehdr; - - node = bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - if (count) - *count = nodehdr.count; - if (!nodehdr.count) - return 0; - btree = dp->d_ops->node_tree_p(node); - return be32_to_cpu(btree[nodehdr.count - 1].hashval); -} - -/* - * Walk back up the tree adjusting hash values as necessary, - * when we stop making changes, return. - */ -void -xfs_da3_fixhashpath( - struct xfs_da_state *state, - struct xfs_da_state_path *path) -{ - struct xfs_da_state_blk *blk; - struct xfs_da_intnode *node; - struct xfs_da_node_entry *btree; - xfs_dahash_t lasthash=0; - int level; - int count; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_fixhashpath(state->args); - - level = path->active-1; - blk = &path->blk[ level ]; - switch (blk->magic) { - case XFS_ATTR_LEAF_MAGIC: - lasthash = xfs_attr_leaf_lasthash(blk->bp, &count); - if (count == 0) - return; - break; - case XFS_DIR2_LEAFN_MAGIC: - lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count); - if (count == 0) - return; - break; - case XFS_DA_NODE_MAGIC: - lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count); - if (count == 0) - return; - break; - } - for (blk--, level--; level >= 0; blk--, level--) { - struct xfs_da3_icnode_hdr nodehdr; - - node = blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); - if (be32_to_cpu(btree[blk->index].hashval) == lasthash) - break; - blk->hashval = lasthash; - btree[blk->index].hashval = cpu_to_be32(lasthash); - xfs_trans_log_buf(state->args->trans, blk->bp, - XFS_DA_LOGRANGE(node, &btree[blk->index], - sizeof(*btree))); - - lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval); - } -} - -/* - * Remove an entry from an intermediate node. - */ -STATIC void -xfs_da3_node_remove( - struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk) -{ - struct xfs_da_intnode *node; - struct xfs_da3_icnode_hdr nodehdr; - struct xfs_da_node_entry *btree; - int index; - int tmp; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_node_remove(state->args); - - node = drop_blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - ASSERT(drop_blk->index < nodehdr.count); - ASSERT(drop_blk->index >= 0); - - /* - * Copy over the offending entry, or just zero it out. - */ - index = drop_blk->index; - btree = dp->d_ops->node_tree_p(node); - if (index < nodehdr.count - 1) { - tmp = nodehdr.count - index - 1; - tmp *= (uint)sizeof(xfs_da_node_entry_t); - memmove(&btree[index], &btree[index + 1], tmp); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, - XFS_DA_LOGRANGE(node, &btree[index], tmp)); - index = nodehdr.count - 1; - } - memset(&btree[index], 0, sizeof(xfs_da_node_entry_t)); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, - XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); - nodehdr.count -= 1; - dp->d_ops->node_hdr_to_disk(node, &nodehdr); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); - - /* - * Copy the last hash value from the block to propagate upwards. - */ - drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval); -} - -/* - * Unbalance the elements between two intermediate nodes, - * move all Btree elements from one node into another. - */ -STATIC void -xfs_da3_node_unbalance( - struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk, - struct xfs_da_state_blk *save_blk) -{ - struct xfs_da_intnode *drop_node; - struct xfs_da_intnode *save_node; - struct xfs_da_node_entry *drop_btree; - struct xfs_da_node_entry *save_btree; - struct xfs_da3_icnode_hdr drop_hdr; - struct xfs_da3_icnode_hdr save_hdr; - struct xfs_trans *tp; - int sindex; - int tmp; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_node_unbalance(state->args); - - drop_node = drop_blk->bp->b_addr; - save_node = save_blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node); - dp->d_ops->node_hdr_from_disk(&save_hdr, save_node); - drop_btree = dp->d_ops->node_tree_p(drop_node); - save_btree = dp->d_ops->node_tree_p(save_node); - tp = state->args->trans; - - /* - * If the dying block has lower hashvals, then move all the - * elements in the remaining block up to make a hole. - */ - if ((be32_to_cpu(drop_btree[0].hashval) < - be32_to_cpu(save_btree[0].hashval)) || - (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) < - be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) { - /* XXX: check this - is memmove dst correct? */ - tmp = save_hdr.count * sizeof(xfs_da_node_entry_t); - memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp); - - sindex = 0; - xfs_trans_log_buf(tp, save_blk->bp, - XFS_DA_LOGRANGE(save_node, &save_btree[0], - (save_hdr.count + drop_hdr.count) * - sizeof(xfs_da_node_entry_t))); - } else { - sindex = save_hdr.count; - xfs_trans_log_buf(tp, save_blk->bp, - XFS_DA_LOGRANGE(save_node, &save_btree[sindex], - drop_hdr.count * sizeof(xfs_da_node_entry_t))); - } - - /* - * Move all the B-tree elements from drop_blk to save_blk. - */ - tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t); - memcpy(&save_btree[sindex], &drop_btree[0], tmp); - save_hdr.count += drop_hdr.count; - - dp->d_ops->node_hdr_to_disk(save_node, &save_hdr); - xfs_trans_log_buf(tp, save_blk->bp, - XFS_DA_LOGRANGE(save_node, &save_node->hdr, - dp->d_ops->node_hdr_size)); - - /* - * Save the last hashval in the remaining block for upward propagation. - */ - save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval); -} - -/*======================================================================== - * Routines used for finding things in the Btree. - *========================================================================*/ - -/* - * Walk down the Btree looking for a particular filename, filling - * in the state structure as we go. - * - * We will set the state structure to point to each of the elements - * in each of the nodes where either the hashval is or should be. - * - * We support duplicate hashval's so for each entry in the current - * node that could contain the desired hashval, descend. This is a - * pruned depth-first tree search. - */ -int /* error */ -xfs_da3_node_lookup_int( - struct xfs_da_state *state, - int *result) -{ - struct xfs_da_state_blk *blk; - struct xfs_da_blkinfo *curr; - struct xfs_da_intnode *node; - struct xfs_da_node_entry *btree; - struct xfs_da3_icnode_hdr nodehdr; - struct xfs_da_args *args; - xfs_dablk_t blkno; - xfs_dahash_t hashval; - xfs_dahash_t btreehashval; - int probe; - int span; - int max; - int error; - int retval; - struct xfs_inode *dp = state->args->dp; - - args = state->args; - - /* - * Descend thru the B-tree searching each level for the right - * node to use, until the right hashval is found. - */ - blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0; - for (blk = &state->path.blk[0], state->path.active = 1; - state->path.active <= XFS_DA_NODE_MAXDEPTH; - blk++, state->path.active++) { - /* - * Read the next node down in the tree. - */ - blk->blkno = blkno; - error = xfs_da3_node_read(args->trans, args->dp, blkno, - -1, &blk->bp, args->whichfork); - if (error) { - blk->blkno = 0; - state->path.active--; - return error; - } - curr = blk->bp->b_addr; - blk->magic = be16_to_cpu(curr->magic); - - if (blk->magic == XFS_ATTR_LEAF_MAGIC || - blk->magic == XFS_ATTR3_LEAF_MAGIC) { - blk->magic = XFS_ATTR_LEAF_MAGIC; - blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); - break; - } - - if (blk->magic == XFS_DIR2_LEAFN_MAGIC || - blk->magic == XFS_DIR3_LEAFN_MAGIC) { - blk->magic = XFS_DIR2_LEAFN_MAGIC; - blk->hashval = xfs_dir2_leafn_lasthash(args->dp, - blk->bp, NULL); - break; - } - - blk->magic = XFS_DA_NODE_MAGIC; - - - /* - * Search an intermediate node for a match. - */ - node = blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); - - max = nodehdr.count; - blk->hashval = be32_to_cpu(btree[max - 1].hashval); - - /* - * Binary search. (note: small blocks will skip loop) - */ - probe = span = max / 2; - hashval = args->hashval; - while (span > 4) { - span /= 2; - btreehashval = be32_to_cpu(btree[probe].hashval); - if (btreehashval < hashval) - probe += span; - else if (btreehashval > hashval) - probe -= span; - else - break; - } - ASSERT((probe >= 0) && (probe < max)); - ASSERT((span <= 4) || - (be32_to_cpu(btree[probe].hashval) == hashval)); - - /* - * Since we may have duplicate hashval's, find the first - * matching hashval in the node. - */ - while (probe > 0 && - be32_to_cpu(btree[probe].hashval) >= hashval) { - probe--; - } - while (probe < max && - be32_to_cpu(btree[probe].hashval) < hashval) { - probe++; - } - - /* - * Pick the right block to descend on. - */ - if (probe == max) { - blk->index = max - 1; - blkno = be32_to_cpu(btree[max - 1].before); - } else { - blk->index = probe; - blkno = be32_to_cpu(btree[probe].before); - } - } - - /* - * A leaf block that ends in the hashval that we are interested in - * (final hashval == search hashval) means that the next block may - * contain more entries with the same hashval, shift upward to the - * next leaf and keep searching. - */ - for (;;) { - if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { - retval = xfs_dir2_leafn_lookup_int(blk->bp, args, - &blk->index, state); - } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { - retval = xfs_attr3_leaf_lookup_int(blk->bp, args); - blk->index = args->index; - args->blkno = blk->blkno; - } else { - ASSERT(0); - return EFSCORRUPTED; - } - if (((retval == ENOENT) || (retval == ENOATTR)) && - (blk->hashval == args->hashval)) { - error = xfs_da3_path_shift(state, &state->path, 1, 1, - &retval); - if (error) - return error; - if (retval == 0) { - continue; - } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { - /* path_shift() gives ENOENT */ - retval = ENOATTR; - } - } - break; - } - *result = retval; - return 0; -} - -/*======================================================================== - * Utility routines. - *========================================================================*/ - -/* - * Compare two intermediate nodes for "order". - */ -STATIC int -xfs_da3_node_order( - struct xfs_inode *dp, - struct xfs_buf *node1_bp, - struct xfs_buf *node2_bp) -{ - struct xfs_da_intnode *node1; - struct xfs_da_intnode *node2; - struct xfs_da_node_entry *btree1; - struct xfs_da_node_entry *btree2; - struct xfs_da3_icnode_hdr node1hdr; - struct xfs_da3_icnode_hdr node2hdr; - - node1 = node1_bp->b_addr; - node2 = node2_bp->b_addr; - dp->d_ops->node_hdr_from_disk(&node1hdr, node1); - dp->d_ops->node_hdr_from_disk(&node2hdr, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); - - if (node1hdr.count > 0 && node2hdr.count > 0 && - ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || - (be32_to_cpu(btree2[node2hdr.count - 1].hashval) < - be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) { - return 1; - } - return 0; -} - -/* - * Link a new block into a doubly linked list of blocks (of whatever type). - */ -int /* error */ -xfs_da3_blk_link( - struct xfs_da_state *state, - struct xfs_da_state_blk *old_blk, - struct xfs_da_state_blk *new_blk) -{ - struct xfs_da_blkinfo *old_info; - struct xfs_da_blkinfo *new_info; - struct xfs_da_blkinfo *tmp_info; - struct xfs_da_args *args; - struct xfs_buf *bp; - int before = 0; - int error; - struct xfs_inode *dp = state->args->dp; - - /* - * Set up environment. - */ - args = state->args; - ASSERT(args != NULL); - old_info = old_blk->bp->b_addr; - new_info = new_blk->bp->b_addr; - ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || - old_blk->magic == XFS_DIR2_LEAFN_MAGIC || - old_blk->magic == XFS_ATTR_LEAF_MAGIC); - - switch (old_blk->magic) { - case XFS_ATTR_LEAF_MAGIC: - before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp); - break; - case XFS_DIR2_LEAFN_MAGIC: - before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp); - break; - case XFS_DA_NODE_MAGIC: - before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp); - break; - } - - /* - * Link blocks in appropriate order. - */ - if (before) { - /* - * Link new block in before existing block. - */ - trace_xfs_da_link_before(args); - new_info->forw = cpu_to_be32(old_blk->blkno); - new_info->back = old_info->back; - if (old_info->back) { - error = xfs_da3_node_read(args->trans, dp, - be32_to_cpu(old_info->back), - -1, &bp, args->whichfork); - if (error) - return error; - ASSERT(bp != NULL); - tmp_info = bp->b_addr; - ASSERT(tmp_info->magic == old_info->magic); - ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno); - tmp_info->forw = cpu_to_be32(new_blk->blkno); - xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); - } - old_info->back = cpu_to_be32(new_blk->blkno); - } else { - /* - * Link new block in after existing block. - */ - trace_xfs_da_link_after(args); - new_info->forw = old_info->forw; - new_info->back = cpu_to_be32(old_blk->blkno); - if (old_info->forw) { - error = xfs_da3_node_read(args->trans, dp, - be32_to_cpu(old_info->forw), - -1, &bp, args->whichfork); - if (error) - return error; - ASSERT(bp != NULL); - tmp_info = bp->b_addr; - ASSERT(tmp_info->magic == old_info->magic); - ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno); - tmp_info->back = cpu_to_be32(new_blk->blkno); - xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); - } - old_info->forw = cpu_to_be32(new_blk->blkno); - } - - xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); - xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); - return 0; -} - -/* - * Unlink a block from a doubly linked list of blocks. - */ -STATIC int /* error */ -xfs_da3_blk_unlink( - struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk, - struct xfs_da_state_blk *save_blk) -{ - struct xfs_da_blkinfo *drop_info; - struct xfs_da_blkinfo *save_info; - struct xfs_da_blkinfo *tmp_info; - struct xfs_da_args *args; - struct xfs_buf *bp; - int error; - - /* - * Set up environment. - */ - args = state->args; - ASSERT(args != NULL); - save_info = save_blk->bp->b_addr; - drop_info = drop_blk->bp->b_addr; - ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || - save_blk->magic == XFS_DIR2_LEAFN_MAGIC || - save_blk->magic == XFS_ATTR_LEAF_MAGIC); - ASSERT(save_blk->magic == drop_blk->magic); - ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) || - (be32_to_cpu(save_info->back) == drop_blk->blkno)); - ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) || - (be32_to_cpu(drop_info->back) == save_blk->blkno)); - - /* - * Unlink the leaf block from the doubly linked chain of leaves. - */ - if (be32_to_cpu(save_info->back) == drop_blk->blkno) { - trace_xfs_da_unlink_back(args); - save_info->back = drop_info->back; - if (drop_info->back) { - error = xfs_da3_node_read(args->trans, args->dp, - be32_to_cpu(drop_info->back), - -1, &bp, args->whichfork); - if (error) - return error; - ASSERT(bp != NULL); - tmp_info = bp->b_addr; - ASSERT(tmp_info->magic == save_info->magic); - ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno); - tmp_info->forw = cpu_to_be32(save_blk->blkno); - xfs_trans_log_buf(args->trans, bp, 0, - sizeof(*tmp_info) - 1); - } - } else { - trace_xfs_da_unlink_forward(args); - save_info->forw = drop_info->forw; - if (drop_info->forw) { - error = xfs_da3_node_read(args->trans, args->dp, - be32_to_cpu(drop_info->forw), - -1, &bp, args->whichfork); - if (error) - return error; - ASSERT(bp != NULL); - tmp_info = bp->b_addr; - ASSERT(tmp_info->magic == save_info->magic); - ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno); - tmp_info->back = cpu_to_be32(save_blk->blkno); - xfs_trans_log_buf(args->trans, bp, 0, - sizeof(*tmp_info) - 1); - } - } - - xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); - return 0; -} - -/* - * Move a path "forward" or "!forward" one block at the current level. - * - * This routine will adjust a "path" to point to the next block - * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the - * Btree, including updating pointers to the intermediate nodes between - * the new bottom and the root. - */ -int /* error */ -xfs_da3_path_shift( - struct xfs_da_state *state, - struct xfs_da_state_path *path, - int forward, - int release, - int *result) -{ - struct xfs_da_state_blk *blk; - struct xfs_da_blkinfo *info; - struct xfs_da_intnode *node; - struct xfs_da_args *args; - struct xfs_da_node_entry *btree; - struct xfs_da3_icnode_hdr nodehdr; - xfs_dablk_t blkno = 0; - int level; - int error; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_path_shift(state->args); - - /* - * Roll up the Btree looking for the first block where our - * current index is not at the edge of the block. Note that - * we skip the bottom layer because we want the sibling block. - */ - args = state->args; - ASSERT(args != NULL); - ASSERT(path != NULL); - ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); - level = (path->active-1) - 1; /* skip bottom layer in path */ - for (blk = &path->blk[level]; level >= 0; blk--, level--) { - node = blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); - - if (forward && (blk->index < nodehdr.count - 1)) { - blk->index++; - blkno = be32_to_cpu(btree[blk->index].before); - break; - } else if (!forward && (blk->index > 0)) { - blk->index--; - blkno = be32_to_cpu(btree[blk->index].before); - break; - } - } - if (level < 0) { - *result = ENOENT; /* we're out of our tree */ - ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); - return 0; - } - - /* - * Roll down the edge of the subtree until we reach the - * same depth we were at originally. - */ - for (blk++, level++; level < path->active; blk++, level++) { - /* - * Release the old block. - * (if it's dirty, trans won't actually let go) - */ - if (release) - xfs_trans_brelse(args->trans, blk->bp); - - /* - * Read the next child block. - */ - blk->blkno = blkno; - error = xfs_da3_node_read(args->trans, dp, blkno, -1, - &blk->bp, args->whichfork); - if (error) - return error; - info = blk->bp->b_addr; - ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || - info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || - info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) || - info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || - info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); - - - /* - * Note: we flatten the magic number to a single type so we - * don't have to compare against crc/non-crc types elsewhere. - */ - switch (be16_to_cpu(info->magic)) { - case XFS_DA_NODE_MAGIC: - case XFS_DA3_NODE_MAGIC: - blk->magic = XFS_DA_NODE_MAGIC; - node = (xfs_da_intnode_t *)info; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); - blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); - if (forward) - blk->index = 0; - else - blk->index = nodehdr.count - 1; - blkno = be32_to_cpu(btree[blk->index].before); - break; - case XFS_ATTR_LEAF_MAGIC: - case XFS_ATTR3_LEAF_MAGIC: - blk->magic = XFS_ATTR_LEAF_MAGIC; - ASSERT(level == path->active-1); - blk->index = 0; - blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); - break; - case XFS_DIR2_LEAFN_MAGIC: - case XFS_DIR3_LEAFN_MAGIC: - blk->magic = XFS_DIR2_LEAFN_MAGIC; - ASSERT(level == path->active-1); - blk->index = 0; - blk->hashval = xfs_dir2_leafn_lasthash(args->dp, - blk->bp, NULL); - break; - default: - ASSERT(0); - break; - } - } - *result = 0; - return 0; -} - - -/*======================================================================== - * Utility routines. - *========================================================================*/ - -/* - * Implement a simple hash on a character string. - * Rotate the hash value by 7 bits, then XOR each character in. - * This is implemented with some source-level loop unrolling. - */ -xfs_dahash_t -xfs_da_hashname(const __uint8_t *name, int namelen) -{ - xfs_dahash_t hash; - - /* - * Do four characters at a time as long as we can. - */ - for (hash = 0; namelen >= 4; namelen -= 4, name += 4) - hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^ - (name[3] << 0) ^ rol32(hash, 7 * 4); - - /* - * Now do the rest of the characters. - */ - switch (namelen) { - case 3: - return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^ - rol32(hash, 7 * 3); - case 2: - return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2); - case 1: - return (name[0] << 0) ^ rol32(hash, 7 * 1); - default: /* case 0: */ - return hash; - } -} - -enum xfs_dacmp -xfs_da_compname( - struct xfs_da_args *args, - const unsigned char *name, - int len) -{ - return (args->namelen == len && memcmp(args->name, name, len) == 0) ? - XFS_CMP_EXACT : XFS_CMP_DIFFERENT; -} - -static xfs_dahash_t -xfs_default_hashname( - struct xfs_name *name) -{ - return xfs_da_hashname(name->name, name->len); -} - -const struct xfs_nameops xfs_default_nameops = { - .hashname = xfs_default_hashname, - .compname = xfs_da_compname -}; - -int -xfs_da_grow_inode_int( - struct xfs_da_args *args, - xfs_fileoff_t *bno, - int count) -{ - struct xfs_trans *tp = args->trans; - struct xfs_inode *dp = args->dp; - int w = args->whichfork; - xfs_drfsbno_t nblks = dp->i_d.di_nblocks; - struct xfs_bmbt_irec map, *mapp; - int nmap, error, got, i, mapi; - - /* - * Find a spot in the file space to put the new block. - */ - error = xfs_bmap_first_unused(tp, dp, count, bno, w); - if (error) - return error; - - /* - * Try mapping it in one filesystem block. - */ - nmap = 1; - ASSERT(args->firstblock != NULL); - error = xfs_bmapi_write(tp, dp, *bno, count, - xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, - args->firstblock, args->total, &map, &nmap, - args->flist); - if (error) - return error; - - ASSERT(nmap <= 1); - if (nmap == 1) { - mapp = ↦ - mapi = 1; - } else if (nmap == 0 && count > 1) { - xfs_fileoff_t b; - int c; - - /* - * If we didn't get it and the block might work if fragmented, - * try without the CONTIG flag. Loop until we get it all. - */ - mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); - for (b = *bno, mapi = 0; b < *bno + count; ) { - nmap = MIN(XFS_BMAP_MAX_NMAP, count); - c = (int)(*bno + count - b); - error = xfs_bmapi_write(tp, dp, b, c, - xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, - args->firstblock, args->total, - &mapp[mapi], &nmap, args->flist); - if (error) - goto out_free_map; - if (nmap < 1) - break; - mapi += nmap; - b = mapp[mapi - 1].br_startoff + - mapp[mapi - 1].br_blockcount; - } - } else { - mapi = 0; - mapp = NULL; - } - - /* - * Count the blocks we got, make sure it matches the total. - */ - for (i = 0, got = 0; i < mapi; i++) - got += mapp[i].br_blockcount; - if (got != count || mapp[0].br_startoff != *bno || - mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != - *bno + count) { - error = ENOSPC; - goto out_free_map; - } - - /* account for newly allocated blocks in reserved blocks total */ - args->total -= dp->i_d.di_nblocks - nblks; - -out_free_map: - if (mapp != &map) - kmem_free(mapp); - return error; -} - -/* - * Add a block to the btree ahead of the file. - * Return the new block number to the caller. - */ -int -xfs_da_grow_inode( - struct xfs_da_args *args, - xfs_dablk_t *new_blkno) -{ - xfs_fileoff_t bno; - int error; - - trace_xfs_da_grow_inode(args); - - bno = args->geo->leafblk; - error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount); - if (!error) - *new_blkno = (xfs_dablk_t)bno; - return error; -} - -/* - * Ick. We need to always be able to remove a btree block, even - * if there's no space reservation because the filesystem is full. - * This is called if xfs_bunmapi on a btree block fails due to ENOSPC. - * It swaps the target block with the last block in the file. The - * last block in the file can always be removed since it can't cause - * a bmap btree split to do that. - */ -STATIC int -xfs_da3_swap_lastblock( - struct xfs_da_args *args, - xfs_dablk_t *dead_blknop, - struct xfs_buf **dead_bufp) -{ - struct xfs_da_blkinfo *dead_info; - struct xfs_da_blkinfo *sib_info; - struct xfs_da_intnode *par_node; - struct xfs_da_intnode *dead_node; - struct xfs_dir2_leaf *dead_leaf2; - struct xfs_da_node_entry *btree; - struct xfs_da3_icnode_hdr par_hdr; - struct xfs_inode *dp; - struct xfs_trans *tp; - struct xfs_mount *mp; - struct xfs_buf *dead_buf; - struct xfs_buf *last_buf; - struct xfs_buf *sib_buf; - struct xfs_buf *par_buf; - xfs_dahash_t dead_hash; - xfs_fileoff_t lastoff; - xfs_dablk_t dead_blkno; - xfs_dablk_t last_blkno; - xfs_dablk_t sib_blkno; - xfs_dablk_t par_blkno; - int error; - int w; - int entno; - int level; - int dead_level; - - trace_xfs_da_swap_lastblock(args); - - dead_buf = *dead_bufp; - dead_blkno = *dead_blknop; - tp = args->trans; - dp = args->dp; - w = args->whichfork; - ASSERT(w == XFS_DATA_FORK); - mp = dp->i_mount; - lastoff = args->geo->freeblk; - error = xfs_bmap_last_before(tp, dp, &lastoff, w); - if (error) - return error; - if (unlikely(lastoff == 0)) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW, - mp); - return EFSCORRUPTED; - } - /* - * Read the last block in the btree space. - */ - last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount; - error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w); - if (error) - return error; - /* - * Copy the last block into the dead buffer and log it. - */ - memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize); - xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1); - dead_info = dead_buf->b_addr; - /* - * Get values from the moved block. - */ - if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { - struct xfs_dir3_icleaf_hdr leafhdr; - struct xfs_dir2_leaf_entry *ents; - - dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2); - ents = dp->d_ops->leaf_ents_p(dead_leaf2); - dead_level = 0; - dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval); - } else { - struct xfs_da3_icnode_hdr deadhdr; - - dead_node = (xfs_da_intnode_t *)dead_info; - dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node); - btree = dp->d_ops->node_tree_p(dead_node); - dead_level = deadhdr.level; - dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval); - } - sib_buf = par_buf = NULL; - /* - * If the moved block has a left sibling, fix up the pointers. - */ - if ((sib_blkno = be32_to_cpu(dead_info->back))) { - error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); - if (error) - goto done; - sib_info = sib_buf->b_addr; - if (unlikely( - be32_to_cpu(sib_info->forw) != last_blkno || - sib_info->magic != dead_info->magic)) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)", - XFS_ERRLEVEL_LOW, mp); - error = EFSCORRUPTED; - goto done; - } - sib_info->forw = cpu_to_be32(dead_blkno); - xfs_trans_log_buf(tp, sib_buf, - XFS_DA_LOGRANGE(sib_info, &sib_info->forw, - sizeof(sib_info->forw))); - sib_buf = NULL; - } - /* - * If the moved block has a right sibling, fix up the pointers. - */ - if ((sib_blkno = be32_to_cpu(dead_info->forw))) { - error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); - if (error) - goto done; - sib_info = sib_buf->b_addr; - if (unlikely( - be32_to_cpu(sib_info->back) != last_blkno || - sib_info->magic != dead_info->magic)) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)", - XFS_ERRLEVEL_LOW, mp); - error = EFSCORRUPTED; - goto done; - } - sib_info->back = cpu_to_be32(dead_blkno); - xfs_trans_log_buf(tp, sib_buf, - XFS_DA_LOGRANGE(sib_info, &sib_info->back, - sizeof(sib_info->back))); - sib_buf = NULL; - } - par_blkno = args->geo->leafblk; - level = -1; - /* - * Walk down the tree looking for the parent of the moved block. - */ - for (;;) { - error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); - if (error) - goto done; - par_node = par_buf->b_addr; - dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); - if (level >= 0 && level != par_hdr.level + 1) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", - XFS_ERRLEVEL_LOW, mp); - error = EFSCORRUPTED; - goto done; - } - level = par_hdr.level; - btree = dp->d_ops->node_tree_p(par_node); - for (entno = 0; - entno < par_hdr.count && - be32_to_cpu(btree[entno].hashval) < dead_hash; - entno++) - continue; - if (entno == par_hdr.count) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", - XFS_ERRLEVEL_LOW, mp); - error = EFSCORRUPTED; - goto done; - } - par_blkno = be32_to_cpu(btree[entno].before); - if (level == dead_level + 1) - break; - xfs_trans_brelse(tp, par_buf); - par_buf = NULL; - } - /* - * We're in the right parent block. - * Look for the right entry. - */ - for (;;) { - for (; - entno < par_hdr.count && - be32_to_cpu(btree[entno].before) != last_blkno; - entno++) - continue; - if (entno < par_hdr.count) - break; - par_blkno = par_hdr.forw; - xfs_trans_brelse(tp, par_buf); - par_buf = NULL; - if (unlikely(par_blkno == 0)) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", - XFS_ERRLEVEL_LOW, mp); - error = EFSCORRUPTED; - goto done; - } - error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); - if (error) - goto done; - par_node = par_buf->b_addr; - dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); - if (par_hdr.level != level) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", - XFS_ERRLEVEL_LOW, mp); - error = EFSCORRUPTED; - goto done; - } - btree = dp->d_ops->node_tree_p(par_node); - entno = 0; - } - /* - * Update the parent entry pointing to the moved block. - */ - btree[entno].before = cpu_to_be32(dead_blkno); - xfs_trans_log_buf(tp, par_buf, - XFS_DA_LOGRANGE(par_node, &btree[entno].before, - sizeof(btree[entno].before))); - *dead_blknop = last_blkno; - *dead_bufp = last_buf; - return 0; -done: - if (par_buf) - xfs_trans_brelse(tp, par_buf); - if (sib_buf) - xfs_trans_brelse(tp, sib_buf); - xfs_trans_brelse(tp, last_buf); - return error; -} - -/* - * Remove a btree block from a directory or attribute. - */ -int -xfs_da_shrink_inode( - xfs_da_args_t *args, - xfs_dablk_t dead_blkno, - struct xfs_buf *dead_buf) -{ - xfs_inode_t *dp; - int done, error, w, count; - xfs_trans_t *tp; - xfs_mount_t *mp; - - trace_xfs_da_shrink_inode(args); - - dp = args->dp; - w = args->whichfork; - tp = args->trans; - mp = dp->i_mount; - count = args->geo->fsbcount; - for (;;) { - /* - * Remove extents. If we get ENOSPC for a dir we have to move - * the last block to the place we want to kill. - */ - error = xfs_bunmapi(tp, dp, dead_blkno, count, - xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, - 0, args->firstblock, args->flist, &done); - if (error == ENOSPC) { - if (w != XFS_DATA_FORK) - break; - error = xfs_da3_swap_lastblock(args, &dead_blkno, - &dead_buf); - if (error) - break; - } else { - break; - } - } - xfs_trans_binval(tp, dead_buf); - return error; -} - -/* - * See if the mapping(s) for this btree block are valid, i.e. - * don't contain holes, are logically contiguous, and cover the whole range. - */ -STATIC int -xfs_da_map_covers_blocks( - int nmap, - xfs_bmbt_irec_t *mapp, - xfs_dablk_t bno, - int count) -{ - int i; - xfs_fileoff_t off; - - for (i = 0, off = bno; i < nmap; i++) { - if (mapp[i].br_startblock == HOLESTARTBLOCK || - mapp[i].br_startblock == DELAYSTARTBLOCK) { - return 0; - } - if (off != mapp[i].br_startoff) { - return 0; - } - off += mapp[i].br_blockcount; - } - return off == bno + count; -} - -/* - * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map. - * - * For the single map case, it is assumed that the caller has provided a pointer - * to a valid xfs_buf_map. For the multiple map case, this function will - * allocate the xfs_buf_map to hold all the maps and replace the caller's single - * map pointer with the allocated map. - */ -static int -xfs_buf_map_from_irec( - struct xfs_mount *mp, - struct xfs_buf_map **mapp, - int *nmaps, - struct xfs_bmbt_irec *irecs, - int nirecs) -{ - struct xfs_buf_map *map; - int i; - - ASSERT(*nmaps == 1); - ASSERT(nirecs >= 1); - - if (nirecs > 1) { - map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), - KM_SLEEP | KM_NOFS); - if (!map) - return ENOMEM; - *mapp = map; - } - - *nmaps = nirecs; - map = *mapp; - for (i = 0; i < *nmaps; i++) { - ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK && - irecs[i].br_startblock != HOLESTARTBLOCK); - map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock); - map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount); - } - return 0; -} - -/* - * Map the block we are given ready for reading. There are three possible return - * values: - * -1 - will be returned if we land in a hole and mappedbno == -2 so the - * caller knows not to execute a subsequent read. - * 0 - if we mapped the block successfully - * >0 - positive error number if there was an error. - */ -static int -xfs_dabuf_map( - struct xfs_inode *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - int whichfork, - struct xfs_buf_map **map, - int *nmaps) -{ - struct xfs_mount *mp = dp->i_mount; - int nfsb; - int error = 0; - struct xfs_bmbt_irec irec; - struct xfs_bmbt_irec *irecs = &irec; - int nirecs; - - ASSERT(map && *map); - ASSERT(*nmaps == 1); - - if (whichfork == XFS_DATA_FORK) - nfsb = mp->m_dir_geo->fsbcount; - else - nfsb = mp->m_attr_geo->fsbcount; - - /* - * Caller doesn't have a mapping. -2 means don't complain - * if we land in a hole. - */ - if (mappedbno == -1 || mappedbno == -2) { - /* - * Optimize the one-block case. - */ - if (nfsb != 1) - irecs = kmem_zalloc(sizeof(irec) * nfsb, - KM_SLEEP | KM_NOFS); - - nirecs = nfsb; - error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, - &nirecs, xfs_bmapi_aflag(whichfork)); - if (error) - goto out; - } else { - irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); - irecs->br_startoff = (xfs_fileoff_t)bno; - irecs->br_blockcount = nfsb; - irecs->br_state = 0; - nirecs = 1; - } - - if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) { - error = mappedbno == -2 ? -1 : EFSCORRUPTED; - if (unlikely(error == EFSCORRUPTED)) { - if (xfs_error_level >= XFS_ERRLEVEL_LOW) { - int i; - xfs_alert(mp, "%s: bno %lld dir: inode %lld", - __func__, (long long)bno, - (long long)dp->i_ino); - for (i = 0; i < *nmaps; i++) { - xfs_alert(mp, -"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d", - i, - (long long)irecs[i].br_startoff, - (long long)irecs[i].br_startblock, - (long long)irecs[i].br_blockcount, - irecs[i].br_state); - } - } - XFS_ERROR_REPORT("xfs_da_do_buf(1)", - XFS_ERRLEVEL_LOW, mp); - } - goto out; - } - error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs); -out: - if (irecs != &irec) - kmem_free(irecs); - return error; -} - -/* - * Get a buffer for the dir/attr block. - */ -int -xfs_da_get_buf( - struct xfs_trans *trans, - struct xfs_inode *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - struct xfs_buf **bpp, - int whichfork) -{ - struct xfs_buf *bp; - struct xfs_buf_map map; - struct xfs_buf_map *mapp; - int nmap; - int error; - - *bpp = NULL; - mapp = ↦ - nmap = 1; - error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, - &mapp, &nmap); - if (error) { - /* mapping a hole is not an error, but we don't continue */ - if (error == -1) - error = 0; - goto out_free; - } - - bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp, - mapp, nmap, 0); - error = bp ? bp->b_error : EIO; - if (error) { - xfs_trans_brelse(trans, bp); - goto out_free; - } - - *bpp = bp; - -out_free: - if (mapp != &map) - kmem_free(mapp); - - return error; -} - -/* - * Get a buffer for the dir/attr block, fill in the contents. - */ -int -xfs_da_read_buf( - struct xfs_trans *trans, - struct xfs_inode *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - struct xfs_buf **bpp, - int whichfork, - const struct xfs_buf_ops *ops) -{ - struct xfs_buf *bp; - struct xfs_buf_map map; - struct xfs_buf_map *mapp; - int nmap; - int error; - - *bpp = NULL; - mapp = ↦ - nmap = 1; - error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, - &mapp, &nmap); - if (error) { - /* mapping a hole is not an error, but we don't continue */ - if (error == -1) - error = 0; - goto out_free; - } - - error = xfs_trans_read_buf_map(dp->i_mount, trans, - dp->i_mount->m_ddev_targp, - mapp, nmap, 0, &bp, ops); - if (error) - goto out_free; - - if (whichfork == XFS_ATTR_FORK) - xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); - else - xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); - *bpp = bp; -out_free: - if (mapp != &map) - kmem_free(mapp); - - return error; -} - -/* - * Readahead the dir/attr block. - */ -xfs_daddr_t -xfs_da_reada_buf( - struct xfs_inode *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - int whichfork, - const struct xfs_buf_ops *ops) -{ - struct xfs_buf_map map; - struct xfs_buf_map *mapp; - int nmap; - int error; - - mapp = ↦ - nmap = 1; - error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, - &mapp, &nmap); - if (error) { - /* mapping a hole is not an error, but we don't continue */ - if (error == -1) - error = 0; - goto out_free; - } - - mappedbno = mapp[0].bm_bn; - xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops); - -out_free: - if (mapp != &map) - kmem_free(mapp); - - if (error) - return -1; - return mappedbno; -} diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/xfs_da_format.c deleted file mode 100644 index c9aee52a37e2..000000000000 --- a/fs/xfs/xfs_da_format.c +++ /dev/null @@ -1,911 +0,0 @@ -/* - * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_inode.h" -#include "xfs_dir2.h" -#include "xfs_dir2_priv.h" - -/* - * Shortform directory ops - */ -static int -xfs_dir2_sf_entsize( - struct xfs_dir2_sf_hdr *hdr, - int len) -{ - int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */ - - count += len; /* name */ - count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) : - sizeof(xfs_dir2_ino4_t); /* ino # */ - return count; -} - -static int -xfs_dir3_sf_entsize( - struct xfs_dir2_sf_hdr *hdr, - int len) -{ - return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t); -} - -static struct xfs_dir2_sf_entry * -xfs_dir2_sf_nextentry( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return (struct xfs_dir2_sf_entry *) - ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen)); -} - -static struct xfs_dir2_sf_entry * -xfs_dir3_sf_nextentry( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return (struct xfs_dir2_sf_entry *) - ((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen)); -} - - -/* - * For filetype enabled shortform directories, the file type field is stored at - * the end of the name. Because it's only a single byte, endian conversion is - * not necessary. For non-filetype enable directories, the type is always - * unknown and we never store the value. - */ -static __uint8_t -xfs_dir2_sfe_get_ftype( - struct xfs_dir2_sf_entry *sfep) -{ - return XFS_DIR3_FT_UNKNOWN; -} - -static void -xfs_dir2_sfe_put_ftype( - struct xfs_dir2_sf_entry *sfep, - __uint8_t ftype) -{ - ASSERT(ftype < XFS_DIR3_FT_MAX); -} - -static __uint8_t -xfs_dir3_sfe_get_ftype( - struct xfs_dir2_sf_entry *sfep) -{ - __uint8_t ftype; - - ftype = sfep->name[sfep->namelen]; - if (ftype >= XFS_DIR3_FT_MAX) - return XFS_DIR3_FT_UNKNOWN; - return ftype; -} - -static void -xfs_dir3_sfe_put_ftype( - struct xfs_dir2_sf_entry *sfep, - __uint8_t ftype) -{ - ASSERT(ftype < XFS_DIR3_FT_MAX); - - sfep->name[sfep->namelen] = ftype; -} - -/* - * Inode numbers in short-form directories can come in two versions, - * either 4 bytes or 8 bytes wide. These helpers deal with the - * two forms transparently by looking at the headers i8count field. - * - * For 64-bit inode number the most significant byte must be zero. - */ -static xfs_ino_t -xfs_dir2_sf_get_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_dir2_inou_t *from) -{ - if (hdr->i8count) - return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; - else - return get_unaligned_be32(&from->i4.i); -} - -static void -xfs_dir2_sf_put_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_dir2_inou_t *to, - xfs_ino_t ino) -{ - ASSERT((ino & 0xff00000000000000ULL) == 0); - - if (hdr->i8count) - put_unaligned_be64(ino, &to->i8.i); - else - put_unaligned_be32(ino, &to->i4.i); -} - -static xfs_ino_t -xfs_dir2_sf_get_parent_ino( - struct xfs_dir2_sf_hdr *hdr) -{ - return xfs_dir2_sf_get_ino(hdr, &hdr->parent); -} - -static void -xfs_dir2_sf_put_parent_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); -} - -/* - * In short-form directory entries the inode numbers are stored at variable - * offset behind the entry name. If the entry stores a filetype value, then it - * sits between the name and the inode number. Hence the inode numbers may only - * be accessed through the helpers below. - */ -static xfs_ino_t -xfs_dir2_sfe_get_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return xfs_dir2_sf_get_ino(hdr, - (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]); -} - -static void -xfs_dir2_sfe_put_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, - (xfs_dir2_inou_t *)&sfep->name[sfep->namelen], ino); -} - -static xfs_ino_t -xfs_dir3_sfe_get_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return xfs_dir2_sf_get_ino(hdr, - (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1]); -} - -static void -xfs_dir3_sfe_put_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, - (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1], ino); -} - - -/* - * Directory data block operations - */ - -/* - * For special situations, the dirent size ends up fixed because we always know - * what the size of the entry is. That's true for the "." and "..", and - * therefore we know that they are a fixed size and hence their offsets are - * constant, as is the first entry. - * - * Hence, this calculation is written as a macro to be able to be calculated at - * compile time and so certain offsets can be calculated directly in the - * structure initaliser via the macro. There are two macros - one for dirents - * with ftype and without so there are no unresolvable conditionals in the - * calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power - * of 2 and the compiler doesn't reject it (unlike roundup()). - */ -#define XFS_DIR2_DATA_ENTSIZE(n) \ - round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \ - sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN) - -#define XFS_DIR3_DATA_ENTSIZE(n) \ - round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \ - sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)), \ - XFS_DIR2_DATA_ALIGN) - -static int -xfs_dir2_data_entsize( - int n) -{ - return XFS_DIR2_DATA_ENTSIZE(n); -} - -static int -xfs_dir3_data_entsize( - int n) -{ - return XFS_DIR3_DATA_ENTSIZE(n); -} - -static __uint8_t -xfs_dir2_data_get_ftype( - struct xfs_dir2_data_entry *dep) -{ - return XFS_DIR3_FT_UNKNOWN; -} - -static void -xfs_dir2_data_put_ftype( - struct xfs_dir2_data_entry *dep, - __uint8_t ftype) -{ - ASSERT(ftype < XFS_DIR3_FT_MAX); -} - -static __uint8_t -xfs_dir3_data_get_ftype( - struct xfs_dir2_data_entry *dep) -{ - __uint8_t ftype = dep->name[dep->namelen]; - - ASSERT(ftype < XFS_DIR3_FT_MAX); - if (ftype >= XFS_DIR3_FT_MAX) - return XFS_DIR3_FT_UNKNOWN; - return ftype; -} - -static void -xfs_dir3_data_put_ftype( - struct xfs_dir2_data_entry *dep, - __uint8_t type) -{ - ASSERT(type < XFS_DIR3_FT_MAX); - ASSERT(dep->namelen != 0); - - dep->name[dep->namelen] = type; -} - -/* - * Pointer to an entry's tag word. - */ -static __be16 * -xfs_dir2_data_entry_tag_p( - struct xfs_dir2_data_entry *dep) -{ - return (__be16 *)((char *)dep + - xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); -} - -static __be16 * -xfs_dir3_data_entry_tag_p( - struct xfs_dir2_data_entry *dep) -{ - return (__be16 *)((char *)dep + - xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16)); -} - -/* - * location of . and .. in data space (always block 0) - */ -static struct xfs_dir2_data_entry * -xfs_dir2_data_dot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_data_dotdot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_data_first_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1) + - XFS_DIR2_DATA_ENTSIZE(2)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_ftype_data_dotdot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_ftype_data_first_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_dot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_dotdot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_first_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2)); -} - -static struct xfs_dir2_data_free * -xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) -{ - return hdr->bestfree; -} - -static struct xfs_dir2_data_free * -xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) -{ - return ((struct xfs_dir3_data_hdr *)hdr)->best_free; -} - -static struct xfs_dir2_data_entry * -xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); -} - -static struct xfs_dir2_data_unused * -xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_unused *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); -} - -static struct xfs_dir2_data_unused * -xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_unused *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); -} - - -/* - * Directory Leaf block operations - */ -static int -xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo) -{ - return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) / - (uint)sizeof(struct xfs_dir2_leaf_entry); -} - -static struct xfs_dir2_leaf_entry * -xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp) -{ - return lp->__ents; -} - -static int -xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo) -{ - return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) / - (uint)sizeof(struct xfs_dir2_leaf_entry); -} - -static struct xfs_dir2_leaf_entry * -xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp) -{ - return ((struct xfs_dir3_leaf *)lp)->__ents; -} - -static void -xfs_dir2_leaf_hdr_from_disk( - struct xfs_dir3_icleaf_hdr *to, - struct xfs_dir2_leaf *from) -{ - to->forw = be32_to_cpu(from->hdr.info.forw); - to->back = be32_to_cpu(from->hdr.info.back); - to->magic = be16_to_cpu(from->hdr.info.magic); - to->count = be16_to_cpu(from->hdr.count); - to->stale = be16_to_cpu(from->hdr.stale); - - ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC || - to->magic == XFS_DIR2_LEAFN_MAGIC); -} - -static void -xfs_dir2_leaf_hdr_to_disk( - struct xfs_dir2_leaf *to, - struct xfs_dir3_icleaf_hdr *from) -{ - ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC || - from->magic == XFS_DIR2_LEAFN_MAGIC); - - to->hdr.info.forw = cpu_to_be32(from->forw); - to->hdr.info.back = cpu_to_be32(from->back); - to->hdr.info.magic = cpu_to_be16(from->magic); - to->hdr.count = cpu_to_be16(from->count); - to->hdr.stale = cpu_to_be16(from->stale); -} - -static void -xfs_dir3_leaf_hdr_from_disk( - struct xfs_dir3_icleaf_hdr *to, - struct xfs_dir2_leaf *from) -{ - struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from; - - to->forw = be32_to_cpu(hdr3->info.hdr.forw); - to->back = be32_to_cpu(hdr3->info.hdr.back); - to->magic = be16_to_cpu(hdr3->info.hdr.magic); - to->count = be16_to_cpu(hdr3->count); - to->stale = be16_to_cpu(hdr3->stale); - - ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC || - to->magic == XFS_DIR3_LEAFN_MAGIC); -} - -static void -xfs_dir3_leaf_hdr_to_disk( - struct xfs_dir2_leaf *to, - struct xfs_dir3_icleaf_hdr *from) -{ - struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to; - - ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC || - from->magic == XFS_DIR3_LEAFN_MAGIC); - - hdr3->info.hdr.forw = cpu_to_be32(from->forw); - hdr3->info.hdr.back = cpu_to_be32(from->back); - hdr3->info.hdr.magic = cpu_to_be16(from->magic); - hdr3->count = cpu_to_be16(from->count); - hdr3->stale = cpu_to_be16(from->stale); -} - - -/* - * Directory/Attribute Node block operations - */ -static struct xfs_da_node_entry * -xfs_da2_node_tree_p(struct xfs_da_intnode *dap) -{ - return dap->__btree; -} - -static struct xfs_da_node_entry * -xfs_da3_node_tree_p(struct xfs_da_intnode *dap) -{ - return ((struct xfs_da3_intnode *)dap)->__btree; -} - -static void -xfs_da2_node_hdr_from_disk( - struct xfs_da3_icnode_hdr *to, - struct xfs_da_intnode *from) -{ - ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); - to->forw = be32_to_cpu(from->hdr.info.forw); - to->back = be32_to_cpu(from->hdr.info.back); - to->magic = be16_to_cpu(from->hdr.info.magic); - to->count = be16_to_cpu(from->hdr.__count); - to->level = be16_to_cpu(from->hdr.__level); -} - -static void -xfs_da2_node_hdr_to_disk( - struct xfs_da_intnode *to, - struct xfs_da3_icnode_hdr *from) -{ - ASSERT(from->magic == XFS_DA_NODE_MAGIC); - to->hdr.info.forw = cpu_to_be32(from->forw); - to->hdr.info.back = cpu_to_be32(from->back); - to->hdr.info.magic = cpu_to_be16(from->magic); - to->hdr.__count = cpu_to_be16(from->count); - to->hdr.__level = cpu_to_be16(from->level); -} - -static void -xfs_da3_node_hdr_from_disk( - struct xfs_da3_icnode_hdr *to, - struct xfs_da_intnode *from) -{ - struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from; - - ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); - to->forw = be32_to_cpu(hdr3->info.hdr.forw); - to->back = be32_to_cpu(hdr3->info.hdr.back); - to->magic = be16_to_cpu(hdr3->info.hdr.magic); - to->count = be16_to_cpu(hdr3->__count); - to->level = be16_to_cpu(hdr3->__level); -} - -static void -xfs_da3_node_hdr_to_disk( - struct xfs_da_intnode *to, - struct xfs_da3_icnode_hdr *from) -{ - struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to; - - ASSERT(from->magic == XFS_DA3_NODE_MAGIC); - hdr3->info.hdr.forw = cpu_to_be32(from->forw); - hdr3->info.hdr.back = cpu_to_be32(from->back); - hdr3->info.hdr.magic = cpu_to_be16(from->magic); - hdr3->__count = cpu_to_be16(from->count); - hdr3->__level = cpu_to_be16(from->level); -} - - -/* - * Directory free space block operations - */ -static int -xfs_dir2_free_max_bests(struct xfs_da_geometry *geo) -{ - return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) / - sizeof(xfs_dir2_data_off_t); -} - -static __be16 * -xfs_dir2_free_bests_p(struct xfs_dir2_free *free) -{ - return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr)); -} - -/* - * Convert data space db to the corresponding free db. - */ -static xfs_dir2_db_t -xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) -{ - return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + - (db / xfs_dir2_free_max_bests(geo)); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static int -xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) -{ - return db % xfs_dir2_free_max_bests(geo); -} - -static int -xfs_dir3_free_max_bests(struct xfs_da_geometry *geo) -{ - return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) / - sizeof(xfs_dir2_data_off_t); -} - -static __be16 * -xfs_dir3_free_bests_p(struct xfs_dir2_free *free) -{ - return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr)); -} - -/* - * Convert data space db to the corresponding free db. - */ -static xfs_dir2_db_t -xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) -{ - return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + - (db / xfs_dir3_free_max_bests(geo)); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static int -xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) -{ - return db % xfs_dir3_free_max_bests(geo); -} - -static void -xfs_dir2_free_hdr_from_disk( - struct xfs_dir3_icfree_hdr *to, - struct xfs_dir2_free *from) -{ - to->magic = be32_to_cpu(from->hdr.magic); - to->firstdb = be32_to_cpu(from->hdr.firstdb); - to->nvalid = be32_to_cpu(from->hdr.nvalid); - to->nused = be32_to_cpu(from->hdr.nused); - ASSERT(to->magic == XFS_DIR2_FREE_MAGIC); -} - -static void -xfs_dir2_free_hdr_to_disk( - struct xfs_dir2_free *to, - struct xfs_dir3_icfree_hdr *from) -{ - ASSERT(from->magic == XFS_DIR2_FREE_MAGIC); - - to->hdr.magic = cpu_to_be32(from->magic); - to->hdr.firstdb = cpu_to_be32(from->firstdb); - to->hdr.nvalid = cpu_to_be32(from->nvalid); - to->hdr.nused = cpu_to_be32(from->nused); -} - -static void -xfs_dir3_free_hdr_from_disk( - struct xfs_dir3_icfree_hdr *to, - struct xfs_dir2_free *from) -{ - struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from; - - to->magic = be32_to_cpu(hdr3->hdr.magic); - to->firstdb = be32_to_cpu(hdr3->firstdb); - to->nvalid = be32_to_cpu(hdr3->nvalid); - to->nused = be32_to_cpu(hdr3->nused); - - ASSERT(to->magic == XFS_DIR3_FREE_MAGIC); -} - -static void -xfs_dir3_free_hdr_to_disk( - struct xfs_dir2_free *to, - struct xfs_dir3_icfree_hdr *from) -{ - struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to; - - ASSERT(from->magic == XFS_DIR3_FREE_MAGIC); - - hdr3->hdr.magic = cpu_to_be32(from->magic); - hdr3->firstdb = cpu_to_be32(from->firstdb); - hdr3->nvalid = cpu_to_be32(from->nvalid); - hdr3->nused = cpu_to_be32(from->nused); -} - -static const struct xfs_dir_ops xfs_dir2_ops = { - .sf_entsize = xfs_dir2_sf_entsize, - .sf_nextentry = xfs_dir2_sf_nextentry, - .sf_get_ftype = xfs_dir2_sfe_get_ftype, - .sf_put_ftype = xfs_dir2_sfe_put_ftype, - .sf_get_ino = xfs_dir2_sfe_get_ino, - .sf_put_ino = xfs_dir2_sfe_put_ino, - .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, - .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, - - .data_entsize = xfs_dir2_data_entsize, - .data_get_ftype = xfs_dir2_data_get_ftype, - .data_put_ftype = xfs_dir2_data_put_ftype, - .data_entry_tag_p = xfs_dir2_data_entry_tag_p, - .data_bestfree_p = xfs_dir2_data_bestfree_p, - - .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), - .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1), - .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1) + - XFS_DIR2_DATA_ENTSIZE(2), - .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), - - .data_dot_entry_p = xfs_dir2_data_dot_entry_p, - .data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p, - .data_first_entry_p = xfs_dir2_data_first_entry_p, - .data_entry_p = xfs_dir2_data_entry_p, - .data_unused_p = xfs_dir2_data_unused_p, - - .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), - .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, - .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, - .leaf_max_ents = xfs_dir2_max_leaf_ents, - .leaf_ents_p = xfs_dir2_leaf_ents_p, - - .node_hdr_size = sizeof(struct xfs_da_node_hdr), - .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, - .node_tree_p = xfs_da2_node_tree_p, - - .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), - .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, - .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, - .free_max_bests = xfs_dir2_free_max_bests, - .free_bests_p = xfs_dir2_free_bests_p, - .db_to_fdb = xfs_dir2_db_to_fdb, - .db_to_fdindex = xfs_dir2_db_to_fdindex, -}; - -static const struct xfs_dir_ops xfs_dir2_ftype_ops = { - .sf_entsize = xfs_dir3_sf_entsize, - .sf_nextentry = xfs_dir3_sf_nextentry, - .sf_get_ftype = xfs_dir3_sfe_get_ftype, - .sf_put_ftype = xfs_dir3_sfe_put_ftype, - .sf_get_ino = xfs_dir3_sfe_get_ino, - .sf_put_ino = xfs_dir3_sfe_put_ino, - .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, - .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, - - .data_entsize = xfs_dir3_data_entsize, - .data_get_ftype = xfs_dir3_data_get_ftype, - .data_put_ftype = xfs_dir3_data_put_ftype, - .data_entry_tag_p = xfs_dir3_data_entry_tag_p, - .data_bestfree_p = xfs_dir2_data_bestfree_p, - - .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), - .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1), - .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2), - .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), - - .data_dot_entry_p = xfs_dir2_data_dot_entry_p, - .data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p, - .data_first_entry_p = xfs_dir2_ftype_data_first_entry_p, - .data_entry_p = xfs_dir2_data_entry_p, - .data_unused_p = xfs_dir2_data_unused_p, - - .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), - .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, - .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, - .leaf_max_ents = xfs_dir2_max_leaf_ents, - .leaf_ents_p = xfs_dir2_leaf_ents_p, - - .node_hdr_size = sizeof(struct xfs_da_node_hdr), - .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, - .node_tree_p = xfs_da2_node_tree_p, - - .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), - .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, - .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, - .free_max_bests = xfs_dir2_free_max_bests, - .free_bests_p = xfs_dir2_free_bests_p, - .db_to_fdb = xfs_dir2_db_to_fdb, - .db_to_fdindex = xfs_dir2_db_to_fdindex, -}; - -static const struct xfs_dir_ops xfs_dir3_ops = { - .sf_entsize = xfs_dir3_sf_entsize, - .sf_nextentry = xfs_dir3_sf_nextentry, - .sf_get_ftype = xfs_dir3_sfe_get_ftype, - .sf_put_ftype = xfs_dir3_sfe_put_ftype, - .sf_get_ino = xfs_dir3_sfe_get_ino, - .sf_put_ino = xfs_dir3_sfe_put_ino, - .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, - .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, - - .data_entsize = xfs_dir3_data_entsize, - .data_get_ftype = xfs_dir3_data_get_ftype, - .data_put_ftype = xfs_dir3_data_put_ftype, - .data_entry_tag_p = xfs_dir3_data_entry_tag_p, - .data_bestfree_p = xfs_dir3_data_bestfree_p, - - .data_dot_offset = sizeof(struct xfs_dir3_data_hdr), - .data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1), - .data_first_offset = sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2), - .data_entry_offset = sizeof(struct xfs_dir3_data_hdr), - - .data_dot_entry_p = xfs_dir3_data_dot_entry_p, - .data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p, - .data_first_entry_p = xfs_dir3_data_first_entry_p, - .data_entry_p = xfs_dir3_data_entry_p, - .data_unused_p = xfs_dir3_data_unused_p, - - .leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr), - .leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk, - .leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk, - .leaf_max_ents = xfs_dir3_max_leaf_ents, - .leaf_ents_p = xfs_dir3_leaf_ents_p, - - .node_hdr_size = sizeof(struct xfs_da3_node_hdr), - .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, - .node_tree_p = xfs_da3_node_tree_p, - - .free_hdr_size = sizeof(struct xfs_dir3_free_hdr), - .free_hdr_to_disk = xfs_dir3_free_hdr_to_disk, - .free_hdr_from_disk = xfs_dir3_free_hdr_from_disk, - .free_max_bests = xfs_dir3_free_max_bests, - .free_bests_p = xfs_dir3_free_bests_p, - .db_to_fdb = xfs_dir3_db_to_fdb, - .db_to_fdindex = xfs_dir3_db_to_fdindex, -}; - -static const struct xfs_dir_ops xfs_dir2_nondir_ops = { - .node_hdr_size = sizeof(struct xfs_da_node_hdr), - .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, - .node_tree_p = xfs_da2_node_tree_p, -}; - -static const struct xfs_dir_ops xfs_dir3_nondir_ops = { - .node_hdr_size = sizeof(struct xfs_da3_node_hdr), - .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, - .node_tree_p = xfs_da3_node_tree_p, -}; - -/* - * Return the ops structure according to the current config. If we are passed - * an inode, then that overrides the default config we use which is based on - * feature bits. - */ -const struct xfs_dir_ops * -xfs_dir_get_ops( - struct xfs_mount *mp, - struct xfs_inode *dp) -{ - if (dp) - return dp->d_ops; - if (mp->m_dir_inode_ops) - return mp->m_dir_inode_ops; - if (xfs_sb_version_hascrc(&mp->m_sb)) - return &xfs_dir3_ops; - if (xfs_sb_version_hasftype(&mp->m_sb)) - return &xfs_dir2_ftype_ops; - return &xfs_dir2_ops; -} - -const struct xfs_dir_ops * -xfs_nondir_get_ops( - struct xfs_mount *mp, - struct xfs_inode *dp) -{ - if (dp) - return dp->d_ops; - if (mp->m_nondir_inode_ops) - return mp->m_nondir_inode_ops; - if (xfs_sb_version_hascrc(&mp->m_sb)) - return &xfs_dir3_nondir_ops; - return &xfs_dir2_nondir_ops; -} diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c deleted file mode 100644 index a0aca734199b..000000000000 --- a/fs/xfs/xfs_dir2.c +++ /dev/null @@ -1,762 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_inum.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_inode.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_bmap.h" -#include "xfs_dir2.h" -#include "xfs_dir2_priv.h" -#include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_dinode.h" - -struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; - - -/* - * ASCII case-insensitive (ie. A-Z) support for directories that was - * used in IRIX. - */ -STATIC xfs_dahash_t -xfs_ascii_ci_hashname( - struct xfs_name *name) -{ - xfs_dahash_t hash; - int i; - - for (i = 0, hash = 0; i < name->len; i++) - hash = tolower(name->name[i]) ^ rol32(hash, 7); - - return hash; -} - -STATIC enum xfs_dacmp -xfs_ascii_ci_compname( - struct xfs_da_args *args, - const unsigned char *name, - int len) -{ - enum xfs_dacmp result; - int i; - - if (args->namelen != len) - return XFS_CMP_DIFFERENT; - - result = XFS_CMP_EXACT; - for (i = 0; i < len; i++) { - if (args->name[i] == name[i]) - continue; - if (tolower(args->name[i]) != tolower(name[i])) - return XFS_CMP_DIFFERENT; - result = XFS_CMP_CASE; - } - - return result; -} - -static struct xfs_nameops xfs_ascii_ci_nameops = { - .hashname = xfs_ascii_ci_hashname, - .compname = xfs_ascii_ci_compname, -}; - -int -xfs_da_mount( - struct xfs_mount *mp) -{ - struct xfs_da_geometry *dageo; - int nodehdr_size; - - - ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); - ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= - XFS_MAX_BLOCKSIZE); - - mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); - mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); - - nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; - mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), - KM_SLEEP | KM_MAYFAIL); - mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), - KM_SLEEP | KM_MAYFAIL); - if (!mp->m_dir_geo || !mp->m_attr_geo) { - kmem_free(mp->m_dir_geo); - kmem_free(mp->m_attr_geo); - return ENOMEM; - } - - /* set up directory geometry */ - dageo = mp->m_dir_geo; - dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; - dageo->fsblog = mp->m_sb.sb_blocklog; - dageo->blksize = 1 << dageo->blklog; - dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; - - /* - * Now we've set up the block conversion variables, we can calculate the - * segment block constants using the geometry structure. - */ - dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET); - dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET); - dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET); - dageo->node_ents = (dageo->blksize - nodehdr_size) / - (uint)sizeof(xfs_da_node_entry_t); - dageo->magicpct = (dageo->blksize * 37) / 100; - - /* set up attribute geometry - single fsb only */ - dageo = mp->m_attr_geo; - dageo->blklog = mp->m_sb.sb_blocklog; - dageo->fsblog = mp->m_sb.sb_blocklog; - dageo->blksize = 1 << dageo->blklog; - dageo->fsbcount = 1; - dageo->node_ents = (dageo->blksize - nodehdr_size) / - (uint)sizeof(xfs_da_node_entry_t); - dageo->magicpct = (dageo->blksize * 37) / 100; - - if (xfs_sb_version_hasasciici(&mp->m_sb)) - mp->m_dirnameops = &xfs_ascii_ci_nameops; - else - mp->m_dirnameops = &xfs_default_nameops; - - return 0; -} - -void -xfs_da_unmount( - struct xfs_mount *mp) -{ - kmem_free(mp->m_dir_geo); - kmem_free(mp->m_attr_geo); -} - -/* - * Return 1 if directory contains only "." and "..". - */ -int -xfs_dir_isempty( - xfs_inode_t *dp) -{ - xfs_dir2_sf_hdr_t *sfp; - - ASSERT(S_ISDIR(dp->i_d.di_mode)); - if (dp->i_d.di_size == 0) /* might happen during shutdown. */ - return 1; - if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) - return 0; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - return !sfp->count; -} - -/* - * Validate a given inode number. - */ -int -xfs_dir_ino_validate( - xfs_mount_t *mp, - xfs_ino_t ino) -{ - xfs_agblock_t agblkno; - xfs_agino_t agino; - xfs_agnumber_t agno; - int ino_ok; - int ioff; - - agno = XFS_INO_TO_AGNO(mp, ino); - agblkno = XFS_INO_TO_AGBNO(mp, ino); - ioff = XFS_INO_TO_OFFSET(mp, ino); - agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff); - ino_ok = - agno < mp->m_sb.sb_agcount && - agblkno < mp->m_sb.sb_agblocks && - agblkno != 0 && - ioff < (1 << mp->m_sb.sb_inopblog) && - XFS_AGINO_TO_INO(mp, agno, agino) == ino; - if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, - XFS_RANDOM_DIR_INO_VALIDATE))) { - xfs_warn(mp, "Invalid inode number 0x%Lx", - (unsigned long long) ino); - XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); - return EFSCORRUPTED; - } - return 0; -} - -/* - * Initialize a directory with its "." and ".." entries. - */ -int -xfs_dir_init( - xfs_trans_t *tp, - xfs_inode_t *dp, - xfs_inode_t *pdp) -{ - struct xfs_da_args *args; - int error; - - ASSERT(S_ISDIR(dp->i_d.di_mode)); - error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino); - if (error) - return error; - - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); - if (!args) - return ENOMEM; - - args->geo = dp->i_mount->m_dir_geo; - args->dp = dp; - args->trans = tp; - error = xfs_dir2_sf_create(args, pdp->i_ino); - kmem_free(args); - return error; -} - -/* - Enter a name in a directory. - */ -int -xfs_dir_createname( - xfs_trans_t *tp, - xfs_inode_t *dp, - struct xfs_name *name, - xfs_ino_t inum, /* new entry inode number */ - xfs_fsblock_t *first, /* bmap's firstblock */ - xfs_bmap_free_t *flist, /* bmap's freeblock list */ - xfs_extlen_t total) /* bmap's total block count */ -{ - struct xfs_da_args *args; - int rval; - int v; /* type-checking value */ - - ASSERT(S_ISDIR(dp->i_d.di_mode)); - rval = xfs_dir_ino_validate(tp->t_mountp, inum); - if (rval) - return rval; - XFS_STATS_INC(xs_dir_create); - - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); - if (!args) - return ENOMEM; - - args->geo = dp->i_mount->m_dir_geo; - args->name = name->name; - args->namelen = name->len; - args->filetype = name->type; - args->hashval = dp->i_mount->m_dirnameops->hashname(name); - args->inumber = inum; - args->dp = dp; - args->firstblock = first; - args->flist = flist; - args->total = total; - args->whichfork = XFS_DATA_FORK; - args->trans = tp; - args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - rval = xfs_dir2_sf_addname(args); - goto out_free; - } - - rval = xfs_dir2_isblock(args, &v); - if (rval) - goto out_free; - if (v) { - rval = xfs_dir2_block_addname(args); - goto out_free; - } - - rval = xfs_dir2_isleaf(args, &v); - if (rval) - goto out_free; - if (v) - rval = xfs_dir2_leaf_addname(args); - else - rval = xfs_dir2_node_addname(args); - -out_free: - kmem_free(args); - return rval; -} - -/* - * If doing a CI lookup and case-insensitive match, dup actual name into - * args.value. Return EEXIST for success (ie. name found) or an error. - */ -int -xfs_dir_cilookup_result( - struct xfs_da_args *args, - const unsigned char *name, - int len) -{ - if (args->cmpresult == XFS_CMP_DIFFERENT) - return ENOENT; - if (args->cmpresult != XFS_CMP_CASE || - !(args->op_flags & XFS_DA_OP_CILOOKUP)) - return EEXIST; - - args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL); - if (!args->value) - return ENOMEM; - - memcpy(args->value, name, len); - args->valuelen = len; - return EEXIST; -} - -/* - * Lookup a name in a directory, give back the inode number. - * If ci_name is not NULL, returns the actual name in ci_name if it differs - * to name, or ci_name->name is set to NULL for an exact match. - */ - -int -xfs_dir_lookup( - xfs_trans_t *tp, - xfs_inode_t *dp, - struct xfs_name *name, - xfs_ino_t *inum, /* out: inode number */ - struct xfs_name *ci_name) /* out: actual name if CI match */ -{ - struct xfs_da_args *args; - int rval; - int v; /* type-checking value */ - - ASSERT(S_ISDIR(dp->i_d.di_mode)); - XFS_STATS_INC(xs_dir_lookup); - - /* - * We need to use KM_NOFS here so that lockdep will not throw false - * positive deadlock warnings on a non-transactional lookup path. It is - * safe to recurse into inode recalim in that case, but lockdep can't - * easily be taught about it. Hence KM_NOFS avoids having to add more - * lockdep Doing this avoids having to add a bunch of lockdep class - * annotations into the reclaim path for the ilock. - */ - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); - args->geo = dp->i_mount->m_dir_geo; - args->name = name->name; - args->namelen = name->len; - args->filetype = name->type; - args->hashval = dp->i_mount->m_dirnameops->hashname(name); - args->dp = dp; - args->whichfork = XFS_DATA_FORK; - args->trans = tp; - args->op_flags = XFS_DA_OP_OKNOENT; - if (ci_name) - args->op_flags |= XFS_DA_OP_CILOOKUP; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - rval = xfs_dir2_sf_lookup(args); - goto out_check_rval; - } - - rval = xfs_dir2_isblock(args, &v); - if (rval) - goto out_free; - if (v) { - rval = xfs_dir2_block_lookup(args); - goto out_check_rval; - } - - rval = xfs_dir2_isleaf(args, &v); - if (rval) - goto out_free; - if (v) - rval = xfs_dir2_leaf_lookup(args); - else - rval = xfs_dir2_node_lookup(args); - -out_check_rval: - if (rval == EEXIST) - rval = 0; - if (!rval) { - *inum = args->inumber; - if (ci_name) { - ci_name->name = args->value; - ci_name->len = args->valuelen; - } - } -out_free: - kmem_free(args); - return rval; -} - -/* - * Remove an entry from a directory. - */ -int -xfs_dir_removename( - xfs_trans_t *tp, - xfs_inode_t *dp, - struct xfs_name *name, - xfs_ino_t ino, - xfs_fsblock_t *first, /* bmap's firstblock */ - xfs_bmap_free_t *flist, /* bmap's freeblock list */ - xfs_extlen_t total) /* bmap's total block count */ -{ - struct xfs_da_args *args; - int rval; - int v; /* type-checking value */ - - ASSERT(S_ISDIR(dp->i_d.di_mode)); - XFS_STATS_INC(xs_dir_remove); - - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); - if (!args) - return ENOMEM; - - args->geo = dp->i_mount->m_dir_geo; - args->name = name->name; - args->namelen = name->len; - args->filetype = name->type; - args->hashval = dp->i_mount->m_dirnameops->hashname(name); - args->inumber = ino; - args->dp = dp; - args->firstblock = first; - args->flist = flist; - args->total = total; - args->whichfork = XFS_DATA_FORK; - args->trans = tp; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - rval = xfs_dir2_sf_removename(args); - goto out_free; - } - - rval = xfs_dir2_isblock(args, &v); - if (rval) - goto out_free; - if (v) { - rval = xfs_dir2_block_removename(args); - goto out_free; - } - - rval = xfs_dir2_isleaf(args, &v); - if (rval) - goto out_free; - if (v) - rval = xfs_dir2_leaf_removename(args); - else - rval = xfs_dir2_node_removename(args); -out_free: - kmem_free(args); - return rval; -} - -/* - * Replace the inode number of a directory entry. - */ -int -xfs_dir_replace( - xfs_trans_t *tp, - xfs_inode_t *dp, - struct xfs_name *name, /* name of entry to replace */ - xfs_ino_t inum, /* new inode number */ - xfs_fsblock_t *first, /* bmap's firstblock */ - xfs_bmap_free_t *flist, /* bmap's freeblock list */ - xfs_extlen_t total) /* bmap's total block count */ -{ - struct xfs_da_args *args; - int rval; - int v; /* type-checking value */ - - ASSERT(S_ISDIR(dp->i_d.di_mode)); - - rval = xfs_dir_ino_validate(tp->t_mountp, inum); - if (rval) - return rval; - - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); - if (!args) - return ENOMEM; - - args->geo = dp->i_mount->m_dir_geo; - args->name = name->name; - args->namelen = name->len; - args->filetype = name->type; - args->hashval = dp->i_mount->m_dirnameops->hashname(name); - args->inumber = inum; - args->dp = dp; - args->firstblock = first; - args->flist = flist; - args->total = total; - args->whichfork = XFS_DATA_FORK; - args->trans = tp; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - rval = xfs_dir2_sf_replace(args); - goto out_free; - } - - rval = xfs_dir2_isblock(args, &v); - if (rval) - goto out_free; - if (v) { - rval = xfs_dir2_block_replace(args); - goto out_free; - } - - rval = xfs_dir2_isleaf(args, &v); - if (rval) - goto out_free; - if (v) - rval = xfs_dir2_leaf_replace(args); - else - rval = xfs_dir2_node_replace(args); -out_free: - kmem_free(args); - return rval; -} - -/* - * See if this entry can be added to the directory without allocating space. - * First checks that the caller couldn't reserve enough space (resblks = 0). - */ -int -xfs_dir_canenter( - xfs_trans_t *tp, - xfs_inode_t *dp, - struct xfs_name *name, /* name of entry to add */ - uint resblks) -{ - struct xfs_da_args *args; - int rval; - int v; /* type-checking value */ - - if (resblks) - return 0; - - ASSERT(S_ISDIR(dp->i_d.di_mode)); - - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); - if (!args) - return ENOMEM; - - args->geo = dp->i_mount->m_dir_geo; - args->name = name->name; - args->namelen = name->len; - args->filetype = name->type; - args->hashval = dp->i_mount->m_dirnameops->hashname(name); - args->dp = dp; - args->whichfork = XFS_DATA_FORK; - args->trans = tp; - args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | - XFS_DA_OP_OKNOENT; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - rval = xfs_dir2_sf_addname(args); - goto out_free; - } - - rval = xfs_dir2_isblock(args, &v); - if (rval) - goto out_free; - if (v) { - rval = xfs_dir2_block_addname(args); - goto out_free; - } - - rval = xfs_dir2_isleaf(args, &v); - if (rval) - goto out_free; - if (v) - rval = xfs_dir2_leaf_addname(args); - else - rval = xfs_dir2_node_addname(args); -out_free: - kmem_free(args); - return rval; -} - -/* - * Utility routines. - */ - -/* - * Add a block to the directory. - * - * This routine is for data and free blocks, not leaf/node blocks which are - * handled by xfs_da_grow_inode. - */ -int -xfs_dir2_grow_inode( - struct xfs_da_args *args, - int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ - xfs_dir2_db_t *dbp) /* out: block number added */ -{ - struct xfs_inode *dp = args->dp; - struct xfs_mount *mp = dp->i_mount; - xfs_fileoff_t bno; /* directory offset of new block */ - int count; /* count of filesystem blocks */ - int error; - - trace_xfs_dir2_grow_inode(args, space); - - /* - * Set lowest possible block in the space requested. - */ - bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); - count = args->geo->fsbcount; - - error = xfs_da_grow_inode_int(args, &bno, count); - if (error) - return error; - - *dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno); - - /* - * Update file's size if this is the data space and it grew. - */ - if (space == XFS_DIR2_DATA_SPACE) { - xfs_fsize_t size; /* directory file (data) size */ - - size = XFS_FSB_TO_B(mp, bno + count); - if (size > dp->i_d.di_size) { - dp->i_d.di_size = size; - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); - } - } - return 0; -} - -/* - * See if the directory is a single-block form directory. - */ -int -xfs_dir2_isblock( - struct xfs_da_args *args, - int *vp) /* out: 1 is block, 0 is not block */ -{ - xfs_fileoff_t last; /* last file offset */ - int rval; - - if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) - return rval; - rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; - ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize); - *vp = rval; - return 0; -} - -/* - * See if the directory is a single-leaf form directory. - */ -int -xfs_dir2_isleaf( - struct xfs_da_args *args, - int *vp) /* out: 1 is block, 0 is not block */ -{ - xfs_fileoff_t last; /* last file offset */ - int rval; - - if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) - return rval; - *vp = last == args->geo->leafblk + args->geo->fsbcount; - return 0; -} - -/* - * Remove the given block from the directory. - * This routine is used for data and free blocks, leaf/node are done - * by xfs_da_shrink_inode. - */ -int -xfs_dir2_shrink_inode( - xfs_da_args_t *args, - xfs_dir2_db_t db, - struct xfs_buf *bp) -{ - xfs_fileoff_t bno; /* directory file offset */ - xfs_dablk_t da; /* directory file offset */ - int done; /* bunmap is finished */ - xfs_inode_t *dp; - int error; - xfs_mount_t *mp; - xfs_trans_t *tp; - - trace_xfs_dir2_shrink_inode(args, db); - - dp = args->dp; - mp = dp->i_mount; - tp = args->trans; - da = xfs_dir2_db_to_da(args->geo, db); - /* - * Unmap the fsblock(s). - */ - if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, - XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, - &done))) { - /* - * ENOSPC actually can happen if we're in a removename with - * no space reservation, and the resulting block removal - * would cause a bmap btree split or conversion from extents - * to btree. This can only happen for un-fragmented - * directory blocks, since you need to be punching out - * the middle of an extent. - * In this case we need to leave the block in the file, - * and not binval it. - * So the block has to be in a consistent empty state - * and appropriately logged. - * We don't free up the buffer, the caller can tell it - * hasn't happened since it got an error back. - */ - return error; - } - ASSERT(done); - /* - * Invalidate the buffer from the transaction. - */ - xfs_trans_binval(tp, bp); - /* - * If it's not a data block, we're done. - */ - if (db >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET)) - return 0; - /* - * If the block isn't the last one in the directory, we're done. - */ - if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(args->geo, db + 1, 0)) - return 0; - bno = da; - if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) { - /* - * This can't really happen unless there's kernel corruption. - */ - return error; - } - if (db == args->geo->datablk) - ASSERT(bno == 0); - else - ASSERT(bno > 0); - /* - * Set the size to the new last block. - */ - dp->i_d.di_size = XFS_FSB_TO_B(mp, bno); - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - return 0; -} diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c deleted file mode 100644 index ab0bffccf5c3..000000000000 --- a/fs/xfs/xfs_dir2_block.c +++ /dev/null @@ -1,1265 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_inode.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_bmap.h" -#include "xfs_buf_item.h" -#include "xfs_dir2.h" -#include "xfs_dir2_priv.h" -#include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_cksum.h" -#include "xfs_dinode.h" - -/* - * Local function prototypes. - */ -static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, struct xfs_buf *bp, - int first, int last); -static void xfs_dir2_block_log_tail(xfs_trans_t *tp, struct xfs_buf *bp); -static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, struct xfs_buf **bpp, - int *entno); -static int xfs_dir2_block_sort(const void *a, const void *b); - -static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot; - -/* - * One-time startup routine called from xfs_init(). - */ -void -xfs_dir_startup(void) -{ - xfs_dir_hash_dot = xfs_da_hashname((unsigned char *)".", 1); - xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); -} - -static bool -xfs_dir3_block_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) - return false; - if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) - return false; - if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; - } else { - if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) - return false; - } - if (__xfs_dir3_data_check(NULL, bp)) - return false; - return true; -} - -static void -xfs_dir3_block_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (!xfs_dir3_block_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); -} - -static void -xfs_dir3_block_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - - if (!xfs_dir3_block_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (bip) - hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - - xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); -} - -const struct xfs_buf_ops xfs_dir3_block_buf_ops = { - .verify_read = xfs_dir3_block_read_verify, - .verify_write = xfs_dir3_block_write_verify, -}; - -int -xfs_dir3_block_read( - struct xfs_trans *tp, - struct xfs_inode *dp, - struct xfs_buf **bpp) -{ - struct xfs_mount *mp = dp->i_mount; - int err; - - err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp, - XFS_DATA_FORK, &xfs_dir3_block_buf_ops); - if (!err && tp) - xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); - return err; -} - -static void -xfs_dir3_block_init( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_buf *bp, - struct xfs_inode *dp) -{ - struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - - bp->b_ops = &xfs_dir3_block_buf_ops; - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF); - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - memset(hdr3, 0, sizeof(*hdr3)); - hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); - hdr3->blkno = cpu_to_be64(bp->b_bn); - hdr3->owner = cpu_to_be64(dp->i_ino); - uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); - return; - - } - hdr3->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); -} - -static void -xfs_dir2_block_need_space( - struct xfs_inode *dp, - struct xfs_dir2_data_hdr *hdr, - struct xfs_dir2_block_tail *btp, - struct xfs_dir2_leaf_entry *blp, - __be16 **tagpp, - struct xfs_dir2_data_unused **dupp, - struct xfs_dir2_data_unused **enddupp, - int *compact, - int len) -{ - struct xfs_dir2_data_free *bf; - __be16 *tagp = NULL; - struct xfs_dir2_data_unused *dup = NULL; - struct xfs_dir2_data_unused *enddup = NULL; - - *compact = 0; - bf = dp->d_ops->data_bestfree_p(hdr); - - /* - * If there are stale entries we'll use one for the leaf. - */ - if (btp->stale) { - if (be16_to_cpu(bf[0].length) >= len) { - /* - * The biggest entry enough to avoid compaction. - */ - dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(bf[0].offset)); - goto out; - } - - /* - * Will need to compact to make this work. - * Tag just before the first leaf entry. - */ - *compact = 1; - tagp = (__be16 *)blp - 1; - - /* Data object just before the first leaf entry. */ - dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); - - /* - * If it's not free then the data will go where the - * leaf data starts now, if it works at all. - */ - if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) * - (uint)sizeof(*blp) < len) - dup = NULL; - } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len) - dup = NULL; - else - dup = (xfs_dir2_data_unused_t *)blp; - goto out; - } - - /* - * no stale entries, so just use free space. - * Tag just before the first leaf entry. - */ - tagp = (__be16 *)blp - 1; - - /* Data object just before the first leaf entry. */ - enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); - - /* - * If it's not free then can't do this add without cleaning up: - * the space before the first leaf entry needs to be free so it - * can be expanded to hold the pointer to the new entry. - */ - if (be16_to_cpu(enddup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - /* - * Check out the biggest freespace and see if it's the same one. - */ - dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(bf[0].offset)); - if (dup != enddup) { - /* - * Not the same free entry, just check its length. - */ - if (be16_to_cpu(dup->length) < len) - dup = NULL; - goto out; - } - - /* - * It is the biggest freespace, can it hold the leaf too? - */ - if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) { - /* - * Yes, use the second-largest entry instead if it works. - */ - if (be16_to_cpu(bf[1].length) >= len) - dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(bf[1].offset)); - else - dup = NULL; - } - } -out: - *tagpp = tagp; - *dupp = dup; - *enddupp = enddup; -} - -/* - * compact the leaf entries. - * Leave the highest-numbered stale entry stale. - * XXX should be the one closest to mid but mid is not yet computed. - */ -static void -xfs_dir2_block_compact( - struct xfs_da_args *args, - struct xfs_buf *bp, - struct xfs_dir2_data_hdr *hdr, - struct xfs_dir2_block_tail *btp, - struct xfs_dir2_leaf_entry *blp, - int *needlog, - int *lfloghigh, - int *lfloglow) -{ - int fromidx; /* source leaf index */ - int toidx; /* target leaf index */ - int needscan = 0; - int highstale; /* high stale index */ - - fromidx = toidx = be32_to_cpu(btp->count) - 1; - highstale = *lfloghigh = -1; - for (; fromidx >= 0; fromidx--) { - if (blp[fromidx].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { - if (highstale == -1) - highstale = toidx; - else { - if (*lfloghigh == -1) - *lfloghigh = toidx; - continue; - } - } - if (fromidx < toidx) - blp[toidx] = blp[fromidx]; - toidx--; - } - *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); - *lfloghigh -= be32_to_cpu(btp->stale) - 1; - be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); - xfs_dir2_data_make_free(args, bp, - (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), - (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), - needlog, &needscan); - btp->stale = cpu_to_be32(1); - /* - * If we now need to rebuild the bestfree map, do so. - * This needs to happen before the next call to use_free. - */ - if (needscan) - xfs_dir2_data_freescan(args->dp, hdr, needlog); -} - -/* - * Add an entry to a block directory. - */ -int /* error */ -xfs_dir2_block_addname( - xfs_da_args_t *args) /* directory op arguments */ -{ - xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - struct xfs_buf *bp; /* buffer for block */ - xfs_dir2_block_tail_t *btp; /* block tail */ - int compact; /* need to compact leaf ents */ - xfs_dir2_data_entry_t *dep; /* block data entry */ - xfs_inode_t *dp; /* directory inode */ - xfs_dir2_data_unused_t *dup; /* block unused entry */ - int error; /* error return value */ - xfs_dir2_data_unused_t *enddup=NULL; /* unused at end of data */ - xfs_dahash_t hash; /* hash value of found entry */ - int high; /* high index for binary srch */ - int highstale; /* high stale index */ - int lfloghigh=0; /* last final leaf to log */ - int lfloglow=0; /* first final leaf to log */ - int len; /* length of the new entry */ - int low; /* low index for binary srch */ - int lowstale; /* low stale index */ - int mid=0; /* midpoint for binary srch */ - xfs_mount_t *mp; /* filesystem mount point */ - int needlog; /* need to log header */ - int needscan; /* need to rescan freespace */ - __be16 *tagp; /* pointer to tag value */ - xfs_trans_t *tp; /* transaction structure */ - - trace_xfs_dir2_block_addname(args); - - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - - /* Read the (one and only) directory block into bp. */ - error = xfs_dir3_block_read(tp, dp, &bp); - if (error) - return error; - - len = dp->d_ops->data_entsize(args->namelen); - - /* - * Set up pointers to parts of the block. - */ - hdr = bp->b_addr; - btp = xfs_dir2_block_tail_p(args->geo, hdr); - blp = xfs_dir2_block_leaf_p(btp); - - /* - * Find out if we can reuse stale entries or whether we need extra - * space for entry and new leaf. - */ - xfs_dir2_block_need_space(dp, hdr, btp, blp, &tagp, &dup, - &enddup, &compact, len); - - /* - * Done everything we need for a space check now. - */ - if (args->op_flags & XFS_DA_OP_JUSTCHECK) { - xfs_trans_brelse(tp, bp); - if (!dup) - return ENOSPC; - return 0; - } - - /* - * If we don't have space for the new entry & leaf ... - */ - if (!dup) { - /* Don't have a space reservation: return no-space. */ - if (args->total == 0) - return ENOSPC; - /* - * Convert to the next larger format. - * Then add the new entry in that format. - */ - error = xfs_dir2_block_to_leaf(args, bp); - if (error) - return error; - return xfs_dir2_leaf_addname(args); - } - - needlog = needscan = 0; - - /* - * If need to compact the leaf entries, do it now. - */ - if (compact) { - xfs_dir2_block_compact(args, bp, hdr, btp, blp, &needlog, - &lfloghigh, &lfloglow); - /* recalculate blp post-compaction */ - blp = xfs_dir2_block_leaf_p(btp); - } else if (btp->stale) { - /* - * Set leaf logging boundaries to impossible state. - * For the no-stale case they're set explicitly. - */ - lfloglow = be32_to_cpu(btp->count); - lfloghigh = -1; - } - - /* - * Find the slot that's first lower than our hash value, -1 if none. - */ - for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) { - mid = (low + high) >> 1; - if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval) - break; - if (hash < args->hashval) - low = mid + 1; - else - high = mid - 1; - } - while (mid >= 0 && be32_to_cpu(blp[mid].hashval) >= args->hashval) { - mid--; - } - /* - * No stale entries, will use enddup space to hold new leaf. - */ - if (!btp->stale) { - /* - * Mark the space needed for the new leaf entry, now in use. - */ - xfs_dir2_data_use_free(args, bp, enddup, - (xfs_dir2_data_aoff_t) - ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - - sizeof(*blp)), - (xfs_dir2_data_aoff_t)sizeof(*blp), - &needlog, &needscan); - /* - * Update the tail (entry count). - */ - be32_add_cpu(&btp->count, 1); - /* - * If we now need to rebuild the bestfree map, do so. - * This needs to happen before the next call to use_free. - */ - if (needscan) { - xfs_dir2_data_freescan(dp, hdr, &needlog); - needscan = 0; - } - /* - * Adjust pointer to the first leaf entry, we're about to move - * the table up one to open up space for the new leaf entry. - * Then adjust our index to match. - */ - blp--; - mid++; - if (mid) - memmove(blp, &blp[1], mid * sizeof(*blp)); - lfloglow = 0; - lfloghigh = mid; - } - /* - * Use a stale leaf for our new entry. - */ - else { - for (lowstale = mid; - lowstale >= 0 && - blp[lowstale].address != - cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - lowstale--) - continue; - for (highstale = mid + 1; - highstale < be32_to_cpu(btp->count) && - blp[highstale].address != - cpu_to_be32(XFS_DIR2_NULL_DATAPTR) && - (lowstale < 0 || mid - lowstale > highstale - mid); - highstale++) - continue; - /* - * Move entries toward the low-numbered stale entry. - */ - if (lowstale >= 0 && - (highstale == be32_to_cpu(btp->count) || - mid - lowstale <= highstale - mid)) { - if (mid - lowstale) - memmove(&blp[lowstale], &blp[lowstale + 1], - (mid - lowstale) * sizeof(*blp)); - lfloglow = MIN(lowstale, lfloglow); - lfloghigh = MAX(mid, lfloghigh); - } - /* - * Move entries toward the high-numbered stale entry. - */ - else { - ASSERT(highstale < be32_to_cpu(btp->count)); - mid++; - if (highstale - mid) - memmove(&blp[mid + 1], &blp[mid], - (highstale - mid) * sizeof(*blp)); - lfloglow = MIN(mid, lfloglow); - lfloghigh = MAX(highstale, lfloghigh); - } - be32_add_cpu(&btp->stale, -1); - } - /* - * Point to the new data entry. - */ - dep = (xfs_dir2_data_entry_t *)dup; - /* - * Fill in the leaf entry. - */ - blp[mid].hashval = cpu_to_be32(args->hashval); - blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( - (char *)dep - (char *)hdr)); - xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); - /* - * Mark space for the data entry used. - */ - xfs_dir2_data_use_free(args, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), - (xfs_dir2_data_aoff_t)len, &needlog, &needscan); - /* - * Create the new data entry. - */ - dep->inumber = cpu_to_be64(args->inumber); - dep->namelen = args->namelen; - memcpy(dep->name, args->name, args->namelen); - dp->d_ops->data_put_ftype(dep, args->filetype); - tagp = dp->d_ops->data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); - /* - * Clean up the bestfree array and log the header, tail, and entry. - */ - if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); - if (needlog) - xfs_dir2_data_log_header(args, bp); - xfs_dir2_block_log_tail(tp, bp); - xfs_dir2_data_log_entry(args, bp, dep); - xfs_dir3_data_check(dp, bp); - return 0; -} - -/* - * Log leaf entries from the block. - */ -static void -xfs_dir2_block_log_leaf( - xfs_trans_t *tp, /* transaction structure */ - struct xfs_buf *bp, /* block buffer */ - int first, /* index of first logged leaf */ - int last) /* index of last logged leaf */ -{ - xfs_dir2_data_hdr_t *hdr = bp->b_addr; - xfs_dir2_leaf_entry_t *blp; - xfs_dir2_block_tail_t *btp; - - btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr); - blp = xfs_dir2_block_leaf_p(btp); - xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr), - (uint)((char *)&blp[last + 1] - (char *)hdr - 1)); -} - -/* - * Log the block tail. - */ -static void -xfs_dir2_block_log_tail( - xfs_trans_t *tp, /* transaction structure */ - struct xfs_buf *bp) /* block buffer */ -{ - xfs_dir2_data_hdr_t *hdr = bp->b_addr; - xfs_dir2_block_tail_t *btp; - - btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr); - xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr), - (uint)((char *)(btp + 1) - (char *)hdr - 1)); -} - -/* - * Look up an entry in the block. This is the external routine, - * xfs_dir2_block_lookup_int does the real work. - */ -int /* error */ -xfs_dir2_block_lookup( - xfs_da_args_t *args) /* dir lookup arguments */ -{ - xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - struct xfs_buf *bp; /* block buffer */ - xfs_dir2_block_tail_t *btp; /* block tail */ - xfs_dir2_data_entry_t *dep; /* block data entry */ - xfs_inode_t *dp; /* incore inode */ - int ent; /* entry index */ - int error; /* error return value */ - xfs_mount_t *mp; /* filesystem mount point */ - - trace_xfs_dir2_block_lookup(args); - - /* - * Get the buffer, look up the entry. - * If not found (ENOENT) then return, have no buffer. - */ - if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) - return error; - dp = args->dp; - mp = dp->i_mount; - hdr = bp->b_addr; - xfs_dir3_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(args->geo, hdr); - blp = xfs_dir2_block_leaf_p(btp); - /* - * Get the offset from the leaf entry, to point to the data. - */ - dep = (xfs_dir2_data_entry_t *)((char *)hdr + - xfs_dir2_dataptr_to_off(args->geo, - be32_to_cpu(blp[ent].address))); - /* - * Fill in inode number, CI name if appropriate, release the block. - */ - args->inumber = be64_to_cpu(dep->inumber); - args->filetype = dp->d_ops->data_get_ftype(dep); - error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); - xfs_trans_brelse(args->trans, bp); - return error; -} - -/* - * Internal block lookup routine. - */ -static int /* error */ -xfs_dir2_block_lookup_int( - xfs_da_args_t *args, /* dir lookup arguments */ - struct xfs_buf **bpp, /* returned block buffer */ - int *entno) /* returned entry number */ -{ - xfs_dir2_dataptr_t addr; /* data entry address */ - xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - struct xfs_buf *bp; /* block buffer */ - xfs_dir2_block_tail_t *btp; /* block tail */ - xfs_dir2_data_entry_t *dep; /* block data entry */ - xfs_inode_t *dp; /* incore inode */ - int error; /* error return value */ - xfs_dahash_t hash; /* found hash value */ - int high; /* binary search high index */ - int low; /* binary search low index */ - int mid; /* binary search current idx */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_trans_t *tp; /* transaction pointer */ - enum xfs_dacmp cmp; /* comparison result */ - - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - - error = xfs_dir3_block_read(tp, dp, &bp); - if (error) - return error; - - hdr = bp->b_addr; - xfs_dir3_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(args->geo, hdr); - blp = xfs_dir2_block_leaf_p(btp); - /* - * Loop doing a binary search for our hash value. - * Find our entry, ENOENT if it's not there. - */ - for (low = 0, high = be32_to_cpu(btp->count) - 1; ; ) { - ASSERT(low <= high); - mid = (low + high) >> 1; - if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval) - break; - if (hash < args->hashval) - low = mid + 1; - else - high = mid - 1; - if (low > high) { - ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); - xfs_trans_brelse(tp, bp); - return ENOENT; - } - } - /* - * Back up to the first one with the right hash value. - */ - while (mid > 0 && be32_to_cpu(blp[mid - 1].hashval) == args->hashval) { - mid--; - } - /* - * Now loop forward through all the entries with the - * right hash value looking for our name. - */ - do { - if ((addr = be32_to_cpu(blp[mid].address)) == XFS_DIR2_NULL_DATAPTR) - continue; - /* - * Get pointer to the entry from the leaf. - */ - dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, addr)); - /* - * Compare name and if it's an exact match, return the index - * and buffer. If it's the first case-insensitive match, store - * the index and buffer and continue looking for an exact match. - */ - cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); - if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { - args->cmpresult = cmp; - *bpp = bp; - *entno = mid; - if (cmp == XFS_CMP_EXACT) - return 0; - } - } while (++mid < be32_to_cpu(btp->count) && - be32_to_cpu(blp[mid].hashval) == hash); - - ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); - /* - * Here, we can only be doing a lookup (not a rename or replace). - * If a case-insensitive match was found earlier, return success. - */ - if (args->cmpresult == XFS_CMP_CASE) - return 0; - /* - * No match, release the buffer and return ENOENT. - */ - xfs_trans_brelse(tp, bp); - return ENOENT; -} - -/* - * Remove an entry from a block format directory. - * If that makes the block small enough to fit in shortform, transform it. - */ -int /* error */ -xfs_dir2_block_removename( - xfs_da_args_t *args) /* directory operation args */ -{ - xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */ - struct xfs_buf *bp; /* block buffer */ - xfs_dir2_block_tail_t *btp; /* block tail */ - xfs_dir2_data_entry_t *dep; /* block data entry */ - xfs_inode_t *dp; /* incore inode */ - int ent; /* block leaf entry index */ - int error; /* error return value */ - xfs_mount_t *mp; /* filesystem mount point */ - int needlog; /* need to log block header */ - int needscan; /* need to fixup bestfree */ - xfs_dir2_sf_hdr_t sfh; /* shortform header */ - int size; /* shortform size */ - xfs_trans_t *tp; /* transaction pointer */ - - trace_xfs_dir2_block_removename(args); - - /* - * Look up the entry in the block. Gets the buffer and entry index. - * It will always be there, the vnodeops level does a lookup first. - */ - if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) { - return error; - } - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - hdr = bp->b_addr; - btp = xfs_dir2_block_tail_p(args->geo, hdr); - blp = xfs_dir2_block_leaf_p(btp); - /* - * Point to the data entry using the leaf entry. - */ - dep = (xfs_dir2_data_entry_t *)((char *)hdr + - xfs_dir2_dataptr_to_off(args->geo, - be32_to_cpu(blp[ent].address))); - /* - * Mark the data entry's space free. - */ - needlog = needscan = 0; - xfs_dir2_data_make_free(args, bp, - (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), - dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); - /* - * Fix up the block tail. - */ - be32_add_cpu(&btp->stale, 1); - xfs_dir2_block_log_tail(tp, bp); - /* - * Remove the leaf entry by marking it stale. - */ - blp[ent].address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir2_block_log_leaf(tp, bp, ent, ent); - /* - * Fix up bestfree, log the header if necessary. - */ - if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); - if (needlog) - xfs_dir2_data_log_header(args, bp); - xfs_dir3_data_check(dp, bp); - /* - * See if the size as a shortform is good enough. - */ - size = xfs_dir2_block_sfsize(dp, hdr, &sfh); - if (size > XFS_IFORK_DSIZE(dp)) - return 0; - - /* - * If it works, do the conversion. - */ - return xfs_dir2_block_to_sf(args, bp, size, &sfh); -} - -/* - * Replace an entry in a V2 block directory. - * Change the inode number to the new value. - */ -int /* error */ -xfs_dir2_block_replace( - xfs_da_args_t *args) /* directory operation args */ -{ - xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - struct xfs_buf *bp; /* block buffer */ - xfs_dir2_block_tail_t *btp; /* block tail */ - xfs_dir2_data_entry_t *dep; /* block data entry */ - xfs_inode_t *dp; /* incore inode */ - int ent; /* leaf entry index */ - int error; /* error return value */ - xfs_mount_t *mp; /* filesystem mount point */ - - trace_xfs_dir2_block_replace(args); - - /* - * Lookup the entry in the directory. Get buffer and entry index. - * This will always succeed since the caller has already done a lookup. - */ - if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) { - return error; - } - dp = args->dp; - mp = dp->i_mount; - hdr = bp->b_addr; - btp = xfs_dir2_block_tail_p(args->geo, hdr); - blp = xfs_dir2_block_leaf_p(btp); - /* - * Point to the data entry we need to change. - */ - dep = (xfs_dir2_data_entry_t *)((char *)hdr + - xfs_dir2_dataptr_to_off(args->geo, - be32_to_cpu(blp[ent].address))); - ASSERT(be64_to_cpu(dep->inumber) != args->inumber); - /* - * Change the inode number to the new value. - */ - dep->inumber = cpu_to_be64(args->inumber); - dp->d_ops->data_put_ftype(dep, args->filetype); - xfs_dir2_data_log_entry(args, bp, dep); - xfs_dir3_data_check(dp, bp); - return 0; -} - -/* - * Qsort comparison routine for the block leaf entries. - */ -static int /* sort order */ -xfs_dir2_block_sort( - const void *a, /* first leaf entry */ - const void *b) /* second leaf entry */ -{ - const xfs_dir2_leaf_entry_t *la; /* first leaf entry */ - const xfs_dir2_leaf_entry_t *lb; /* second leaf entry */ - - la = a; - lb = b; - return be32_to_cpu(la->hashval) < be32_to_cpu(lb->hashval) ? -1 : - (be32_to_cpu(la->hashval) > be32_to_cpu(lb->hashval) ? 1 : 0); -} - -/* - * Convert a V2 leaf directory to a V2 block directory if possible. - */ -int /* error */ -xfs_dir2_leaf_to_block( - xfs_da_args_t *args, /* operation arguments */ - struct xfs_buf *lbp, /* leaf buffer */ - struct xfs_buf *dbp) /* data buffer */ -{ - __be16 *bestsp; /* leaf bests table */ - xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_block_tail_t *btp; /* block tail */ - xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_data_unused_t *dup; /* unused data entry */ - int error; /* error return value */ - int from; /* leaf from index */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry */ - xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - xfs_mount_t *mp; /* file system mount point */ - int needlog; /* need to log data header */ - int needscan; /* need to scan for bestfree */ - xfs_dir2_sf_hdr_t sfh; /* shortform header */ - int size; /* bytes used */ - __be16 *tagp; /* end of entry (tag) */ - int to; /* block/leaf to index */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir2_leaf_entry *ents; - struct xfs_dir3_icleaf_hdr leafhdr; - - trace_xfs_dir2_leaf_to_block(args); - - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - leaf = lbp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - - ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC || - leafhdr.magic == XFS_DIR3_LEAF1_MAGIC); - /* - * If there are data blocks other than the first one, take this - * opportunity to remove trailing empty data blocks that may have - * been left behind during no-space-reservation operations. - * These will show up in the leaf bests table. - */ - while (dp->i_d.di_size > args->geo->blksize) { - int hdrsz; - - hdrsz = dp->d_ops->data_entry_offset; - bestsp = xfs_dir2_leaf_bests_p(ltp); - if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == - args->geo->blksize - hdrsz) { - if ((error = - xfs_dir2_leaf_trim_data(args, lbp, - (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) - return error; - } else - return 0; - } - /* - * Read the data block if we don't already have it, give up if it fails. - */ - if (!dbp) { - error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp); - if (error) - return error; - } - hdr = dbp->b_addr; - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); - - /* - * Size of the "leaf" area in the block. - */ - size = (uint)sizeof(xfs_dir2_block_tail_t) + - (uint)sizeof(*lep) * (leafhdr.count - leafhdr.stale); - /* - * Look at the last data entry. - */ - tagp = (__be16 *)((char *)hdr + args->geo->blksize) - 1; - dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); - /* - * If it's not free or is too short we can't do it. - */ - if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG || - be16_to_cpu(dup->length) < size) - return 0; - - /* - * Start converting it to block form. - */ - xfs_dir3_block_init(mp, tp, dbp, dp); - - needlog = 1; - needscan = 0; - /* - * Use up the space at the end of the block (blp/btp). - */ - xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size, - &needlog, &needscan); - /* - * Initialize the block tail. - */ - btp = xfs_dir2_block_tail_p(args->geo, hdr); - btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale); - btp->stale = 0; - xfs_dir2_block_log_tail(tp, dbp); - /* - * Initialize the block leaf area. We compact out stale entries. - */ - lep = xfs_dir2_block_leaf_p(btp); - for (from = to = 0; from < leafhdr.count; from++) { - if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) - continue; - lep[to++] = ents[from]; - } - ASSERT(to == be32_to_cpu(btp->count)); - xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1); - /* - * Scan the bestfree if we need it and log the data block header. - */ - if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); - if (needlog) - xfs_dir2_data_log_header(args, dbp); - /* - * Pitch the old leaf block. - */ - error = xfs_da_shrink_inode(args, args->geo->leafblk, lbp); - if (error) - return error; - - /* - * Now see if the resulting block can be shrunken to shortform. - */ - size = xfs_dir2_block_sfsize(dp, hdr, &sfh); - if (size > XFS_IFORK_DSIZE(dp)) - return 0; - - return xfs_dir2_block_to_sf(args, dbp, size, &sfh); -} - -/* - * Convert the shortform directory to block form. - */ -int /* error */ -xfs_dir2_sf_to_block( - xfs_da_args_t *args) /* operation arguments */ -{ - xfs_dir2_db_t blkno; /* dir-relative block # (0) */ - xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - struct xfs_buf *bp; /* block buffer */ - xfs_dir2_block_tail_t *btp; /* block tail pointer */ - xfs_dir2_data_entry_t *dep; /* data entry pointer */ - xfs_inode_t *dp; /* incore directory inode */ - int dummy; /* trash */ - xfs_dir2_data_unused_t *dup; /* unused entry pointer */ - int endoffset; /* end of data objects */ - int error; /* error return value */ - int i; /* index */ - xfs_mount_t *mp; /* filesystem mount point */ - int needlog; /* need to log block header */ - int needscan; /* need to scan block freespc */ - int newoffset; /* offset from current entry */ - int offset; /* target block offset */ - xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ - xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */ - xfs_dir2_sf_hdr_t *sfp; /* shortform header */ - __be16 *tagp; /* end of data entry */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_name name; - struct xfs_ifork *ifp; - - trace_xfs_dir2_sf_to_block(args); - - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); - ASSERT(ifp->if_flags & XFS_IFINLINE); - /* - * Bomb out if the shortform directory is way too short. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - return EIO; - } - - oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data; - - ASSERT(ifp->if_bytes == dp->i_d.di_size); - ASSERT(ifp->if_u1.if_data != NULL); - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); - ASSERT(dp->i_d.di_nextents == 0); - - /* - * Copy the directory into a temporary buffer. - * Then pitch the incore inode data so we can make extents. - */ - sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP); - memcpy(sfp, oldsfp, ifp->if_bytes); - - xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); - xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK); - dp->i_d.di_size = 0; - - /* - * Add block 0 to the inode. - */ - error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); - if (error) { - kmem_free(sfp); - return error; - } - /* - * Initialize the data block, then convert it to block format. - */ - error = xfs_dir3_data_init(args, blkno, &bp); - if (error) { - kmem_free(sfp); - return error; - } - xfs_dir3_block_init(mp, tp, bp, dp); - hdr = bp->b_addr; - - /* - * Compute size of block "tail" area. - */ - i = (uint)sizeof(*btp) + - (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); - /* - * The whole thing is initialized to free by the init routine. - * Say we're using the leaf and tail area. - */ - dup = dp->d_ops->data_unused_p(hdr); - needlog = needscan = 0; - xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i, - i, &needlog, &needscan); - ASSERT(needscan == 0); - /* - * Fill in the tail. - */ - btp = xfs_dir2_block_tail_p(args->geo, hdr); - btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */ - btp->stale = 0; - blp = xfs_dir2_block_leaf_p(btp); - endoffset = (uint)((char *)blp - (char *)hdr); - /* - * Remove the freespace, we'll manage it. - */ - xfs_dir2_data_use_free(args, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), - be16_to_cpu(dup->length), &needlog, &needscan); - /* - * Create entry for . - */ - dep = dp->d_ops->data_dot_entry_p(hdr); - dep->inumber = cpu_to_be64(dp->i_ino); - dep->namelen = 1; - dep->name[0] = '.'; - dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); - tagp = dp->d_ops->data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(args, bp, dep); - blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); - blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( - (char *)dep - (char *)hdr)); - /* - * Create entry for .. - */ - dep = dp->d_ops->data_dotdot_entry_p(hdr); - dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp)); - dep->namelen = 2; - dep->name[0] = dep->name[1] = '.'; - dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); - tagp = dp->d_ops->data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(args, bp, dep); - blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); - blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( - (char *)dep - (char *)hdr)); - offset = dp->d_ops->data_first_offset; - /* - * Loop over existing entries, stuff them in. - */ - i = 0; - if (!sfp->count) - sfep = NULL; - else - sfep = xfs_dir2_sf_firstentry(sfp); - /* - * Need to preserve the existing offset values in the sf directory. - * Insert holes (unused entries) where necessary. - */ - while (offset < endoffset) { - /* - * sfep is null when we reach the end of the list. - */ - if (sfep == NULL) - newoffset = endoffset; - else - newoffset = xfs_dir2_sf_get_offset(sfep); - /* - * There should be a hole here, make one. - */ - if (offset < newoffset) { - dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); - dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - dup->length = cpu_to_be16(newoffset - offset); - *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( - ((char *)dup - (char *)hdr)); - xfs_dir2_data_log_unused(args, bp, dup); - xfs_dir2_data_freeinsert(hdr, - dp->d_ops->data_bestfree_p(hdr), - dup, &dummy); - offset += be16_to_cpu(dup->length); - continue; - } - /* - * Copy a real entry. - */ - dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset); - dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep)); - dep->namelen = sfep->namelen; - dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep)); - memcpy(dep->name, sfep->name, dep->namelen); - tagp = dp->d_ops->data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(args, bp, dep); - name.name = sfep->name; - name.len = sfep->namelen; - blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> - hashname(&name)); - blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( - (char *)dep - (char *)hdr)); - offset = (int)((char *)(tagp + 1) - (char *)hdr); - if (++i == sfp->count) - sfep = NULL; - else - sfep = dp->d_ops->sf_nextentry(sfp, sfep); - } - /* Done with the temporary buffer */ - kmem_free(sfp); - /* - * Sort the leaf entries by hash value. - */ - xfs_sort(blp, be32_to_cpu(btp->count), sizeof(*blp), xfs_dir2_block_sort); - /* - * Log the leaf entry area and tail. - * Already logged the header in data_init, ignore needlog. - */ - ASSERT(needscan == 0); - xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1); - xfs_dir2_block_log_tail(tp, bp); - xfs_dir3_data_check(dp, bp); - return 0; -} diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c deleted file mode 100644 index 8c2f6422648e..000000000000 --- a/fs/xfs/xfs_dir2_data.c +++ /dev/null @@ -1,1050 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_inode.h" -#include "xfs_dir2.h" -#include "xfs_dir2_priv.h" -#include "xfs_error.h" -#include "xfs_trans.h" -#include "xfs_buf_item.h" -#include "xfs_cksum.h" - -/* - * Check the consistency of the data block. - * The input can also be a block-format directory. - * Return 0 is the buffer is good, otherwise an error. - */ -int -__xfs_dir3_data_check( - struct xfs_inode *dp, /* incore inode pointer */ - struct xfs_buf *bp) /* data block's buffer */ -{ - xfs_dir2_dataptr_t addr; /* addr for leaf lookup */ - xfs_dir2_data_free_t *bf; /* bestfree table */ - xfs_dir2_block_tail_t *btp=NULL; /* block tail */ - int count; /* count of entries found */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_dir2_data_entry_t *dep; /* data entry */ - xfs_dir2_data_free_t *dfp; /* bestfree entry */ - xfs_dir2_data_unused_t *dup; /* unused entry */ - char *endp; /* end of useful data */ - int freeseen; /* mask of bestfrees seen */ - xfs_dahash_t hash; /* hash of current name */ - int i; /* leaf index */ - int lastfree; /* last entry was unused */ - xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */ - xfs_mount_t *mp; /* filesystem mount point */ - char *p; /* current data position */ - int stale; /* count of stale leaves */ - struct xfs_name name; - const struct xfs_dir_ops *ops; - struct xfs_da_geometry *geo; - - mp = bp->b_target->bt_mount; - geo = mp->m_dir_geo; - - /* - * We can be passed a null dp here from a verifier, so we need to go the - * hard way to get them. - */ - ops = xfs_dir_get_ops(mp, dp); - - hdr = bp->b_addr; - p = (char *)ops->data_entry_p(hdr); - - switch (hdr->magic) { - case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): - case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): - btp = xfs_dir2_block_tail_p(geo, hdr); - lep = xfs_dir2_block_leaf_p(btp); - endp = (char *)lep; - - /* - * The number of leaf entries is limited by the size of the - * block and the amount of space used by the data entries. - * We don't know how much space is used by the data entries yet, - * so just ensure that the count falls somewhere inside the - * block right now. - */ - XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) < - ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); - break; - case cpu_to_be32(XFS_DIR3_DATA_MAGIC): - case cpu_to_be32(XFS_DIR2_DATA_MAGIC): - endp = (char *)hdr + geo->blksize; - break; - default: - XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); - return EFSCORRUPTED; - } - - /* - * Account for zero bestfree entries. - */ - bf = ops->data_bestfree_p(hdr); - count = lastfree = freeseen = 0; - if (!bf[0].length) { - XFS_WANT_CORRUPTED_RETURN(!bf[0].offset); - freeseen |= 1 << 0; - } - if (!bf[1].length) { - XFS_WANT_CORRUPTED_RETURN(!bf[1].offset); - freeseen |= 1 << 1; - } - if (!bf[2].length) { - XFS_WANT_CORRUPTED_RETURN(!bf[2].offset); - freeseen |= 1 << 2; - } - - XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >= - be16_to_cpu(bf[1].length)); - XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >= - be16_to_cpu(bf[2].length)); - /* - * Loop over the data/unused entries. - */ - while (p < endp) { - dup = (xfs_dir2_data_unused_t *)p; - /* - * If it's unused, look for the space in the bestfree table. - * If we find it, account for that, else make sure it - * doesn't need to be there. - */ - if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - XFS_WANT_CORRUPTED_RETURN(lastfree == 0); - XFS_WANT_CORRUPTED_RETURN( - be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == - (char *)dup - (char *)hdr); - dfp = xfs_dir2_data_freefind(hdr, bf, dup); - if (dfp) { - i = (int)(dfp - bf); - XFS_WANT_CORRUPTED_RETURN( - (freeseen & (1 << i)) == 0); - freeseen |= 1 << i; - } else { - XFS_WANT_CORRUPTED_RETURN( - be16_to_cpu(dup->length) <= - be16_to_cpu(bf[2].length)); - } - p += be16_to_cpu(dup->length); - lastfree = 1; - continue; - } - /* - * It's a real entry. Validate the fields. - * If this is a block directory then make sure it's - * in the leaf section of the block. - * The linear search is crude but this is DEBUG code. - */ - dep = (xfs_dir2_data_entry_t *)p; - XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0); - XFS_WANT_CORRUPTED_RETURN( - !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); - XFS_WANT_CORRUPTED_RETURN( - be16_to_cpu(*ops->data_entry_tag_p(dep)) == - (char *)dep - (char *)hdr); - XFS_WANT_CORRUPTED_RETURN( - ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); - count++; - lastfree = 0; - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, - (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)hdr)); - name.name = dep->name; - name.len = dep->namelen; - hash = mp->m_dirnameops->hashname(&name); - for (i = 0; i < be32_to_cpu(btp->count); i++) { - if (be32_to_cpu(lep[i].address) == addr && - be32_to_cpu(lep[i].hashval) == hash) - break; - } - XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count)); - } - p += ops->data_entsize(dep->namelen); - } - /* - * Need to have seen all the entries and all the bestfree slots. - */ - XFS_WANT_CORRUPTED_RETURN(freeseen == 7); - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { - if (lep[i].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) - stale++; - if (i > 0) - XFS_WANT_CORRUPTED_RETURN( - be32_to_cpu(lep[i].hashval) >= - be32_to_cpu(lep[i - 1].hashval)); - } - XFS_WANT_CORRUPTED_RETURN(count == - be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); - XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale)); - } - return 0; -} - -static bool -xfs_dir3_data_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - return false; - if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) - return false; - if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; - } else { - if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) - return false; - } - if (__xfs_dir3_data_check(NULL, bp)) - return false; - return true; -} - -/* - * Readahead of the first block of the directory when it is opened is completely - * oblivious to the format of the directory. Hence we can either get a block - * format buffer or a data format buffer on readahead. - */ -static void -xfs_dir3_data_reada_verify( - struct xfs_buf *bp) -{ - struct xfs_dir2_data_hdr *hdr = bp->b_addr; - - switch (hdr->magic) { - case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): - case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): - bp->b_ops = &xfs_dir3_block_buf_ops; - bp->b_ops->verify_read(bp); - return; - case cpu_to_be32(XFS_DIR2_DATA_MAGIC): - case cpu_to_be32(XFS_DIR3_DATA_MAGIC): - xfs_dir3_data_verify(bp); - return; - default: - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - break; - } -} - -static void -xfs_dir3_data_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (!xfs_dir3_data_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); -} - -static void -xfs_dir3_data_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - - if (!xfs_dir3_data_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (bip) - hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - - xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); -} - -const struct xfs_buf_ops xfs_dir3_data_buf_ops = { - .verify_read = xfs_dir3_data_read_verify, - .verify_write = xfs_dir3_data_write_verify, -}; - -static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { - .verify_read = xfs_dir3_data_reada_verify, - .verify_write = xfs_dir3_data_write_verify, -}; - - -int -xfs_dir3_data_read( - struct xfs_trans *tp, - struct xfs_inode *dp, - xfs_dablk_t bno, - xfs_daddr_t mapped_bno, - struct xfs_buf **bpp) -{ - int err; - - err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, - XFS_DATA_FORK, &xfs_dir3_data_buf_ops); - if (!err && tp) - xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); - return err; -} - -int -xfs_dir3_data_readahead( - struct xfs_inode *dp, - xfs_dablk_t bno, - xfs_daddr_t mapped_bno) -{ - return xfs_da_reada_buf(dp, bno, mapped_bno, - XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops); -} - -/* - * Given a data block and an unused entry from that block, - * return the bestfree entry if any that corresponds to it. - */ -xfs_dir2_data_free_t * -xfs_dir2_data_freefind( - struct xfs_dir2_data_hdr *hdr, /* data block header */ - struct xfs_dir2_data_free *bf, /* bestfree table pointer */ - struct xfs_dir2_data_unused *dup) /* unused space */ -{ - xfs_dir2_data_free_t *dfp; /* bestfree entry */ - xfs_dir2_data_aoff_t off; /* offset value needed */ -#ifdef DEBUG - int matched; /* matched the value */ - int seenzero; /* saw a 0 bestfree entry */ -#endif - - off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); - -#ifdef DEBUG - /* - * Validate some consistency in the bestfree table. - * Check order, non-overlapping entries, and if we find the - * one we're looking for it has to be exact. - */ - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - for (dfp = &bf[0], seenzero = matched = 0; - dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; - dfp++) { - if (!dfp->offset) { - ASSERT(!dfp->length); - seenzero = 1; - continue; - } - ASSERT(seenzero == 0); - if (be16_to_cpu(dfp->offset) == off) { - matched = 1; - ASSERT(dfp->length == dup->length); - } else if (off < be16_to_cpu(dfp->offset)) - ASSERT(off + be16_to_cpu(dup->length) <= be16_to_cpu(dfp->offset)); - else - ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); - ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); - if (dfp > &bf[0]) - ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); - } -#endif - /* - * If this is smaller than the smallest bestfree entry, - * it can't be there since they're sorted. - */ - if (be16_to_cpu(dup->length) < - be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length)) - return NULL; - /* - * Look at the three bestfree entries for our guy. - */ - for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) { - if (!dfp->offset) - return NULL; - if (be16_to_cpu(dfp->offset) == off) - return dfp; - } - /* - * Didn't find it. This only happens if there are duplicate lengths. - */ - return NULL; -} - -/* - * Insert an unused-space entry into the bestfree table. - */ -xfs_dir2_data_free_t * /* entry inserted */ -xfs_dir2_data_freeinsert( - struct xfs_dir2_data_hdr *hdr, /* data block pointer */ - struct xfs_dir2_data_free *dfp, /* bestfree table pointer */ - struct xfs_dir2_data_unused *dup, /* unused space */ - int *loghead) /* log the data header (out) */ -{ - xfs_dir2_data_free_t new; /* new bestfree entry */ - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - - new.length = dup->length; - new.offset = cpu_to_be16((char *)dup - (char *)hdr); - - /* - * Insert at position 0, 1, or 2; or not at all. - */ - if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) { - dfp[2] = dfp[1]; - dfp[1] = dfp[0]; - dfp[0] = new; - *loghead = 1; - return &dfp[0]; - } - if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) { - dfp[2] = dfp[1]; - dfp[1] = new; - *loghead = 1; - return &dfp[1]; - } - if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) { - dfp[2] = new; - *loghead = 1; - return &dfp[2]; - } - return NULL; -} - -/* - * Remove a bestfree entry from the table. - */ -STATIC void -xfs_dir2_data_freeremove( - struct xfs_dir2_data_hdr *hdr, /* data block header */ - struct xfs_dir2_data_free *bf, /* bestfree table pointer */ - struct xfs_dir2_data_free *dfp, /* bestfree entry pointer */ - int *loghead) /* out: log data header */ -{ - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - - /* - * It's the first entry, slide the next 2 up. - */ - if (dfp == &bf[0]) { - bf[0] = bf[1]; - bf[1] = bf[2]; - } - /* - * It's the second entry, slide the 3rd entry up. - */ - else if (dfp == &bf[1]) - bf[1] = bf[2]; - /* - * Must be the last entry. - */ - else - ASSERT(dfp == &bf[2]); - /* - * Clear the 3rd entry, must be zero now. - */ - bf[2].length = 0; - bf[2].offset = 0; - *loghead = 1; -} - -/* - * Given a data block, reconstruct its bestfree map. - */ -void -xfs_dir2_data_freescan( - struct xfs_inode *dp, - struct xfs_dir2_data_hdr *hdr, - int *loghead) -{ - xfs_dir2_block_tail_t *btp; /* block tail */ - xfs_dir2_data_entry_t *dep; /* active data entry */ - xfs_dir2_data_unused_t *dup; /* unused data entry */ - struct xfs_dir2_data_free *bf; - char *endp; /* end of block's data */ - char *p; /* current entry pointer */ - struct xfs_da_geometry *geo = dp->i_mount->m_dir_geo; - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - - /* - * Start by clearing the table. - */ - bf = dp->d_ops->data_bestfree_p(hdr); - memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT); - *loghead = 1; - /* - * Set up pointers. - */ - p = (char *)dp->d_ops->data_entry_p(hdr); - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(geo, hdr); - endp = (char *)xfs_dir2_block_leaf_p(btp); - } else - endp = (char *)hdr + geo->blksize; - /* - * Loop over the block's entries. - */ - while (p < endp) { - dup = (xfs_dir2_data_unused_t *)p; - /* - * If it's a free entry, insert it. - */ - if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - ASSERT((char *)dup - (char *)hdr == - be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); - xfs_dir2_data_freeinsert(hdr, bf, dup, loghead); - p += be16_to_cpu(dup->length); - } - /* - * For active entries, check their tags and skip them. - */ - else { - dep = (xfs_dir2_data_entry_t *)p; - ASSERT((char *)dep - (char *)hdr == - be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep))); - p += dp->d_ops->data_entsize(dep->namelen); - } - } -} - -/* - * Initialize a data block at the given block number in the directory. - * Give back the buffer for the created block. - */ -int /* error */ -xfs_dir3_data_init( - xfs_da_args_t *args, /* directory operation args */ - xfs_dir2_db_t blkno, /* logical dir block number */ - struct xfs_buf **bpp) /* output block buffer */ -{ - struct xfs_buf *bp; /* block buffer */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_data_unused_t *dup; /* unused entry pointer */ - struct xfs_dir2_data_free *bf; - int error; /* error return value */ - int i; /* bestfree index */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_trans_t *tp; /* transaction pointer */ - int t; /* temp */ - - dp = args->dp; - mp = dp->i_mount; - tp = args->trans; - /* - * Get the buffer set up for the block. - */ - error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno), - -1, &bp, XFS_DATA_FORK); - if (error) - return error; - bp->b_ops = &xfs_dir3_data_buf_ops; - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF); - - /* - * Initialize the header. - */ - hdr = bp->b_addr; - if (xfs_sb_version_hascrc(&mp->m_sb)) { - struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - - memset(hdr3, 0, sizeof(*hdr3)); - hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); - hdr3->blkno = cpu_to_be64(bp->b_bn); - hdr3->owner = cpu_to_be64(dp->i_ino); - uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); - - } else - hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); - - bf = dp->d_ops->data_bestfree_p(hdr); - bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset); - for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { - bf[i].length = 0; - bf[i].offset = 0; - } - - /* - * Set up an unused entry for the block's body. - */ - dup = dp->d_ops->data_unused_p(hdr); - dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - - t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset; - bf[0].length = cpu_to_be16(t); - dup->length = cpu_to_be16(t); - *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); - /* - * Log it and return it. - */ - xfs_dir2_data_log_header(args, bp); - xfs_dir2_data_log_unused(args, bp, dup); - *bpp = bp; - return 0; -} - -/* - * Log an active data entry from the block. - */ -void -xfs_dir2_data_log_entry( - struct xfs_da_args *args, - struct xfs_buf *bp, - xfs_dir2_data_entry_t *dep) /* data entry pointer */ -{ - struct xfs_dir2_data_hdr *hdr = bp->b_addr; - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - - xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr), - (uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) - - (char *)hdr - 1)); -} - -/* - * Log a data block header. - */ -void -xfs_dir2_data_log_header( - struct xfs_da_args *args, - struct xfs_buf *bp) -{ -#ifdef DEBUG - struct xfs_dir2_data_hdr *hdr = bp->b_addr; - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); -#endif - - xfs_trans_log_buf(args->trans, bp, 0, - args->dp->d_ops->data_entry_offset - 1); -} - -/* - * Log a data unused entry. - */ -void -xfs_dir2_data_log_unused( - struct xfs_da_args *args, - struct xfs_buf *bp, - xfs_dir2_data_unused_t *dup) /* data unused pointer */ -{ - xfs_dir2_data_hdr_t *hdr = bp->b_addr; - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - - /* - * Log the first part of the unused entry. - */ - xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr), - (uint)((char *)&dup->length + sizeof(dup->length) - - 1 - (char *)hdr)); - /* - * Log the end (tag) of the unused entry. - */ - xfs_trans_log_buf(args->trans, bp, - (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), - (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + - sizeof(xfs_dir2_data_off_t) - 1)); -} - -/* - * Make a byte range in the data block unused. - * Its current contents are unimportant. - */ -void -xfs_dir2_data_make_free( - struct xfs_da_args *args, - struct xfs_buf *bp, - xfs_dir2_data_aoff_t offset, /* starting byte offset */ - xfs_dir2_data_aoff_t len, /* length in bytes */ - int *needlogp, /* out: log header */ - int *needscanp) /* out: regen bestfree */ -{ - xfs_dir2_data_hdr_t *hdr; /* data block pointer */ - xfs_dir2_data_free_t *dfp; /* bestfree pointer */ - char *endptr; /* end of data area */ - int needscan; /* need to regen bestfree */ - xfs_dir2_data_unused_t *newdup; /* new unused entry */ - xfs_dir2_data_unused_t *postdup; /* unused entry after us */ - xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ - struct xfs_dir2_data_free *bf; - - hdr = bp->b_addr; - - /* - * Figure out where the end of the data area is. - */ - if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - endptr = (char *)hdr + args->geo->blksize; - else { - xfs_dir2_block_tail_t *btp; /* block tail */ - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - btp = xfs_dir2_block_tail_p(args->geo, hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); - } - /* - * If this isn't the start of the block, then back up to - * the previous entry and see if it's free. - */ - if (offset > args->dp->d_ops->data_entry_offset) { - __be16 *tagp; /* tag just before us */ - - tagp = (__be16 *)((char *)hdr + offset) - 1; - prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); - if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) - prevdup = NULL; - } else - prevdup = NULL; - /* - * If this isn't the end of the block, see if the entry after - * us is free. - */ - if ((char *)hdr + offset + len < endptr) { - postdup = - (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); - if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) - postdup = NULL; - } else - postdup = NULL; - ASSERT(*needscanp == 0); - needscan = 0; - /* - * Previous and following entries are both free, - * merge everything into a single free entry. - */ - bf = args->dp->d_ops->data_bestfree_p(hdr); - if (prevdup && postdup) { - xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ - - /* - * See if prevdup and/or postdup are in bestfree table. - */ - dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); - dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup); - /* - * We need a rescan unless there are exactly 2 free entries - * namely our two. Then we know what's happening, otherwise - * since the third bestfree is there, there might be more - * entries. - */ - needscan = (bf[2].length != 0); - /* - * Fix up the new big freespace. - */ - be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); - *xfs_dir2_data_unused_tag_p(prevdup) = - cpu_to_be16((char *)prevdup - (char *)hdr); - xfs_dir2_data_log_unused(args, bp, prevdup); - if (!needscan) { - /* - * Has to be the case that entries 0 and 1 are - * dfp and dfp2 (don't know which is which), and - * entry 2 is empty. - * Remove entry 1 first then entry 0. - */ - ASSERT(dfp && dfp2); - if (dfp == &bf[1]) { - dfp = &bf[0]; - ASSERT(dfp2 == dfp); - dfp2 = &bf[1]; - } - xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp); - xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); - /* - * Now insert the new entry. - */ - dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup, - needlogp); - ASSERT(dfp == &bf[0]); - ASSERT(dfp->length == prevdup->length); - ASSERT(!dfp[1].length); - ASSERT(!dfp[2].length); - } - } - /* - * The entry before us is free, merge with it. - */ - else if (prevdup) { - dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); - be16_add_cpu(&prevdup->length, len); - *xfs_dir2_data_unused_tag_p(prevdup) = - cpu_to_be16((char *)prevdup - (char *)hdr); - xfs_dir2_data_log_unused(args, bp, prevdup); - /* - * If the previous entry was in the table, the new entry - * is longer, so it will be in the table too. Remove - * the old one and add the new one. - */ - if (dfp) { - xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp); - } - /* - * Otherwise we need a scan if the new entry is big enough. - */ - else { - needscan = be16_to_cpu(prevdup->length) > - be16_to_cpu(bf[2].length); - } - } - /* - * The following entry is free, merge with it. - */ - else if (postdup) { - dfp = xfs_dir2_data_freefind(hdr, bf, postdup); - newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); - newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); - *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(args, bp, newdup); - /* - * If the following entry was in the table, the new entry - * is longer, so it will be in the table too. Remove - * the old one and add the new one. - */ - if (dfp) { - xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); - } - /* - * Otherwise we need a scan if the new entry is big enough. - */ - else { - needscan = be16_to_cpu(newdup->length) > - be16_to_cpu(bf[2].length); - } - } - /* - * Neither neighbor is free. Make a new entry. - */ - else { - newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); - newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - newdup->length = cpu_to_be16(len); - *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(args, bp, newdup); - xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); - } - *needscanp = needscan; -} - -/* - * Take a byte range out of an existing unused space and make it un-free. - */ -void -xfs_dir2_data_use_free( - struct xfs_da_args *args, - struct xfs_buf *bp, - xfs_dir2_data_unused_t *dup, /* unused entry */ - xfs_dir2_data_aoff_t offset, /* starting offset to use */ - xfs_dir2_data_aoff_t len, /* length to use */ - int *needlogp, /* out: need to log header */ - int *needscanp) /* out: need regen bestfree */ -{ - xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_dir2_data_free_t *dfp; /* bestfree pointer */ - int matchback; /* matches end of freespace */ - int matchfront; /* matches start of freespace */ - int needscan; /* need to regen bestfree */ - xfs_dir2_data_unused_t *newdup; /* new unused entry */ - xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ - int oldlen; /* old unused entry's length */ - struct xfs_dir2_data_free *bf; - - hdr = bp->b_addr; - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); - ASSERT(offset >= (char *)dup - (char *)hdr); - ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr); - ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); - /* - * Look up the entry in the bestfree table. - */ - oldlen = be16_to_cpu(dup->length); - bf = args->dp->d_ops->data_bestfree_p(hdr); - dfp = xfs_dir2_data_freefind(hdr, bf, dup); - ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length)); - /* - * Check for alignment with front and back of the entry. - */ - matchfront = (char *)dup - (char *)hdr == offset; - matchback = (char *)dup + oldlen - (char *)hdr == offset + len; - ASSERT(*needscanp == 0); - needscan = 0; - /* - * If we matched it exactly we just need to get rid of it from - * the bestfree table. - */ - if (matchfront && matchback) { - if (dfp) { - needscan = (bf[2].offset != 0); - if (!needscan) - xfs_dir2_data_freeremove(hdr, bf, dfp, - needlogp); - } - } - /* - * We match the first part of the entry. - * Make a new entry with the remaining freespace. - */ - else if (matchfront) { - newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); - newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - newdup->length = cpu_to_be16(oldlen - len); - *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(args, bp, newdup); - /* - * If it was in the table, remove it and add the new one. - */ - if (dfp) { - xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, - needlogp); - ASSERT(dfp != NULL); - ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); - /* - * If we got inserted at the last slot, - * that means we don't know if there was a better - * choice for the last slot, or not. Rescan. - */ - needscan = dfp == &bf[2]; - } - } - /* - * We match the last part of the entry. - * Trim the allocated space off the tail of the entry. - */ - else if (matchback) { - newdup = dup; - newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); - *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(args, bp, newdup); - /* - * If it was in the table, remove it and add the new one. - */ - if (dfp) { - xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, - needlogp); - ASSERT(dfp != NULL); - ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); - /* - * If we got inserted at the last slot, - * that means we don't know if there was a better - * choice for the last slot, or not. Rescan. - */ - needscan = dfp == &bf[2]; - } - } - /* - * Poking out the middle of an entry. - * Make two new entries. - */ - else { - newdup = dup; - newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); - *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(args, bp, newdup); - newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); - newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); - *xfs_dir2_data_unused_tag_p(newdup2) = - cpu_to_be16((char *)newdup2 - (char *)hdr); - xfs_dir2_data_log_unused(args, bp, newdup2); - /* - * If the old entry was in the table, we need to scan - * if the 3rd entry was valid, since these entries - * are smaller than the old one. - * If we don't need to scan that means there were 1 or 2 - * entries in the table, and removing the old and adding - * the 2 new will work. - */ - if (dfp) { - needscan = (bf[2].length != 0); - if (!needscan) { - xfs_dir2_data_freeremove(hdr, bf, dfp, - needlogp); - xfs_dir2_data_freeinsert(hdr, bf, newdup, - needlogp); - xfs_dir2_data_freeinsert(hdr, bf, newdup2, - needlogp); - } - } - } - *needscanp = needscan; -} diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c deleted file mode 100644 index 78b411bfc543..000000000000 --- a/fs/xfs/xfs_dir2_leaf.c +++ /dev/null @@ -1,1831 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_dir2.h" -#include "xfs_dir2_priv.h" -#include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_trans.h" -#include "xfs_buf_item.h" -#include "xfs_cksum.h" - -/* - * Local function declarations. - */ -static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp, - int *indexp, struct xfs_buf **dbpp); -static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args, - struct xfs_buf *bp, int first, int last); -static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args, - struct xfs_buf *bp); - -/* - * Check the internal consistency of a leaf1 block. - * Pop an assert if something is wrong. - */ -#ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ -do { \ - if (!xfs_dir3_leaf1_check((dp), (bp))) \ - ASSERT(0); \ -} while (0); - -STATIC bool -xfs_dir3_leaf1_check( - struct xfs_inode *dp, - struct xfs_buf *bp) -{ - struct xfs_dir2_leaf *leaf = bp->b_addr; - struct xfs_dir3_icleaf_hdr leafhdr; - - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - - if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { - struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; - if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; - } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) - return false; - - return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); -} -#else -#define xfs_dir3_leaf_check(dp, bp) -#endif - -bool -xfs_dir3_leaf_check_int( - struct xfs_mount *mp, - struct xfs_inode *dp, - struct xfs_dir3_icleaf_hdr *hdr, - struct xfs_dir2_leaf *leaf) -{ - struct xfs_dir2_leaf_entry *ents; - xfs_dir2_leaf_tail_t *ltp; - int stale; - int i; - const struct xfs_dir_ops *ops; - struct xfs_dir3_icleaf_hdr leafhdr; - struct xfs_da_geometry *geo = mp->m_dir_geo; - - /* - * we can be passed a null dp here from a verifier, so we need to go the - * hard way to get them. - */ - ops = xfs_dir_get_ops(mp, dp); - - if (!hdr) { - ops->leaf_hdr_from_disk(&leafhdr, leaf); - hdr = &leafhdr; - } - - ents = ops->leaf_ents_p(leaf); - ltp = xfs_dir2_leaf_tail_p(geo, leaf); - - /* - * XXX (dgc): This value is not restrictive enough. - * Should factor in the size of the bests table as well. - * We can deduce a value for that from di_size. - */ - if (hdr->count > ops->leaf_max_ents(geo)) - return false; - - /* Leaves and bests don't overlap in leaf format. */ - if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || - hdr->magic == XFS_DIR3_LEAF1_MAGIC) && - (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) - return false; - - /* Check hash value order, count stale entries. */ - for (i = stale = 0; i < hdr->count; i++) { - if (i + 1 < hdr->count) { - if (be32_to_cpu(ents[i].hashval) > - be32_to_cpu(ents[i + 1].hashval)) - return false; - } - if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) - stale++; - } - if (hdr->stale != stale) - return false; - return true; -} - -/* - * We verify the magic numbers before decoding the leaf header so that on debug - * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due - * to incorrect magic numbers. - */ -static bool -xfs_dir3_leaf_verify( - struct xfs_buf *bp, - __uint16_t magic) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_dir2_leaf *leaf = bp->b_addr; - - ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; - __uint16_t magic3; - - magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC - : XFS_DIR3_LEAFN_MAGIC; - - if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) - return false; - if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid)) - return false; - if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; - } else { - if (leaf->hdr.info.magic != cpu_to_be16(magic)) - return false; - } - - return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); -} - -static void -__read_verify( - struct xfs_buf *bp, - __uint16_t magic) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (!xfs_dir3_leaf_verify(bp, magic)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); -} - -static void -__write_verify( - struct xfs_buf *bp, - __uint16_t magic) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; - - if (!xfs_dir3_leaf_verify(bp, magic)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (bip) - hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - - xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); -} - -static void -xfs_dir3_leaf1_read_verify( - struct xfs_buf *bp) -{ - __read_verify(bp, XFS_DIR2_LEAF1_MAGIC); -} - -static void -xfs_dir3_leaf1_write_verify( - struct xfs_buf *bp) -{ - __write_verify(bp, XFS_DIR2_LEAF1_MAGIC); -} - -static void -xfs_dir3_leafn_read_verify( - struct xfs_buf *bp) -{ - __read_verify(bp, XFS_DIR2_LEAFN_MAGIC); -} - -static void -xfs_dir3_leafn_write_verify( - struct xfs_buf *bp) -{ - __write_verify(bp, XFS_DIR2_LEAFN_MAGIC); -} - -const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = { - .verify_read = xfs_dir3_leaf1_read_verify, - .verify_write = xfs_dir3_leaf1_write_verify, -}; - -const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { - .verify_read = xfs_dir3_leafn_read_verify, - .verify_write = xfs_dir3_leafn_write_verify, -}; - -static int -xfs_dir3_leaf_read( - struct xfs_trans *tp, - struct xfs_inode *dp, - xfs_dablk_t fbno, - xfs_daddr_t mappedbno, - struct xfs_buf **bpp) -{ - int err; - - err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, - XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops); - if (!err && tp) - xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF); - return err; -} - -int -xfs_dir3_leafn_read( - struct xfs_trans *tp, - struct xfs_inode *dp, - xfs_dablk_t fbno, - xfs_daddr_t mappedbno, - struct xfs_buf **bpp) -{ - int err; - - err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, - XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops); - if (!err && tp) - xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF); - return err; -} - -/* - * Initialize a new leaf block, leaf1 or leafn magic accepted. - */ -static void -xfs_dir3_leaf_init( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_buf *bp, - xfs_ino_t owner, - __uint16_t type) -{ - struct xfs_dir2_leaf *leaf = bp->b_addr; - - ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC); - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; - - memset(leaf3, 0, sizeof(*leaf3)); - - leaf3->info.hdr.magic = (type == XFS_DIR2_LEAF1_MAGIC) - ? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) - : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); - leaf3->info.blkno = cpu_to_be64(bp->b_bn); - leaf3->info.owner = cpu_to_be64(owner); - uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_uuid); - } else { - memset(leaf, 0, sizeof(*leaf)); - leaf->hdr.info.magic = cpu_to_be16(type); - } - - /* - * If it's a leaf-format directory initialize the tail. - * Caller is responsible for initialising the bests table. - */ - if (type == XFS_DIR2_LEAF1_MAGIC) { - struct xfs_dir2_leaf_tail *ltp; - - ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf); - ltp->bestcount = 0; - bp->b_ops = &xfs_dir3_leaf1_buf_ops; - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF); - } else { - bp->b_ops = &xfs_dir3_leafn_buf_ops; - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF); - } -} - -int -xfs_dir3_leaf_get_buf( - xfs_da_args_t *args, - xfs_dir2_db_t bno, - struct xfs_buf **bpp, - __uint16_t magic) -{ - struct xfs_inode *dp = args->dp; - struct xfs_trans *tp = args->trans; - struct xfs_mount *mp = dp->i_mount; - struct xfs_buf *bp; - int error; - - ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); - ASSERT(bno >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) && - bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); - - error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno), - -1, &bp, XFS_DATA_FORK); - if (error) - return error; - - xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic); - xfs_dir3_leaf_log_header(args, bp); - if (magic == XFS_DIR2_LEAF1_MAGIC) - xfs_dir3_leaf_log_tail(args, bp); - *bpp = bp; - return 0; -} - -/* - * Convert a block form directory to a leaf form directory. - */ -int /* error */ -xfs_dir2_block_to_leaf( - xfs_da_args_t *args, /* operation arguments */ - struct xfs_buf *dbp) /* input block's buffer */ -{ - __be16 *bestsp; /* leaf's bestsp entries */ - xfs_dablk_t blkno; /* leaf block's bno */ - xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */ - xfs_dir2_block_tail_t *btp; /* block's tail */ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return code */ - struct xfs_buf *lbp; /* leaf block's buffer */ - xfs_dir2_db_t ldb; /* leaf block's bno */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_tail_t *ltp; /* leaf's tail */ - xfs_mount_t *mp; /* filesystem mount point */ - int needlog; /* need to log block header */ - int needscan; /* need to rescan bestfree */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir2_data_free *bf; - struct xfs_dir2_leaf_entry *ents; - struct xfs_dir3_icleaf_hdr leafhdr; - - trace_xfs_dir2_block_to_leaf(args); - - dp = args->dp; - mp = dp->i_mount; - tp = args->trans; - /* - * Add the leaf block to the inode. - * This interface will only put blocks in the leaf/node range. - * Since that's empty now, we'll get the root (block 0 in range). - */ - if ((error = xfs_da_grow_inode(args, &blkno))) { - return error; - } - ldb = xfs_dir2_da_to_db(args->geo, blkno); - ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET)); - /* - * Initialize the leaf block, get a buffer for it. - */ - error = xfs_dir3_leaf_get_buf(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC); - if (error) - return error; - - leaf = lbp->b_addr; - hdr = dbp->b_addr; - xfs_dir3_data_check(dp, dbp); - btp = xfs_dir2_block_tail_p(args->geo, hdr); - blp = xfs_dir2_block_leaf_p(btp); - bf = dp->d_ops->data_bestfree_p(hdr); - ents = dp->d_ops->leaf_ents_p(leaf); - - /* - * Set the counts in the leaf header. - */ - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - leafhdr.count = be32_to_cpu(btp->count); - leafhdr.stale = be32_to_cpu(btp->stale); - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(args, lbp); - - /* - * Could compact these but I think we always do the conversion - * after squeezing out stale entries. - */ - memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1); - needscan = 0; - needlog = 1; - /* - * Make the space formerly occupied by the leaf entries and block - * tail be free. - */ - xfs_dir2_data_make_free(args, dbp, - (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), - (xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize - - (char *)blp), - &needlog, &needscan); - /* - * Fix up the block header, make it a data block. - */ - dbp->b_ops = &xfs_dir3_data_buf_ops; - xfs_trans_buf_set_type(tp, dbp, XFS_BLFT_DIR_DATA_BUF); - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) - hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); - else - hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); - - if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); - /* - * Set up leaf tail and bests table. - */ - ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - ltp->bestcount = cpu_to_be32(1); - bestsp = xfs_dir2_leaf_bests_p(ltp); - bestsp[0] = bf[0].length; - /* - * Log the data header and leaf bests table. - */ - if (needlog) - xfs_dir2_data_log_header(args, dbp); - xfs_dir3_leaf_check(dp, lbp); - xfs_dir3_data_check(dp, dbp); - xfs_dir3_leaf_log_bests(args, lbp, 0, 0); - return 0; -} - -STATIC void -xfs_dir3_leaf_find_stale( - struct xfs_dir3_icleaf_hdr *leafhdr, - struct xfs_dir2_leaf_entry *ents, - int index, - int *lowstale, - int *highstale) -{ - /* - * Find the first stale entry before our index, if any. - */ - for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) { - if (ents[*lowstale].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) - break; - } - - /* - * Find the first stale entry at or after our index, if any. - * Stop if the result would require moving more entries than using - * lowstale. - */ - for (*highstale = index; *highstale < leafhdr->count; ++*highstale) { - if (ents[*highstale].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) - break; - if (*lowstale >= 0 && index - *lowstale <= *highstale - index) - break; - } -} - -struct xfs_dir2_leaf_entry * -xfs_dir3_leaf_find_entry( - struct xfs_dir3_icleaf_hdr *leafhdr, - struct xfs_dir2_leaf_entry *ents, - int index, /* leaf table position */ - int compact, /* need to compact leaves */ - int lowstale, /* index of prev stale leaf */ - int highstale, /* index of next stale leaf */ - int *lfloglow, /* low leaf logging index */ - int *lfloghigh) /* high leaf logging index */ -{ - if (!leafhdr->stale) { - xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ - - /* - * Now we need to make room to insert the leaf entry. - * - * If there are no stale entries, just insert a hole at index. - */ - lep = &ents[index]; - if (index < leafhdr->count) - memmove(lep + 1, lep, - (leafhdr->count - index) * sizeof(*lep)); - - /* - * Record low and high logging indices for the leaf. - */ - *lfloglow = index; - *lfloghigh = leafhdr->count++; - return lep; - } - - /* - * There are stale entries. - * - * We will use one of them for the new entry. It's probably not at - * the right location, so we'll have to shift some up or down first. - * - * If we didn't compact before, we need to find the nearest stale - * entries before and after our insertion point. - */ - if (compact == 0) - xfs_dir3_leaf_find_stale(leafhdr, ents, index, - &lowstale, &highstale); - - /* - * If the low one is better, use it. - */ - if (lowstale >= 0 && - (highstale == leafhdr->count || - index - lowstale - 1 < highstale - index)) { - ASSERT(index - lowstale - 1 >= 0); - ASSERT(ents[lowstale].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); - - /* - * Copy entries up to cover the stale entry and make room - * for the new entry. - */ - if (index - lowstale - 1 > 0) { - memmove(&ents[lowstale], &ents[lowstale + 1], - (index - lowstale - 1) * - sizeof(xfs_dir2_leaf_entry_t)); - } - *lfloglow = MIN(lowstale, *lfloglow); - *lfloghigh = MAX(index - 1, *lfloghigh); - leafhdr->stale--; - return &ents[index - 1]; - } - - /* - * The high one is better, so use that one. - */ - ASSERT(highstale - index >= 0); - ASSERT(ents[highstale].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); - - /* - * Copy entries down to cover the stale entry and make room for the - * new entry. - */ - if (highstale - index > 0) { - memmove(&ents[index + 1], &ents[index], - (highstale - index) * sizeof(xfs_dir2_leaf_entry_t)); - } - *lfloglow = MIN(index, *lfloglow); - *lfloghigh = MAX(highstale, *lfloghigh); - leafhdr->stale--; - return &ents[index]; -} - -/* - * Add an entry to a leaf form directory. - */ -int /* error */ -xfs_dir2_leaf_addname( - xfs_da_args_t *args) /* operation arguments */ -{ - __be16 *bestsp; /* freespace table in leaf */ - int compact; /* need to compact leaves */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ - struct xfs_buf *dbp; /* data block buffer */ - xfs_dir2_data_entry_t *dep; /* data block entry */ - xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_data_unused_t *dup; /* data unused entry */ - int error; /* error return value */ - int grown; /* allocated new data block */ - int highstale; /* index of next stale leaf */ - int i; /* temporary, index */ - int index; /* leaf table position */ - struct xfs_buf *lbp; /* leaf's buffer */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - int length; /* length of new entry */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ - int lfloglow; /* low leaf logging index */ - int lfloghigh; /* high leaf logging index */ - int lowstale; /* index of prev stale leaf */ - xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */ - xfs_mount_t *mp; /* filesystem mount point */ - int needbytes; /* leaf block bytes needed */ - int needlog; /* need to log data header */ - int needscan; /* need to rescan data free */ - __be16 *tagp; /* end of data entry */ - xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_db_t use_block; /* data block number */ - struct xfs_dir2_data_free *bf; /* bestfree table */ - struct xfs_dir2_leaf_entry *ents; - struct xfs_dir3_icleaf_hdr leafhdr; - - trace_xfs_dir2_leaf_addname(args); - - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - - error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); - if (error) - return error; - - /* - * Look up the entry by hash value and name. - * We know it's not there, our caller has already done a lookup. - * So the index is of the entry to insert in front of. - * But if there are dup hash values the index is of the first of those. - */ - index = xfs_dir2_leaf_search_hash(args, lbp); - leaf = lbp->b_addr; - ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - bestsp = xfs_dir2_leaf_bests_p(ltp); - length = dp->d_ops->data_entsize(args->namelen); - - /* - * See if there are any entries with the same hash value - * and space in their block for the new entry. - * This is good because it puts multiple same-hash value entries - * in a data block, improving the lookup of those entries. - */ - for (use_block = -1, lep = &ents[index]; - index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; - index++, lep++) { - if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) - continue; - i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); - ASSERT(i < be32_to_cpu(ltp->bestcount)); - ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF)); - if (be16_to_cpu(bestsp[i]) >= length) { - use_block = i; - break; - } - } - /* - * Didn't find a block yet, linear search all the data blocks. - */ - if (use_block == -1) { - for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) { - /* - * Remember a block we see that's missing. - */ - if (bestsp[i] == cpu_to_be16(NULLDATAOFF) && - use_block == -1) - use_block = i; - else if (be16_to_cpu(bestsp[i]) >= length) { - use_block = i; - break; - } - } - } - /* - * How many bytes do we need in the leaf block? - */ - needbytes = 0; - if (!leafhdr.stale) - needbytes += sizeof(xfs_dir2_leaf_entry_t); - if (use_block == -1) - needbytes += sizeof(xfs_dir2_data_off_t); - - /* - * Now kill use_block if it refers to a missing block, so we - * can use it as an indication of allocation needed. - */ - if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF)) - use_block = -1; - /* - * If we don't have enough free bytes but we can make enough - * by compacting out stale entries, we'll do that. - */ - if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes && - leafhdr.stale > 1) - compact = 1; - - /* - * Otherwise if we don't have enough free bytes we need to - * convert to node form. - */ - else if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes) { - /* - * Just checking or no space reservation, give up. - */ - if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || - args->total == 0) { - xfs_trans_brelse(tp, lbp); - return ENOSPC; - } - /* - * Convert to node form. - */ - error = xfs_dir2_leaf_to_node(args, lbp); - if (error) - return error; - /* - * Then add the new entry. - */ - return xfs_dir2_node_addname(args); - } - /* - * Otherwise it will fit without compaction. - */ - else - compact = 0; - /* - * If just checking, then it will fit unless we needed to allocate - * a new data block. - */ - if (args->op_flags & XFS_DA_OP_JUSTCHECK) { - xfs_trans_brelse(tp, lbp); - return use_block == -1 ? ENOSPC : 0; - } - /* - * If no allocations are allowed, return now before we've - * changed anything. - */ - if (args->total == 0 && use_block == -1) { - xfs_trans_brelse(tp, lbp); - return ENOSPC; - } - /* - * Need to compact the leaf entries, removing stale ones. - * Leave one stale entry behind - the one closest to our - * insertion index - and we'll shift that one to our insertion - * point later. - */ - if (compact) { - xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale, - &highstale, &lfloglow, &lfloghigh); - } - /* - * There are stale entries, so we'll need log-low and log-high - * impossibly bad values later. - */ - else if (leafhdr.stale) { - lfloglow = leafhdr.count; - lfloghigh = -1; - } - /* - * If there was no data block space found, we need to allocate - * a new one. - */ - if (use_block == -1) { - /* - * Add the new data block. - */ - if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, - &use_block))) { - xfs_trans_brelse(tp, lbp); - return error; - } - /* - * Initialize the block. - */ - if ((error = xfs_dir3_data_init(args, use_block, &dbp))) { - xfs_trans_brelse(tp, lbp); - return error; - } - /* - * If we're adding a new data block on the end we need to - * extend the bests table. Copy it up one entry. - */ - if (use_block >= be32_to_cpu(ltp->bestcount)) { - bestsp--; - memmove(&bestsp[0], &bestsp[1], - be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0])); - be32_add_cpu(<p->bestcount, 1); - xfs_dir3_leaf_log_tail(args, lbp); - xfs_dir3_leaf_log_bests(args, lbp, 0, - be32_to_cpu(ltp->bestcount) - 1); - } - /* - * If we're filling in a previously empty block just log it. - */ - else - xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block); - hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); - bestsp[use_block] = bf[0].length; - grown = 1; - } else { - /* - * Already had space in some data block. - * Just read that one in. - */ - error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(args->geo, use_block), - -1, &dbp); - if (error) { - xfs_trans_brelse(tp, lbp); - return error; - } - hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); - grown = 0; - } - /* - * Point to the biggest freespace in our data block. - */ - dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(bf[0].offset)); - ASSERT(be16_to_cpu(dup->length) >= length); - needscan = needlog = 0; - /* - * Mark the initial part of our freespace in use for the new entry. - */ - xfs_dir2_data_use_free(args, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, - &needlog, &needscan); - /* - * Initialize our new entry (at last). - */ - dep = (xfs_dir2_data_entry_t *)dup; - dep->inumber = cpu_to_be64(args->inumber); - dep->namelen = args->namelen; - memcpy(dep->name, args->name, dep->namelen); - dp->d_ops->data_put_ftype(dep, args->filetype); - tagp = dp->d_ops->data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); - /* - * Need to scan fix up the bestfree table. - */ - if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); - /* - * Need to log the data block's header. - */ - if (needlog) - xfs_dir2_data_log_header(args, dbp); - xfs_dir2_data_log_entry(args, dbp, dep); - /* - * If the bests table needs to be changed, do it. - * Log the change unless we've already done that. - */ - if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) { - bestsp[use_block] = bf[0].length; - if (!grown) - xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block); - } - - lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale, - highstale, &lfloglow, &lfloghigh); - - /* - * Fill in the new leaf entry. - */ - lep->hashval = cpu_to_be32(args->hashval); - lep->address = cpu_to_be32( - xfs_dir2_db_off_to_dataptr(args->geo, use_block, - be16_to_cpu(*tagp))); - /* - * Log the leaf fields and give up the buffers. - */ - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(args, lbp); - xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh); - xfs_dir3_leaf_check(dp, lbp); - xfs_dir3_data_check(dp, dbp); - return 0; -} - -/* - * Compact out any stale entries in the leaf. - * Log the header and changed leaf entries, if any. - */ -void -xfs_dir3_leaf_compact( - xfs_da_args_t *args, /* operation arguments */ - struct xfs_dir3_icleaf_hdr *leafhdr, - struct xfs_buf *bp) /* leaf buffer */ -{ - int from; /* source leaf index */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - int loglow; /* first leaf entry to log */ - int to; /* target leaf index */ - struct xfs_dir2_leaf_entry *ents; - struct xfs_inode *dp = args->dp; - - leaf = bp->b_addr; - if (!leafhdr->stale) - return; - - /* - * Compress out the stale entries in place. - */ - ents = dp->d_ops->leaf_ents_p(leaf); - for (from = to = 0, loglow = -1; from < leafhdr->count; from++) { - if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) - continue; - /* - * Only actually copy the entries that are different. - */ - if (from > to) { - if (loglow == -1) - loglow = to; - ents[to] = ents[from]; - } - to++; - } - /* - * Update and log the header, log the leaf entries. - */ - ASSERT(leafhdr->stale == from - to); - leafhdr->count -= leafhdr->stale; - leafhdr->stale = 0; - - dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr); - xfs_dir3_leaf_log_header(args, bp); - if (loglow != -1) - xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1); -} - -/* - * Compact the leaf entries, removing stale ones. - * Leave one stale entry behind - the one closest to our - * insertion index - and the caller will shift that one to our insertion - * point later. - * Return new insertion index, where the remaining stale entry is, - * and leaf logging indices. - */ -void -xfs_dir3_leaf_compact_x1( - struct xfs_dir3_icleaf_hdr *leafhdr, - struct xfs_dir2_leaf_entry *ents, - int *indexp, /* insertion index */ - int *lowstalep, /* out: stale entry before us */ - int *highstalep, /* out: stale entry after us */ - int *lowlogp, /* out: low log index */ - int *highlogp) /* out: high log index */ -{ - int from; /* source copy index */ - int highstale; /* stale entry at/after index */ - int index; /* insertion index */ - int keepstale; /* source index of kept stale */ - int lowstale; /* stale entry before index */ - int newindex=0; /* new insertion index */ - int to; /* destination copy index */ - - ASSERT(leafhdr->stale > 1); - index = *indexp; - - xfs_dir3_leaf_find_stale(leafhdr, ents, index, &lowstale, &highstale); - - /* - * Pick the better of lowstale and highstale. - */ - if (lowstale >= 0 && - (highstale == leafhdr->count || - index - lowstale <= highstale - index)) - keepstale = lowstale; - else - keepstale = highstale; - /* - * Copy the entries in place, removing all the stale entries - * except keepstale. - */ - for (from = to = 0; from < leafhdr->count; from++) { - /* - * Notice the new value of index. - */ - if (index == from) - newindex = to; - if (from != keepstale && - ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { - if (from == to) - *lowlogp = to; - continue; - } - /* - * Record the new keepstale value for the insertion. - */ - if (from == keepstale) - lowstale = highstale = to; - /* - * Copy only the entries that have moved. - */ - if (from > to) - ents[to] = ents[from]; - to++; - } - ASSERT(from > to); - /* - * If the insertion point was past the last entry, - * set the new insertion point accordingly. - */ - if (index == from) - newindex = to; - *indexp = newindex; - /* - * Adjust the leaf header values. - */ - leafhdr->count -= from - to; - leafhdr->stale = 1; - /* - * Remember the low/high stale value only in the "right" - * direction. - */ - if (lowstale >= newindex) - lowstale = -1; - else - highstale = leafhdr->count; - *highlogp = leafhdr->count - 1; - *lowstalep = lowstale; - *highstalep = highstale; -} - -/* - * Log the bests entries indicated from a leaf1 block. - */ -static void -xfs_dir3_leaf_log_bests( - struct xfs_da_args *args, - struct xfs_buf *bp, /* leaf buffer */ - int first, /* first entry to log */ - int last) /* last entry to log */ -{ - __be16 *firstb; /* pointer to first entry */ - __be16 *lastb; /* pointer to last entry */ - struct xfs_dir2_leaf *leaf = bp->b_addr; - xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)); - - ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - firstb = xfs_dir2_leaf_bests_p(ltp) + first; - lastb = xfs_dir2_leaf_bests_p(ltp) + last; - xfs_trans_log_buf(args->trans, bp, - (uint)((char *)firstb - (char *)leaf), - (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1)); -} - -/* - * Log the leaf entries indicated from a leaf1 or leafn block. - */ -void -xfs_dir3_leaf_log_ents( - struct xfs_da_args *args, - struct xfs_buf *bp, - int first, - int last) -{ - xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */ - xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */ - struct xfs_dir2_leaf *leaf = bp->b_addr; - struct xfs_dir2_leaf_entry *ents; - - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - - ents = args->dp->d_ops->leaf_ents_p(leaf); - firstlep = &ents[first]; - lastlep = &ents[last]; - xfs_trans_log_buf(args->trans, bp, - (uint)((char *)firstlep - (char *)leaf), - (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1)); -} - -/* - * Log the header of the leaf1 or leafn block. - */ -void -xfs_dir3_leaf_log_header( - struct xfs_da_args *args, - struct xfs_buf *bp) -{ - struct xfs_dir2_leaf *leaf = bp->b_addr; - - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - - xfs_trans_log_buf(args->trans, bp, - (uint)((char *)&leaf->hdr - (char *)leaf), - args->dp->d_ops->leaf_hdr_size - 1); -} - -/* - * Log the tail of the leaf1 block. - */ -STATIC void -xfs_dir3_leaf_log_tail( - struct xfs_da_args *args, - struct xfs_buf *bp) -{ - struct xfs_dir2_leaf *leaf = bp->b_addr; - xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - - ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf), - (uint)(args->geo->blksize - 1)); -} - -/* - * Look up the entry referred to by args in the leaf format directory. - * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which - * is also used by the node-format code. - */ -int -xfs_dir2_leaf_lookup( - xfs_da_args_t *args) /* operation arguments */ -{ - struct xfs_buf *dbp; /* data block buffer */ - xfs_dir2_data_entry_t *dep; /* data block entry */ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return code */ - int index; /* found entry index */ - struct xfs_buf *lbp; /* leaf buffer */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir2_leaf_entry *ents; - - trace_xfs_dir2_leaf_lookup(args); - - /* - * Look up name in the leaf block, returning both buffers and index. - */ - if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { - return error; - } - tp = args->trans; - dp = args->dp; - xfs_dir3_leaf_check(dp, lbp); - leaf = lbp->b_addr; - ents = dp->d_ops->leaf_ents_p(leaf); - /* - * Get to the leaf entry and contained data entry address. - */ - lep = &ents[index]; - - /* - * Point to the data entry. - */ - dep = (xfs_dir2_data_entry_t *) - ((char *)dbp->b_addr + - xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); - /* - * Return the found inode number & CI name if appropriate - */ - args->inumber = be64_to_cpu(dep->inumber); - args->filetype = dp->d_ops->data_get_ftype(dep); - error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); - xfs_trans_brelse(tp, dbp); - xfs_trans_brelse(tp, lbp); - return error; -} - -/* - * Look up name/hash in the leaf block. - * Fill in indexp with the found index, and dbpp with the data buffer. - * If not found dbpp will be NULL, and ENOENT comes back. - * lbpp will always be filled in with the leaf buffer unless there's an error. - */ -static int /* error */ -xfs_dir2_leaf_lookup_int( - xfs_da_args_t *args, /* operation arguments */ - struct xfs_buf **lbpp, /* out: leaf buffer */ - int *indexp, /* out: index in leaf block */ - struct xfs_buf **dbpp) /* out: data buffer */ -{ - xfs_dir2_db_t curdb = -1; /* current data block number */ - struct xfs_buf *dbp = NULL; /* data buffer */ - xfs_dir2_data_entry_t *dep; /* data entry */ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return code */ - int index; /* index in leaf block */ - struct xfs_buf *lbp; /* leaf buffer */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_dir2_db_t newdb; /* new data block number */ - xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_db_t cidb = -1; /* case match data block no. */ - enum xfs_dacmp cmp; /* name compare result */ - struct xfs_dir2_leaf_entry *ents; - struct xfs_dir3_icleaf_hdr leafhdr; - - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - - error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); - if (error) - return error; - - *lbpp = lbp; - leaf = lbp->b_addr; - xfs_dir3_leaf_check(dp, lbp); - ents = dp->d_ops->leaf_ents_p(leaf); - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - - /* - * Look for the first leaf entry with our hash value. - */ - index = xfs_dir2_leaf_search_hash(args, lbp); - /* - * Loop over all the entries with the right hash value - * looking to match the name. - */ - for (lep = &ents[index]; - index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; - lep++, index++) { - /* - * Skip over stale leaf entries. - */ - if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) - continue; - /* - * Get the new data block number. - */ - newdb = xfs_dir2_dataptr_to_db(args->geo, - be32_to_cpu(lep->address)); - /* - * If it's not the same as the old data block number, - * need to pitch the old one and read the new one. - */ - if (newdb != curdb) { - if (dbp) - xfs_trans_brelse(tp, dbp); - error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(args->geo, newdb), - -1, &dbp); - if (error) { - xfs_trans_brelse(tp, lbp); - return error; - } - curdb = newdb; - } - /* - * Point to the data entry. - */ - dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr + - xfs_dir2_dataptr_to_off(args->geo, - be32_to_cpu(lep->address))); - /* - * Compare name and if it's an exact match, return the index - * and buffer. If it's the first case-insensitive match, store - * the index and buffer and continue looking for an exact match. - */ - cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); - if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { - args->cmpresult = cmp; - *indexp = index; - /* case exact match: return the current buffer. */ - if (cmp == XFS_CMP_EXACT) { - *dbpp = dbp; - return 0; - } - cidb = curdb; - } - } - ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); - /* - * Here, we can only be doing a lookup (not a rename or remove). - * If a case-insensitive match was found earlier, re-read the - * appropriate data block if required and return it. - */ - if (args->cmpresult == XFS_CMP_CASE) { - ASSERT(cidb != -1); - if (cidb != curdb) { - xfs_trans_brelse(tp, dbp); - error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(args->geo, cidb), - -1, &dbp); - if (error) { - xfs_trans_brelse(tp, lbp); - return error; - } - } - *dbpp = dbp; - return 0; - } - /* - * No match found, return ENOENT. - */ - ASSERT(cidb == -1); - if (dbp) - xfs_trans_brelse(tp, dbp); - xfs_trans_brelse(tp, lbp); - return ENOENT; -} - -/* - * Remove an entry from a leaf format directory. - */ -int /* error */ -xfs_dir2_leaf_removename( - xfs_da_args_t *args) /* operation arguments */ -{ - __be16 *bestsp; /* leaf block best freespace */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_dir2_db_t db; /* data block number */ - struct xfs_buf *dbp; /* data block buffer */ - xfs_dir2_data_entry_t *dep; /* data entry structure */ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return code */ - xfs_dir2_db_t i; /* temporary data block # */ - int index; /* index into leaf entries */ - struct xfs_buf *lbp; /* leaf buffer */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry */ - xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - xfs_mount_t *mp; /* filesystem mount point */ - int needlog; /* need to log data header */ - int needscan; /* need to rescan data frees */ - xfs_dir2_data_off_t oldbest; /* old value of best free */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir2_data_free *bf; /* bestfree table */ - struct xfs_dir2_leaf_entry *ents; - struct xfs_dir3_icleaf_hdr leafhdr; - - trace_xfs_dir2_leaf_removename(args); - - /* - * Lookup the leaf entry, get the leaf and data blocks read in. - */ - if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { - return error; - } - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - leaf = lbp->b_addr; - hdr = dbp->b_addr; - xfs_dir3_data_check(dp, dbp); - bf = dp->d_ops->data_bestfree_p(hdr); - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - /* - * Point to the leaf entry, use that to point to the data entry. - */ - lep = &ents[index]; - db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); - dep = (xfs_dir2_data_entry_t *)((char *)hdr + - xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); - needscan = needlog = 0; - oldbest = be16_to_cpu(bf[0].length); - ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - bestsp = xfs_dir2_leaf_bests_p(ltp); - ASSERT(be16_to_cpu(bestsp[db]) == oldbest); - /* - * Mark the former data entry unused. - */ - xfs_dir2_data_make_free(args, dbp, - (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), - dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); - /* - * We just mark the leaf entry stale by putting a null in it. - */ - leafhdr.stale++; - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(args, lbp); - - lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(args, lbp, index, index); - - /* - * Scan the freespace in the data block again if necessary, - * log the data block header if necessary. - */ - if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); - if (needlog) - xfs_dir2_data_log_header(args, dbp); - /* - * If the longest freespace in the data block has changed, - * put the new value in the bests table and log that. - */ - if (be16_to_cpu(bf[0].length) != oldbest) { - bestsp[db] = bf[0].length; - xfs_dir3_leaf_log_bests(args, lbp, db, db); - } - xfs_dir3_data_check(dp, dbp); - /* - * If the data block is now empty then get rid of the data block. - */ - if (be16_to_cpu(bf[0].length) == - args->geo->blksize - dp->d_ops->data_entry_offset) { - ASSERT(db != args->geo->datablk); - if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { - /* - * Nope, can't get rid of it because it caused - * allocation of a bmap btree block to do so. - * Just go on, returning success, leaving the - * empty block in place. - */ - if (error == ENOSPC && args->total == 0) - error = 0; - xfs_dir3_leaf_check(dp, lbp); - return error; - } - dbp = NULL; - /* - * If this is the last data block then compact the - * bests table by getting rid of entries. - */ - if (db == be32_to_cpu(ltp->bestcount) - 1) { - /* - * Look for the last active entry (i). - */ - for (i = db - 1; i > 0; i--) { - if (bestsp[i] != cpu_to_be16(NULLDATAOFF)) - break; - } - /* - * Copy the table down so inactive entries at the - * end are removed. - */ - memmove(&bestsp[db - i], bestsp, - (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp)); - be32_add_cpu(<p->bestcount, -(db - i)); - xfs_dir3_leaf_log_tail(args, lbp); - xfs_dir3_leaf_log_bests(args, lbp, 0, - be32_to_cpu(ltp->bestcount) - 1); - } else - bestsp[db] = cpu_to_be16(NULLDATAOFF); - } - /* - * If the data block was not the first one, drop it. - */ - else if (db != args->geo->datablk) - dbp = NULL; - - xfs_dir3_leaf_check(dp, lbp); - /* - * See if we can convert to block form. - */ - return xfs_dir2_leaf_to_block(args, lbp, dbp); -} - -/* - * Replace the inode number in a leaf format directory entry. - */ -int /* error */ -xfs_dir2_leaf_replace( - xfs_da_args_t *args) /* operation arguments */ -{ - struct xfs_buf *dbp; /* data block buffer */ - xfs_dir2_data_entry_t *dep; /* data block entry */ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return code */ - int index; /* index of leaf entry */ - struct xfs_buf *lbp; /* leaf buffer */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir2_leaf_entry *ents; - - trace_xfs_dir2_leaf_replace(args); - - /* - * Look up the entry. - */ - if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { - return error; - } - dp = args->dp; - leaf = lbp->b_addr; - ents = dp->d_ops->leaf_ents_p(leaf); - /* - * Point to the leaf entry, get data address from it. - */ - lep = &ents[index]; - /* - * Point to the data entry. - */ - dep = (xfs_dir2_data_entry_t *) - ((char *)dbp->b_addr + - xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); - ASSERT(args->inumber != be64_to_cpu(dep->inumber)); - /* - * Put the new inode number in, log it. - */ - dep->inumber = cpu_to_be64(args->inumber); - dp->d_ops->data_put_ftype(dep, args->filetype); - tp = args->trans; - xfs_dir2_data_log_entry(args, dbp, dep); - xfs_dir3_leaf_check(dp, lbp); - xfs_trans_brelse(tp, lbp); - return 0; -} - -/* - * Return index in the leaf block (lbp) which is either the first - * one with this hash value, or if there are none, the insert point - * for that hash value. - */ -int /* index value */ -xfs_dir2_leaf_search_hash( - xfs_da_args_t *args, /* operation arguments */ - struct xfs_buf *lbp) /* leaf buffer */ -{ - xfs_dahash_t hash=0; /* hash from this entry */ - xfs_dahash_t hashwant; /* hash value looking for */ - int high; /* high leaf index */ - int low; /* low leaf index */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry */ - int mid=0; /* current leaf index */ - struct xfs_dir2_leaf_entry *ents; - struct xfs_dir3_icleaf_hdr leafhdr; - - leaf = lbp->b_addr; - ents = args->dp->d_ops->leaf_ents_p(leaf); - args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - - /* - * Note, the table cannot be empty, so we have to go through the loop. - * Binary search the leaf entries looking for our hash value. - */ - for (lep = ents, low = 0, high = leafhdr.count - 1, - hashwant = args->hashval; - low <= high; ) { - mid = (low + high) >> 1; - if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant) - break; - if (hash < hashwant) - low = mid + 1; - else - high = mid - 1; - } - /* - * Found one, back up through all the equal hash values. - */ - if (hash == hashwant) { - while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) { - mid--; - } - } - /* - * Need to point to an entry higher than ours. - */ - else if (hash < hashwant) - mid++; - return mid; -} - -/* - * Trim off a trailing data block. We know it's empty since the leaf - * freespace table says so. - */ -int /* error */ -xfs_dir2_leaf_trim_data( - xfs_da_args_t *args, /* operation arguments */ - struct xfs_buf *lbp, /* leaf buffer */ - xfs_dir2_db_t db) /* data block number */ -{ - __be16 *bestsp; /* leaf bests table */ - struct xfs_buf *dbp; /* data block buffer */ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return value */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_trans_t *tp; /* transaction pointer */ - - dp = args->dp; - mp = dp->i_mount; - tp = args->trans; - /* - * Read the offending data block. We need its buffer. - */ - error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db), - -1, &dbp); - if (error) - return error; - - leaf = lbp->b_addr; - ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - -#ifdef DEBUG -{ - struct xfs_dir2_data_hdr *hdr = dbp->b_addr; - struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr); - - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); - ASSERT(be16_to_cpu(bf[0].length) == - args->geo->blksize - dp->d_ops->data_entry_offset); - ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); -} -#endif - - /* - * Get rid of the data block. - */ - if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { - ASSERT(error != ENOSPC); - xfs_trans_brelse(tp, dbp); - return error; - } - /* - * Eliminate the last bests entry from the table. - */ - bestsp = xfs_dir2_leaf_bests_p(ltp); - be32_add_cpu(<p->bestcount, -1); - memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); - xfs_dir3_leaf_log_tail(args, lbp); - xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); - return 0; -} - -static inline size_t -xfs_dir3_leaf_size( - struct xfs_dir3_icleaf_hdr *hdr, - int counts) -{ - int entries; - int hdrsize; - - entries = hdr->count - hdr->stale; - if (hdr->magic == XFS_DIR2_LEAF1_MAGIC || - hdr->magic == XFS_DIR2_LEAFN_MAGIC) - hdrsize = sizeof(struct xfs_dir2_leaf_hdr); - else - hdrsize = sizeof(struct xfs_dir3_leaf_hdr); - - return hdrsize + entries * sizeof(xfs_dir2_leaf_entry_t) - + counts * sizeof(xfs_dir2_data_off_t) - + sizeof(xfs_dir2_leaf_tail_t); -} - -/* - * Convert node form directory to leaf form directory. - * The root of the node form dir needs to already be a LEAFN block. - * Just return if we can't do anything. - */ -int /* error */ -xfs_dir2_node_to_leaf( - xfs_da_state_t *state) /* directory operation state */ -{ - xfs_da_args_t *args; /* operation arguments */ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return code */ - struct xfs_buf *fbp; /* buffer for freespace block */ - xfs_fileoff_t fo; /* freespace file offset */ - xfs_dir2_free_t *free; /* freespace structure */ - struct xfs_buf *lbp; /* buffer for leaf block */ - xfs_dir2_leaf_tail_t *ltp; /* tail of leaf structure */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_mount_t *mp; /* filesystem mount point */ - int rval; /* successful free trim? */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir3_icleaf_hdr leafhdr; - struct xfs_dir3_icfree_hdr freehdr; - - /* - * There's more than a leaf level in the btree, so there must - * be multiple leafn blocks. Give up. - */ - if (state->path.active > 1) - return 0; - args = state->args; - - trace_xfs_dir2_node_to_leaf(args); - - mp = state->mp; - dp = args->dp; - tp = args->trans; - /* - * Get the last offset in the file. - */ - if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) { - return error; - } - fo -= args->geo->fsbcount; - /* - * If there are freespace blocks other than the first one, - * take this opportunity to remove trailing empty freespace blocks - * that may have been left behind during no-space-reservation - * operations. - */ - while (fo > args->geo->freeblk) { - if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) { - return error; - } - if (rval) - fo -= args->geo->fsbcount; - else - return 0; - } - /* - * Now find the block just before the freespace block. - */ - if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) { - return error; - } - /* - * If it's not the single leaf block, give up. - */ - if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize) - return 0; - lbp = state->path.blk[0].bp; - leaf = lbp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - - ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || - leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); - - /* - * Read the freespace block. - */ - error = xfs_dir2_free_read(tp, dp, args->geo->freeblk, &fbp); - if (error) - return error; - free = fbp->b_addr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); - - ASSERT(!freehdr.firstdb); - - /* - * Now see if the leafn and free data will fit in a leaf1. - * If not, release the buffer and give up. - */ - if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) { - xfs_trans_brelse(tp, fbp); - return 0; - } - - /* - * If the leaf has any stale entries in it, compress them out. - */ - if (leafhdr.stale) - xfs_dir3_leaf_compact(args, &leafhdr, lbp); - - lbp->b_ops = &xfs_dir3_leaf1_buf_ops; - xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAF1_BUF); - leafhdr.magic = (leafhdr.magic == XFS_DIR2_LEAFN_MAGIC) - ? XFS_DIR2_LEAF1_MAGIC - : XFS_DIR3_LEAF1_MAGIC; - - /* - * Set up the leaf tail from the freespace block. - */ - ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - ltp->bestcount = cpu_to_be32(freehdr.nvalid); - - /* - * Set up the leaf bests table. - */ - memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free), - freehdr.nvalid * sizeof(xfs_dir2_data_off_t)); - - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(args, lbp); - xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); - xfs_dir3_leaf_log_tail(args, lbp); - xfs_dir3_leaf_check(dp, lbp); - - /* - * Get rid of the freespace block. - */ - error = xfs_dir2_shrink_inode(args, - xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET), - fbp); - if (error) { - /* - * This can't fail here because it can only happen when - * punching out the middle of an extent, and this is an - * isolated block. - */ - ASSERT(error != ENOSPC); - return error; - } - fbp = NULL; - /* - * Now see if we can convert the single-leaf directory - * down to a block form directory. - * This routine always kills the dabuf for the leaf, so - * eliminate it from the path. - */ - error = xfs_dir2_leaf_to_block(args, lbp, NULL); - state->path.blk[0].bp = NULL; - return error; -} diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c deleted file mode 100644 index 4cf8b99d09a4..000000000000 --- a/fs/xfs/xfs_dir2_node.c +++ /dev/null @@ -1,2284 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_dir2.h" -#include "xfs_dir2_priv.h" -#include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_trans.h" -#include "xfs_buf_item.h" -#include "xfs_cksum.h" - -/* - * Function declarations. - */ -static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args, - int index); -static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state, - xfs_da_state_blk_t *blk1, - xfs_da_state_blk_t *blk2); -static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp, - int index, xfs_da_state_blk_t *dblk, - int *rval); -static int xfs_dir2_node_addname_int(xfs_da_args_t *args, - xfs_da_state_blk_t *fblk); - -/* - * Check internal consistency of a leafn block. - */ -#ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ -do { \ - if (!xfs_dir3_leafn_check((dp), (bp))) \ - ASSERT(0); \ -} while (0); - -static bool -xfs_dir3_leafn_check( - struct xfs_inode *dp, - struct xfs_buf *bp) -{ - struct xfs_dir2_leaf *leaf = bp->b_addr; - struct xfs_dir3_icleaf_hdr leafhdr; - - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - - if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { - struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; - if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; - } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) - return false; - - return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); -} -#else -#define xfs_dir3_leaf_check(dp, bp) -#endif - -static bool -xfs_dir3_free_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_dir2_free_hdr *hdr = bp->b_addr; - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - - if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC)) - return false; - if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) - return false; - if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; - } else { - if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) - return false; - } - - /* XXX: should bounds check the xfs_dir3_icfree_hdr here */ - - return true; -} - -static void -xfs_dir3_free_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (!xfs_dir3_free_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); -} - -static void -xfs_dir3_free_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - - if (!xfs_dir3_free_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (bip) - hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - - xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF); -} - -const struct xfs_buf_ops xfs_dir3_free_buf_ops = { - .verify_read = xfs_dir3_free_read_verify, - .verify_write = xfs_dir3_free_write_verify, -}; - - -static int -__xfs_dir3_free_read( - struct xfs_trans *tp, - struct xfs_inode *dp, - xfs_dablk_t fbno, - xfs_daddr_t mappedbno, - struct xfs_buf **bpp) -{ - int err; - - err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, - XFS_DATA_FORK, &xfs_dir3_free_buf_ops); - - /* try read returns without an error or *bpp if it lands in a hole */ - if (!err && tp && *bpp) - xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF); - return err; -} - -int -xfs_dir2_free_read( - struct xfs_trans *tp, - struct xfs_inode *dp, - xfs_dablk_t fbno, - struct xfs_buf **bpp) -{ - return __xfs_dir3_free_read(tp, dp, fbno, -1, bpp); -} - -static int -xfs_dir2_free_try_read( - struct xfs_trans *tp, - struct xfs_inode *dp, - xfs_dablk_t fbno, - struct xfs_buf **bpp) -{ - return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp); -} - -static int -xfs_dir3_free_get_buf( - xfs_da_args_t *args, - xfs_dir2_db_t fbno, - struct xfs_buf **bpp) -{ - struct xfs_trans *tp = args->trans; - struct xfs_inode *dp = args->dp; - struct xfs_mount *mp = dp->i_mount; - struct xfs_buf *bp; - int error; - struct xfs_dir3_icfree_hdr hdr; - - error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno), - -1, &bp, XFS_DATA_FORK); - if (error) - return error; - - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_FREE_BUF); - bp->b_ops = &xfs_dir3_free_buf_ops; - - /* - * Initialize the new block to be empty, and remember - * its first slot as our empty slot. - */ - memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr)); - memset(&hdr, 0, sizeof(hdr)); - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; - - hdr.magic = XFS_DIR3_FREE_MAGIC; - - hdr3->hdr.blkno = cpu_to_be64(bp->b_bn); - hdr3->hdr.owner = cpu_to_be64(dp->i_ino); - uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid); - } else - hdr.magic = XFS_DIR2_FREE_MAGIC; - dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr); - *bpp = bp; - return 0; -} - -/* - * Log entries from a freespace block. - */ -STATIC void -xfs_dir2_free_log_bests( - struct xfs_da_args *args, - struct xfs_buf *bp, - int first, /* first entry to log */ - int last) /* last entry to log */ -{ - xfs_dir2_free_t *free; /* freespace structure */ - __be16 *bests; - - free = bp->b_addr; - bests = args->dp->d_ops->free_bests_p(free); - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || - free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); - xfs_trans_log_buf(args->trans, bp, - (uint)((char *)&bests[first] - (char *)free), - (uint)((char *)&bests[last] - (char *)free + - sizeof(bests[0]) - 1)); -} - -/* - * Log header from a freespace block. - */ -static void -xfs_dir2_free_log_header( - struct xfs_da_args *args, - struct xfs_buf *bp) -{ -#ifdef DEBUG - xfs_dir2_free_t *free; /* freespace structure */ - - free = bp->b_addr; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || - free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); -#endif - xfs_trans_log_buf(args->trans, bp, 0, - args->dp->d_ops->free_hdr_size - 1); -} - -/* - * Convert a leaf-format directory to a node-format directory. - * We need to change the magic number of the leaf block, and copy - * the freespace table out of the leaf block into its own block. - */ -int /* error */ -xfs_dir2_leaf_to_node( - xfs_da_args_t *args, /* operation arguments */ - struct xfs_buf *lbp) /* leaf buffer */ -{ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return value */ - struct xfs_buf *fbp; /* freespace buffer */ - xfs_dir2_db_t fdb; /* freespace block number */ - xfs_dir2_free_t *free; /* freespace structure */ - __be16 *from; /* pointer to freespace entry */ - int i; /* leaf freespace index */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - xfs_mount_t *mp; /* filesystem mount point */ - int n; /* count of live freespc ents */ - xfs_dir2_data_off_t off; /* freespace entry value */ - __be16 *to; /* pointer to freespace entry */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir3_icfree_hdr freehdr; - - trace_xfs_dir2_leaf_to_node(args); - - dp = args->dp; - mp = dp->i_mount; - tp = args->trans; - /* - * Add a freespace block to the directory. - */ - if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb))) { - return error; - } - ASSERT(fdb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); - /* - * Get the buffer for the new freespace block. - */ - error = xfs_dir3_free_get_buf(args, fdb, &fbp); - if (error) - return error; - - free = fbp->b_addr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); - leaf = lbp->b_addr; - ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - ASSERT(be32_to_cpu(ltp->bestcount) <= - (uint)dp->i_d.di_size / args->geo->blksize); - - /* - * Copy freespace entries from the leaf block to the new block. - * Count active entries. - */ - from = xfs_dir2_leaf_bests_p(ltp); - to = dp->d_ops->free_bests_p(free); - for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { - if ((off = be16_to_cpu(*from)) != NULLDATAOFF) - n++; - *to = cpu_to_be16(off); - } - - /* - * Now initialize the freespace block header. - */ - freehdr.nused = n; - freehdr.nvalid = be32_to_cpu(ltp->bestcount); - - dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1); - xfs_dir2_free_log_header(args, fbp); - - /* - * Converting the leaf to a leafnode is just a matter of changing the - * magic number and the ops. Do the change directly to the buffer as - * it's less work (and less code) than decoding the header to host - * format and back again. - */ - if (leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)) - leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC); - else - leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); - lbp->b_ops = &xfs_dir3_leafn_buf_ops; - xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF); - xfs_dir3_leaf_log_header(args, lbp); - xfs_dir3_leaf_check(dp, lbp); - return 0; -} - -/* - * Add a leaf entry to a leaf block in a node-form directory. - * The other work necessary is done from the caller. - */ -static int /* error */ -xfs_dir2_leafn_add( - struct xfs_buf *bp, /* leaf buffer */ - xfs_da_args_t *args, /* operation arguments */ - int index) /* insertion pt for new entry */ -{ - int compact; /* compacting stale leaves */ - xfs_inode_t *dp; /* incore directory inode */ - int highstale; /* next stale entry */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry */ - int lfloghigh; /* high leaf entry logging */ - int lfloglow; /* low leaf entry logging */ - int lowstale; /* previous stale entry */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir3_icleaf_hdr leafhdr; - struct xfs_dir2_leaf_entry *ents; - - trace_xfs_dir2_leafn_add(args, index); - - dp = args->dp; - mp = dp->i_mount; - tp = args->trans; - leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - - /* - * Quick check just to make sure we are not going to index - * into other peoples memory - */ - if (index < 0) - return EFSCORRUPTED; - - /* - * If there are already the maximum number of leaf entries in - * the block, if there are no stale entries it won't fit. - * Caller will do a split. If there are stale entries we'll do - * a compact. - */ - - if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) { - if (!leafhdr.stale) - return ENOSPC; - compact = leafhdr.stale > 1; - } else - compact = 0; - ASSERT(index == 0 || be32_to_cpu(ents[index - 1].hashval) <= args->hashval); - ASSERT(index == leafhdr.count || - be32_to_cpu(ents[index].hashval) >= args->hashval); - - if (args->op_flags & XFS_DA_OP_JUSTCHECK) - return 0; - - /* - * Compact out all but one stale leaf entry. Leaves behind - * the entry closest to index. - */ - if (compact) - xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale, - &highstale, &lfloglow, &lfloghigh); - else if (leafhdr.stale) { - /* - * Set impossible logging indices for this case. - */ - lfloglow = leafhdr.count; - lfloghigh = -1; - } - - /* - * Insert the new entry, log everything. - */ - lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale, - highstale, &lfloglow, &lfloghigh); - - lep->hashval = cpu_to_be32(args->hashval); - lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo, - args->blkno, args->index)); - - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(args, bp); - xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh); - xfs_dir3_leaf_check(dp, bp); - return 0; -} - -#ifdef DEBUG -static void -xfs_dir2_free_hdr_check( - struct xfs_inode *dp, - struct xfs_buf *bp, - xfs_dir2_db_t db) -{ - struct xfs_dir3_icfree_hdr hdr; - - dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr); - - ASSERT((hdr.firstdb % - dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0); - ASSERT(hdr.firstdb <= db); - ASSERT(db < hdr.firstdb + hdr.nvalid); -} -#else -#define xfs_dir2_free_hdr_check(dp, bp, db) -#endif /* DEBUG */ - -/* - * Return the last hash value in the leaf. - * Stale entries are ok. - */ -xfs_dahash_t /* hash value */ -xfs_dir2_leafn_lasthash( - struct xfs_inode *dp, - struct xfs_buf *bp, /* leaf buffer */ - int *count) /* count of entries in leaf */ -{ - struct xfs_dir2_leaf *leaf = bp->b_addr; - struct xfs_dir2_leaf_entry *ents; - struct xfs_dir3_icleaf_hdr leafhdr; - - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - - ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || - leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); - - if (count) - *count = leafhdr.count; - if (!leafhdr.count) - return 0; - - ents = dp->d_ops->leaf_ents_p(leaf); - return be32_to_cpu(ents[leafhdr.count - 1].hashval); -} - -/* - * Look up a leaf entry for space to add a name in a node-format leaf block. - * The extrablk in state is a freespace block. - */ -STATIC int -xfs_dir2_leafn_lookup_for_addname( - struct xfs_buf *bp, /* leaf buffer */ - xfs_da_args_t *args, /* operation arguments */ - int *indexp, /* out: leaf entry index */ - xfs_da_state_t *state) /* state to fill in */ -{ - struct xfs_buf *curbp = NULL; /* current data/free buffer */ - xfs_dir2_db_t curdb = -1; /* current data block number */ - xfs_dir2_db_t curfdb = -1; /* current free block number */ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return value */ - int fi; /* free entry index */ - xfs_dir2_free_t *free = NULL; /* free block structure */ - int index; /* leaf entry index */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - int length; /* length of new data entry */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_dir2_db_t newdb; /* new data block number */ - xfs_dir2_db_t newfdb; /* new free block number */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir2_leaf_entry *ents; - struct xfs_dir3_icleaf_hdr leafhdr; - - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - - xfs_dir3_leaf_check(dp, bp); - ASSERT(leafhdr.count > 0); - - /* - * Look up the hash value in the leaf entries. - */ - index = xfs_dir2_leaf_search_hash(args, bp); - /* - * Do we have a buffer coming in? - */ - if (state->extravalid) { - /* If so, it's a free block buffer, get the block number. */ - curbp = state->extrablk.bp; - curfdb = state->extrablk.blkno; - free = curbp->b_addr; - ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || - free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); - } - length = dp->d_ops->data_entsize(args->namelen); - /* - * Loop over leaf entries with the right hash value. - */ - for (lep = &ents[index]; - index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; - lep++, index++) { - /* - * Skip stale leaf entries. - */ - if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) - continue; - /* - * Pull the data block number from the entry. - */ - newdb = xfs_dir2_dataptr_to_db(args->geo, - be32_to_cpu(lep->address)); - /* - * For addname, we're looking for a place to put the new entry. - * We want to use a data block with an entry of equal - * hash value to ours if there is one with room. - * - * If this block isn't the data block we already have - * in hand, take a look at it. - */ - if (newdb != curdb) { - __be16 *bests; - - curdb = newdb; - /* - * Convert the data block to the free block - * holding its freespace information. - */ - newfdb = dp->d_ops->db_to_fdb(args->geo, newdb); - /* - * If it's not the one we have in hand, read it in. - */ - if (newfdb != curfdb) { - /* - * If we had one before, drop it. - */ - if (curbp) - xfs_trans_brelse(tp, curbp); - - error = xfs_dir2_free_read(tp, dp, - xfs_dir2_db_to_da(args->geo, - newfdb), - &curbp); - if (error) - return error; - free = curbp->b_addr; - - xfs_dir2_free_hdr_check(dp, curbp, curdb); - } - /* - * Get the index for our entry. - */ - fi = dp->d_ops->db_to_fdindex(args->geo, curdb); - /* - * If it has room, return it. - */ - bests = dp->d_ops->free_bests_p(free); - if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) { - XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", - XFS_ERRLEVEL_LOW, mp); - if (curfdb != newfdb) - xfs_trans_brelse(tp, curbp); - return EFSCORRUPTED; - } - curfdb = newfdb; - if (be16_to_cpu(bests[fi]) >= length) - goto out; - } - } - /* Didn't find any space */ - fi = -1; -out: - ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); - if (curbp) { - /* Giving back a free block. */ - state->extravalid = 1; - state->extrablk.bp = curbp; - state->extrablk.index = fi; - state->extrablk.blkno = curfdb; - - /* - * Important: this magic number is not in the buffer - it's for - * buffer type information and therefore only the free/data type - * matters here, not whether CRCs are enabled or not. - */ - state->extrablk.magic = XFS_DIR2_FREE_MAGIC; - } else { - state->extravalid = 0; - } - /* - * Return the index, that will be the insertion point. - */ - *indexp = index; - return ENOENT; -} - -/* - * Look up a leaf entry in a node-format leaf block. - * The extrablk in state a data block. - */ -STATIC int -xfs_dir2_leafn_lookup_for_entry( - struct xfs_buf *bp, /* leaf buffer */ - xfs_da_args_t *args, /* operation arguments */ - int *indexp, /* out: leaf entry index */ - xfs_da_state_t *state) /* state to fill in */ -{ - struct xfs_buf *curbp = NULL; /* current data/free buffer */ - xfs_dir2_db_t curdb = -1; /* current data block number */ - xfs_dir2_data_entry_t *dep; /* data block entry */ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return value */ - int index; /* leaf entry index */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_dir2_db_t newdb; /* new data block number */ - xfs_trans_t *tp; /* transaction pointer */ - enum xfs_dacmp cmp; /* comparison result */ - struct xfs_dir2_leaf_entry *ents; - struct xfs_dir3_icleaf_hdr leafhdr; - - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - - xfs_dir3_leaf_check(dp, bp); - ASSERT(leafhdr.count > 0); - - /* - * Look up the hash value in the leaf entries. - */ - index = xfs_dir2_leaf_search_hash(args, bp); - /* - * Do we have a buffer coming in? - */ - if (state->extravalid) { - curbp = state->extrablk.bp; - curdb = state->extrablk.blkno; - } - /* - * Loop over leaf entries with the right hash value. - */ - for (lep = &ents[index]; - index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; - lep++, index++) { - /* - * Skip stale leaf entries. - */ - if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) - continue; - /* - * Pull the data block number from the entry. - */ - newdb = xfs_dir2_dataptr_to_db(args->geo, - be32_to_cpu(lep->address)); - /* - * Not adding a new entry, so we really want to find - * the name given to us. - * - * If it's a different data block, go get it. - */ - if (newdb != curdb) { - /* - * If we had a block before that we aren't saving - * for a CI name, drop it - */ - if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT || - curdb != state->extrablk.blkno)) - xfs_trans_brelse(tp, curbp); - /* - * If needing the block that is saved with a CI match, - * use it otherwise read in the new data block. - */ - if (args->cmpresult != XFS_CMP_DIFFERENT && - newdb == state->extrablk.blkno) { - ASSERT(state->extravalid); - curbp = state->extrablk.bp; - } else { - error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(args->geo, - newdb), - -1, &curbp); - if (error) - return error; - } - xfs_dir3_data_check(dp, curbp); - curdb = newdb; - } - /* - * Point to the data entry. - */ - dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr + - xfs_dir2_dataptr_to_off(args->geo, - be32_to_cpu(lep->address))); - /* - * Compare the entry and if it's an exact match, return - * EEXIST immediately. If it's the first case-insensitive - * match, store the block & inode number and continue looking. - */ - cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); - if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { - /* If there is a CI match block, drop it */ - if (args->cmpresult != XFS_CMP_DIFFERENT && - curdb != state->extrablk.blkno) - xfs_trans_brelse(tp, state->extrablk.bp); - args->cmpresult = cmp; - args->inumber = be64_to_cpu(dep->inumber); - args->filetype = dp->d_ops->data_get_ftype(dep); - *indexp = index; - state->extravalid = 1; - state->extrablk.bp = curbp; - state->extrablk.blkno = curdb; - state->extrablk.index = (int)((char *)dep - - (char *)curbp->b_addr); - state->extrablk.magic = XFS_DIR2_DATA_MAGIC; - curbp->b_ops = &xfs_dir3_data_buf_ops; - xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF); - if (cmp == XFS_CMP_EXACT) - return EEXIST; - } - } - ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT)); - if (curbp) { - if (args->cmpresult == XFS_CMP_DIFFERENT) { - /* Giving back last used data block. */ - state->extravalid = 1; - state->extrablk.bp = curbp; - state->extrablk.index = -1; - state->extrablk.blkno = curdb; - state->extrablk.magic = XFS_DIR2_DATA_MAGIC; - curbp->b_ops = &xfs_dir3_data_buf_ops; - xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF); - } else { - /* If the curbp is not the CI match block, drop it */ - if (state->extrablk.bp != curbp) - xfs_trans_brelse(tp, curbp); - } - } else { - state->extravalid = 0; - } - *indexp = index; - return ENOENT; -} - -/* - * Look up a leaf entry in a node-format leaf block. - * If this is an addname then the extrablk in state is a freespace block, - * otherwise it's a data block. - */ -int -xfs_dir2_leafn_lookup_int( - struct xfs_buf *bp, /* leaf buffer */ - xfs_da_args_t *args, /* operation arguments */ - int *indexp, /* out: leaf entry index */ - xfs_da_state_t *state) /* state to fill in */ -{ - if (args->op_flags & XFS_DA_OP_ADDNAME) - return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp, - state); - return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state); -} - -/* - * Move count leaf entries from source to destination leaf. - * Log entries and headers. Stale entries are preserved. - */ -static void -xfs_dir3_leafn_moveents( - xfs_da_args_t *args, /* operation arguments */ - struct xfs_buf *bp_s, /* source */ - struct xfs_dir3_icleaf_hdr *shdr, - struct xfs_dir2_leaf_entry *sents, - int start_s,/* source leaf index */ - struct xfs_buf *bp_d, /* destination */ - struct xfs_dir3_icleaf_hdr *dhdr, - struct xfs_dir2_leaf_entry *dents, - int start_d,/* destination leaf index */ - int count) /* count of leaves to copy */ -{ - int stale; /* count stale leaves copied */ - - trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count); - - /* - * Silently return if nothing to do. - */ - if (count == 0) - return; - - /* - * If the destination index is not the end of the current - * destination leaf entries, open up a hole in the destination - * to hold the new entries. - */ - if (start_d < dhdr->count) { - memmove(&dents[start_d + count], &dents[start_d], - (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(args, bp_d, start_d + count, - count + dhdr->count - 1); - } - /* - * If the source has stale leaves, count the ones in the copy range - * so we can update the header correctly. - */ - if (shdr->stale) { - int i; /* temp leaf index */ - - for (i = start_s, stale = 0; i < start_s + count; i++) { - if (sents[i].address == - cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) - stale++; - } - } else - stale = 0; - /* - * Copy the leaf entries from source to destination. - */ - memcpy(&dents[start_d], &sents[start_s], - count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1); - - /* - * If there are source entries after the ones we copied, - * delete the ones we copied by sliding the next ones down. - */ - if (start_s + count < shdr->count) { - memmove(&sents[start_s], &sents[start_s + count], - count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1); - } - - /* - * Update the headers and log them. - */ - shdr->count -= count; - shdr->stale -= stale; - dhdr->count += count; - dhdr->stale += stale; -} - -/* - * Determine the sort order of two leaf blocks. - * Returns 1 if both are valid and leaf2 should be before leaf1, else 0. - */ -int /* sort order */ -xfs_dir2_leafn_order( - struct xfs_inode *dp, - struct xfs_buf *leaf1_bp, /* leaf1 buffer */ - struct xfs_buf *leaf2_bp) /* leaf2 buffer */ -{ - struct xfs_dir2_leaf *leaf1 = leaf1_bp->b_addr; - struct xfs_dir2_leaf *leaf2 = leaf2_bp->b_addr; - struct xfs_dir2_leaf_entry *ents1; - struct xfs_dir2_leaf_entry *ents2; - struct xfs_dir3_icleaf_hdr hdr1; - struct xfs_dir3_icleaf_hdr hdr2; - - dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); - dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); - ents1 = dp->d_ops->leaf_ents_p(leaf1); - ents2 = dp->d_ops->leaf_ents_p(leaf2); - - if (hdr1.count > 0 && hdr2.count > 0 && - (be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) || - be32_to_cpu(ents2[hdr2.count - 1].hashval) < - be32_to_cpu(ents1[hdr1.count - 1].hashval))) - return 1; - return 0; -} - -/* - * Rebalance leaf entries between two leaf blocks. - * This is actually only called when the second block is new, - * though the code deals with the general case. - * A new entry will be inserted in one of the blocks, and that - * entry is taken into account when balancing. - */ -static void -xfs_dir2_leafn_rebalance( - xfs_da_state_t *state, /* btree cursor */ - xfs_da_state_blk_t *blk1, /* first btree block */ - xfs_da_state_blk_t *blk2) /* second btree block */ -{ - xfs_da_args_t *args; /* operation arguments */ - int count; /* count (& direction) leaves */ - int isleft; /* new goes in left leaf */ - xfs_dir2_leaf_t *leaf1; /* first leaf structure */ - xfs_dir2_leaf_t *leaf2; /* second leaf structure */ - int mid; /* midpoint leaf index */ -#if defined(DEBUG) || defined(XFS_WARN) - int oldstale; /* old count of stale leaves */ -#endif - int oldsum; /* old total leaf count */ - int swap; /* swapped leaf blocks */ - struct xfs_dir2_leaf_entry *ents1; - struct xfs_dir2_leaf_entry *ents2; - struct xfs_dir3_icleaf_hdr hdr1; - struct xfs_dir3_icleaf_hdr hdr2; - struct xfs_inode *dp = state->args->dp; - - args = state->args; - /* - * If the block order is wrong, swap the arguments. - */ - if ((swap = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp))) { - xfs_da_state_blk_t *tmp; /* temp for block swap */ - - tmp = blk1; - blk1 = blk2; - blk2 = tmp; - } - leaf1 = blk1->bp->b_addr; - leaf2 = blk2->bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); - dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); - ents1 = dp->d_ops->leaf_ents_p(leaf1); - ents2 = dp->d_ops->leaf_ents_p(leaf2); - - oldsum = hdr1.count + hdr2.count; -#if defined(DEBUG) || defined(XFS_WARN) - oldstale = hdr1.stale + hdr2.stale; -#endif - mid = oldsum >> 1; - - /* - * If the old leaf count was odd then the new one will be even, - * so we need to divide the new count evenly. - */ - if (oldsum & 1) { - xfs_dahash_t midhash; /* middle entry hash value */ - - if (mid >= hdr1.count) - midhash = be32_to_cpu(ents2[mid - hdr1.count].hashval); - else - midhash = be32_to_cpu(ents1[mid].hashval); - isleft = args->hashval <= midhash; - } - /* - * If the old count is even then the new count is odd, so there's - * no preferred side for the new entry. - * Pick the left one. - */ - else - isleft = 1; - /* - * Calculate moved entry count. Positive means left-to-right, - * negative means right-to-left. Then move the entries. - */ - count = hdr1.count - mid + (isleft == 0); - if (count > 0) - xfs_dir3_leafn_moveents(args, blk1->bp, &hdr1, ents1, - hdr1.count - count, blk2->bp, - &hdr2, ents2, 0, count); - else if (count < 0) - xfs_dir3_leafn_moveents(args, blk2->bp, &hdr2, ents2, 0, - blk1->bp, &hdr1, ents1, - hdr1.count, count); - - ASSERT(hdr1.count + hdr2.count == oldsum); - ASSERT(hdr1.stale + hdr2.stale == oldstale); - - /* log the changes made when moving the entries */ - dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1); - dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2); - xfs_dir3_leaf_log_header(args, blk1->bp); - xfs_dir3_leaf_log_header(args, blk2->bp); - - xfs_dir3_leaf_check(dp, blk1->bp); - xfs_dir3_leaf_check(dp, blk2->bp); - - /* - * Mark whether we're inserting into the old or new leaf. - */ - if (hdr1.count < hdr2.count) - state->inleaf = swap; - else if (hdr1.count > hdr2.count) - state->inleaf = !swap; - else - state->inleaf = swap ^ (blk1->index <= hdr1.count); - /* - * Adjust the expected index for insertion. - */ - if (!state->inleaf) - blk2->index = blk1->index - hdr1.count; - - /* - * Finally sanity check just to make sure we are not returning a - * negative index - */ - if (blk2->index < 0) { - state->inleaf = 1; - blk2->index = 0; - xfs_alert(dp->i_mount, - "%s: picked the wrong leaf? reverting original leaf: blk1->index %d", - __func__, blk1->index); - } -} - -static int -xfs_dir3_data_block_free( - xfs_da_args_t *args, - struct xfs_dir2_data_hdr *hdr, - struct xfs_dir2_free *free, - xfs_dir2_db_t fdb, - int findex, - struct xfs_buf *fbp, - int longest) -{ - int logfree = 0; - __be16 *bests; - struct xfs_dir3_icfree_hdr freehdr; - struct xfs_inode *dp = args->dp; - - dp->d_ops->free_hdr_from_disk(&freehdr, free); - bests = dp->d_ops->free_bests_p(free); - if (hdr) { - /* - * Data block is not empty, just set the free entry to the new - * value. - */ - bests[findex] = cpu_to_be16(longest); - xfs_dir2_free_log_bests(args, fbp, findex, findex); - return 0; - } - - /* One less used entry in the free table. */ - freehdr.nused--; - - /* - * If this was the last entry in the table, we can trim the table size - * back. There might be other entries at the end referring to - * non-existent data blocks, get those too. - */ - if (findex == freehdr.nvalid - 1) { - int i; /* free entry index */ - - for (i = findex - 1; i >= 0; i--) { - if (bests[i] != cpu_to_be16(NULLDATAOFF)) - break; - } - freehdr.nvalid = i + 1; - logfree = 0; - } else { - /* Not the last entry, just punch it out. */ - bests[findex] = cpu_to_be16(NULLDATAOFF); - logfree = 1; - } - - dp->d_ops->free_hdr_to_disk(free, &freehdr); - xfs_dir2_free_log_header(args, fbp); - - /* - * If there are no useful entries left in the block, get rid of the - * block if we can. - */ - if (!freehdr.nused) { - int error; - - error = xfs_dir2_shrink_inode(args, fdb, fbp); - if (error == 0) { - fbp = NULL; - logfree = 0; - } else if (error != ENOSPC || args->total != 0) - return error; - /* - * It's possible to get ENOSPC if there is no - * space reservation. In this case some one - * else will eventually get rid of this block. - */ - } - - /* Log the free entry that changed, unless we got rid of it. */ - if (logfree) - xfs_dir2_free_log_bests(args, fbp, findex, findex); - return 0; -} - -/* - * Remove an entry from a node directory. - * This removes the leaf entry and the data entry, - * and updates the free block if necessary. - */ -static int /* error */ -xfs_dir2_leafn_remove( - xfs_da_args_t *args, /* operation arguments */ - struct xfs_buf *bp, /* leaf buffer */ - int index, /* leaf entry index */ - xfs_da_state_blk_t *dblk, /* data block */ - int *rval) /* resulting block needs join */ -{ - xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_dir2_db_t db; /* data block number */ - struct xfs_buf *dbp; /* data block buffer */ - xfs_dir2_data_entry_t *dep; /* data block entry */ - xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry */ - int longest; /* longest data free entry */ - int off; /* data block entry offset */ - xfs_mount_t *mp; /* filesystem mount point */ - int needlog; /* need to log data header */ - int needscan; /* need to rescan data frees */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir2_data_free *bf; /* bestfree table */ - struct xfs_dir3_icleaf_hdr leafhdr; - struct xfs_dir2_leaf_entry *ents; - - trace_xfs_dir2_leafn_remove(args, index); - - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - - /* - * Point to the entry we're removing. - */ - lep = &ents[index]; - - /* - * Extract the data block and offset from the entry. - */ - db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); - ASSERT(dblk->blkno == db); - off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)); - ASSERT(dblk->index == off); - - /* - * Kill the leaf entry by marking it stale. - * Log the leaf block changes. - */ - leafhdr.stale++; - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(args, bp); - - lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(args, bp, index, index); - - /* - * Make the data entry free. Keep track of the longest freespace - * in the data block in case it changes. - */ - dbp = dblk->bp; - hdr = dbp->b_addr; - dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); - bf = dp->d_ops->data_bestfree_p(hdr); - longest = be16_to_cpu(bf[0].length); - needlog = needscan = 0; - xfs_dir2_data_make_free(args, dbp, off, - dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); - /* - * Rescan the data block freespaces for bestfree. - * Log the data block header if needed. - */ - if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); - if (needlog) - xfs_dir2_data_log_header(args, dbp); - xfs_dir3_data_check(dp, dbp); - /* - * If the longest data block freespace changes, need to update - * the corresponding freeblock entry. - */ - if (longest < be16_to_cpu(bf[0].length)) { - int error; /* error return value */ - struct xfs_buf *fbp; /* freeblock buffer */ - xfs_dir2_db_t fdb; /* freeblock block number */ - int findex; /* index in freeblock entries */ - xfs_dir2_free_t *free; /* freeblock structure */ - - /* - * Convert the data block number to a free block, - * read in the free block. - */ - fdb = dp->d_ops->db_to_fdb(args->geo, db); - error = xfs_dir2_free_read(tp, dp, - xfs_dir2_db_to_da(args->geo, fdb), - &fbp); - if (error) - return error; - free = fbp->b_addr; -#ifdef DEBUG - { - struct xfs_dir3_icfree_hdr freehdr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); - ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) * - (fdb - xfs_dir2_byte_to_db(args->geo, - XFS_DIR2_FREE_OFFSET))); - } -#endif - /* - * Calculate which entry we need to fix. - */ - findex = dp->d_ops->db_to_fdindex(args->geo, db); - longest = be16_to_cpu(bf[0].length); - /* - * If the data block is now empty we can get rid of it - * (usually). - */ - if (longest == args->geo->blksize - - dp->d_ops->data_entry_offset) { - /* - * Try to punch out the data block. - */ - error = xfs_dir2_shrink_inode(args, db, dbp); - if (error == 0) { - dblk->bp = NULL; - hdr = NULL; - } - /* - * We can get ENOSPC if there's no space reservation. - * In this case just drop the buffer and some one else - * will eventually get rid of the empty block. - */ - else if (!(error == ENOSPC && args->total == 0)) - return error; - } - /* - * If we got rid of the data block, we can eliminate that entry - * in the free block. - */ - error = xfs_dir3_data_block_free(args, hdr, free, - fdb, findex, fbp, longest); - if (error) - return error; - } - - xfs_dir3_leaf_check(dp, bp); - /* - * Return indication of whether this leaf block is empty enough - * to justify trying to join it with a neighbor. - */ - *rval = (dp->d_ops->leaf_hdr_size + - (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) < - args->geo->magicpct; - return 0; -} - -/* - * Split the leaf entries in the old block into old and new blocks. - */ -int /* error */ -xfs_dir2_leafn_split( - xfs_da_state_t *state, /* btree cursor */ - xfs_da_state_blk_t *oldblk, /* original block */ - xfs_da_state_blk_t *newblk) /* newly created block */ -{ - xfs_da_args_t *args; /* operation arguments */ - xfs_dablk_t blkno; /* new leaf block number */ - int error; /* error return value */ - xfs_mount_t *mp; /* filesystem mount point */ - struct xfs_inode *dp; - - /* - * Allocate space for a new leaf node. - */ - args = state->args; - dp = args->dp; - mp = dp->i_mount; - ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC); - error = xfs_da_grow_inode(args, &blkno); - if (error) { - return error; - } - /* - * Initialize the new leaf block. - */ - error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(args->geo, blkno), - &newblk->bp, XFS_DIR2_LEAFN_MAGIC); - if (error) - return error; - - newblk->blkno = blkno; - newblk->magic = XFS_DIR2_LEAFN_MAGIC; - /* - * Rebalance the entries across the two leaves, link the new - * block into the leaves. - */ - xfs_dir2_leafn_rebalance(state, oldblk, newblk); - error = xfs_da3_blk_link(state, oldblk, newblk); - if (error) { - return error; - } - /* - * Insert the new entry in the correct block. - */ - if (state->inleaf) - error = xfs_dir2_leafn_add(oldblk->bp, args, oldblk->index); - else - error = xfs_dir2_leafn_add(newblk->bp, args, newblk->index); - /* - * Update last hashval in each block since we added the name. - */ - oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL); - newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL); - xfs_dir3_leaf_check(dp, oldblk->bp); - xfs_dir3_leaf_check(dp, newblk->bp); - return error; -} - -/* - * Check a leaf block and its neighbors to see if the block should be - * collapsed into one or the other neighbor. Always keep the block - * with the smaller block number. - * If the current block is over 50% full, don't try to join it, return 0. - * If the block is empty, fill in the state structure and return 2. - * If it can be collapsed, fill in the state structure and return 1. - * If nothing can be done, return 0. - */ -int /* error */ -xfs_dir2_leafn_toosmall( - xfs_da_state_t *state, /* btree cursor */ - int *action) /* resulting action to take */ -{ - xfs_da_state_blk_t *blk; /* leaf block */ - xfs_dablk_t blkno; /* leaf block number */ - struct xfs_buf *bp; /* leaf buffer */ - int bytes; /* bytes in use */ - int count; /* leaf live entry count */ - int error; /* error return value */ - int forward; /* sibling block direction */ - int i; /* sibling counter */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - int rval; /* result from path_shift */ - struct xfs_dir3_icleaf_hdr leafhdr; - struct xfs_dir2_leaf_entry *ents; - struct xfs_inode *dp = state->args->dp; - - /* - * Check for the degenerate case of the block being over 50% full. - * If so, it's not worth even looking to see if we might be able - * to coalesce with a sibling. - */ - blk = &state->path.blk[state->path.active - 1]; - leaf = blk->bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - xfs_dir3_leaf_check(dp, blk->bp); - - count = leafhdr.count - leafhdr.stale; - bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]); - if (bytes > (state->args->geo->blksize >> 1)) { - /* - * Blk over 50%, don't try to join. - */ - *action = 0; - return 0; - } - /* - * Check for the degenerate case of the block being empty. - * If the block is empty, we'll simply delete it, no need to - * coalesce it with a sibling block. We choose (arbitrarily) - * to merge with the forward block unless it is NULL. - */ - if (count == 0) { - /* - * Make altpath point to the block we want to keep and - * path point to the block we want to drop (this one). - */ - forward = (leafhdr.forw != 0); - memcpy(&state->altpath, &state->path, sizeof(state->path)); - error = xfs_da3_path_shift(state, &state->altpath, forward, 0, - &rval); - if (error) - return error; - *action = rval ? 2 : 0; - return 0; - } - /* - * Examine each sibling block to see if we can coalesce with - * at least 25% free space to spare. We need to figure out - * whether to merge with the forward or the backward block. - * We prefer coalescing with the lower numbered sibling so as - * to shrink a directory over time. - */ - forward = leafhdr.forw < leafhdr.back; - for (i = 0, bp = NULL; i < 2; forward = !forward, i++) { - struct xfs_dir3_icleaf_hdr hdr2; - - blkno = forward ? leafhdr.forw : leafhdr.back; - if (blkno == 0) - continue; - /* - * Read the sibling leaf block. - */ - error = xfs_dir3_leafn_read(state->args->trans, dp, - blkno, -1, &bp); - if (error) - return error; - - /* - * Count bytes in the two blocks combined. - */ - count = leafhdr.count - leafhdr.stale; - bytes = state->args->geo->blksize - - (state->args->geo->blksize >> 2); - - leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - count += hdr2.count - hdr2.stale; - bytes -= count * sizeof(ents[0]); - - /* - * Fits with at least 25% to spare. - */ - if (bytes >= 0) - break; - xfs_trans_brelse(state->args->trans, bp); - } - /* - * Didn't like either block, give up. - */ - if (i >= 2) { - *action = 0; - return 0; - } - - /* - * Make altpath point to the block we want to keep (the lower - * numbered block) and path point to the block we want to drop. - */ - memcpy(&state->altpath, &state->path, sizeof(state->path)); - if (blkno < blk->blkno) - error = xfs_da3_path_shift(state, &state->altpath, forward, 0, - &rval); - else - error = xfs_da3_path_shift(state, &state->path, forward, 0, - &rval); - if (error) { - return error; - } - *action = rval ? 0 : 1; - return 0; -} - -/* - * Move all the leaf entries from drop_blk to save_blk. - * This is done as part of a join operation. - */ -void -xfs_dir2_leafn_unbalance( - xfs_da_state_t *state, /* cursor */ - xfs_da_state_blk_t *drop_blk, /* dead block */ - xfs_da_state_blk_t *save_blk) /* surviving block */ -{ - xfs_da_args_t *args; /* operation arguments */ - xfs_dir2_leaf_t *drop_leaf; /* dead leaf structure */ - xfs_dir2_leaf_t *save_leaf; /* surviving leaf structure */ - struct xfs_dir3_icleaf_hdr savehdr; - struct xfs_dir3_icleaf_hdr drophdr; - struct xfs_dir2_leaf_entry *sents; - struct xfs_dir2_leaf_entry *dents; - struct xfs_inode *dp = state->args->dp; - - args = state->args; - ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); - ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); - drop_leaf = drop_blk->bp->b_addr; - save_leaf = save_blk->bp->b_addr; - - dp->d_ops->leaf_hdr_from_disk(&savehdr, save_leaf); - dp->d_ops->leaf_hdr_from_disk(&drophdr, drop_leaf); - sents = dp->d_ops->leaf_ents_p(save_leaf); - dents = dp->d_ops->leaf_ents_p(drop_leaf); - - /* - * If there are any stale leaf entries, take this opportunity - * to purge them. - */ - if (drophdr.stale) - xfs_dir3_leaf_compact(args, &drophdr, drop_blk->bp); - if (savehdr.stale) - xfs_dir3_leaf_compact(args, &savehdr, save_blk->bp); - - /* - * Move the entries from drop to the appropriate end of save. - */ - drop_blk->hashval = be32_to_cpu(dents[drophdr.count - 1].hashval); - if (xfs_dir2_leafn_order(dp, save_blk->bp, drop_blk->bp)) - xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0, - save_blk->bp, &savehdr, sents, 0, - drophdr.count); - else - xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0, - save_blk->bp, &savehdr, sents, - savehdr.count, drophdr.count); - save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval); - - /* log the changes made when moving the entries */ - dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr); - dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr); - xfs_dir3_leaf_log_header(args, save_blk->bp); - xfs_dir3_leaf_log_header(args, drop_blk->bp); - - xfs_dir3_leaf_check(dp, save_blk->bp); - xfs_dir3_leaf_check(dp, drop_blk->bp); -} - -/* - * Top-level node form directory addname routine. - */ -int /* error */ -xfs_dir2_node_addname( - xfs_da_args_t *args) /* operation arguments */ -{ - xfs_da_state_blk_t *blk; /* leaf block for insert */ - int error; /* error return value */ - int rval; /* sub-return value */ - xfs_da_state_t *state; /* btree cursor */ - - trace_xfs_dir2_node_addname(args); - - /* - * Allocate and initialize the state (btree cursor). - */ - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - /* - * Look up the name. We're not supposed to find it, but - * this gives us the insertion point. - */ - error = xfs_da3_node_lookup_int(state, &rval); - if (error) - rval = error; - if (rval != ENOENT) { - goto done; - } - /* - * Add the data entry to a data block. - * Extravalid is set to a freeblock found by lookup. - */ - rval = xfs_dir2_node_addname_int(args, - state->extravalid ? &state->extrablk : NULL); - if (rval) { - goto done; - } - blk = &state->path.blk[state->path.active - 1]; - ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); - /* - * Add the new leaf entry. - */ - rval = xfs_dir2_leafn_add(blk->bp, args, blk->index); - if (rval == 0) { - /* - * It worked, fix the hash values up the btree. - */ - if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) - xfs_da3_fixhashpath(state, &state->path); - } else { - /* - * It didn't work, we need to split the leaf block. - */ - if (args->total == 0) { - ASSERT(rval == ENOSPC); - goto done; - } - /* - * Split the leaf block and insert the new entry. - */ - rval = xfs_da3_split(state); - } -done: - xfs_da_state_free(state); - return rval; -} - -/* - * Add the data entry for a node-format directory name addition. - * The leaf entry is added in xfs_dir2_leafn_add. - * We may enter with a freespace block that the lookup found. - */ -static int /* error */ -xfs_dir2_node_addname_int( - xfs_da_args_t *args, /* operation arguments */ - xfs_da_state_blk_t *fblk) /* optional freespace block */ -{ - xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_dir2_db_t dbno; /* data block number */ - struct xfs_buf *dbp; /* data block buffer */ - xfs_dir2_data_entry_t *dep; /* data entry pointer */ - xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_data_unused_t *dup; /* data unused entry pointer */ - int error; /* error return value */ - xfs_dir2_db_t fbno; /* freespace block number */ - struct xfs_buf *fbp; /* freespace buffer */ - int findex; /* freespace entry index */ - xfs_dir2_free_t *free=NULL; /* freespace block structure */ - xfs_dir2_db_t ifbno; /* initial freespace block no */ - xfs_dir2_db_t lastfbno=0; /* highest freespace block no */ - int length; /* length of the new entry */ - int logfree; /* need to log free entry */ - xfs_mount_t *mp; /* filesystem mount point */ - int needlog; /* need to log data header */ - int needscan; /* need to rescan data frees */ - __be16 *tagp; /* data entry tag pointer */ - xfs_trans_t *tp; /* transaction pointer */ - __be16 *bests; - struct xfs_dir3_icfree_hdr freehdr; - struct xfs_dir2_data_free *bf; - - dp = args->dp; - mp = dp->i_mount; - tp = args->trans; - length = dp->d_ops->data_entsize(args->namelen); - /* - * If we came in with a freespace block that means that lookup - * found an entry with our hash value. This is the freespace - * block for that data entry. - */ - if (fblk) { - fbp = fblk->bp; - /* - * Remember initial freespace block number. - */ - ifbno = fblk->blkno; - free = fbp->b_addr; - findex = fblk->index; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - - /* - * This means the free entry showed that the data block had - * space for our entry, so we remembered it. - * Use that data block. - */ - if (findex >= 0) { - ASSERT(findex < freehdr.nvalid); - ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF); - ASSERT(be16_to_cpu(bests[findex]) >= length); - dbno = freehdr.firstdb + findex; - } else { - /* - * The data block looked at didn't have enough room. - * We'll start at the beginning of the freespace entries. - */ - dbno = -1; - findex = 0; - } - } else { - /* - * Didn't come in with a freespace block, so no data block. - */ - ifbno = dbno = -1; - fbp = NULL; - findex = 0; - } - - /* - * If we don't have a data block yet, we're going to scan the - * freespace blocks looking for one. Figure out what the - * highest freespace block number is. - */ - if (dbno == -1) { - xfs_fileoff_t fo; /* freespace block number */ - - if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) - return error; - lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); - fbno = ifbno; - } - /* - * While we haven't identified a data block, search the freeblock - * data for a good data block. If we find a null freeblock entry, - * indicating a hole in the data blocks, remember that. - */ - while (dbno == -1) { - /* - * If we don't have a freeblock in hand, get the next one. - */ - if (fbp == NULL) { - /* - * Happens the first time through unless lookup gave - * us a freespace block to start with. - */ - if (++fbno == 0) - fbno = xfs_dir2_byte_to_db(args->geo, - XFS_DIR2_FREE_OFFSET); - /* - * If it's ifbno we already looked at it. - */ - if (fbno == ifbno) - fbno++; - /* - * If it's off the end we're done. - */ - if (fbno >= lastfbno) - break; - /* - * Read the block. There can be holes in the - * freespace blocks, so this might not succeed. - * This should be really rare, so there's no reason - * to avoid it. - */ - error = xfs_dir2_free_try_read(tp, dp, - xfs_dir2_db_to_da(args->geo, fbno), - &fbp); - if (error) - return error; - if (!fbp) - continue; - free = fbp->b_addr; - findex = 0; - } - /* - * Look at the current free entry. Is it good enough? - * - * The bests initialisation should be where the bufer is read in - * the above branch. But gcc is too stupid to realise that bests - * and the freehdr are actually initialised if they are placed - * there, so we have to do it here to avoid warnings. Blech. - */ - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - if (be16_to_cpu(bests[findex]) != NULLDATAOFF && - be16_to_cpu(bests[findex]) >= length) - dbno = freehdr.firstdb + findex; - else { - /* - * Are we done with the freeblock? - */ - if (++findex == freehdr.nvalid) { - /* - * Drop the block. - */ - xfs_trans_brelse(tp, fbp); - fbp = NULL; - if (fblk && fblk->bp) - fblk->bp = NULL; - } - } - } - /* - * If we don't have a data block, we need to allocate one and make - * the freespace entries refer to it. - */ - if (unlikely(dbno == -1)) { - /* - * Not allowed to allocate, return failure. - */ - if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) - return ENOSPC; - - /* - * Allocate and initialize the new data block. - */ - if (unlikely((error = xfs_dir2_grow_inode(args, - XFS_DIR2_DATA_SPACE, - &dbno)) || - (error = xfs_dir3_data_init(args, dbno, &dbp)))) - return error; - - /* - * If (somehow) we have a freespace block, get rid of it. - */ - if (fbp) - xfs_trans_brelse(tp, fbp); - if (fblk && fblk->bp) - fblk->bp = NULL; - - /* - * Get the freespace block corresponding to the data block - * that was just allocated. - */ - fbno = dp->d_ops->db_to_fdb(args->geo, dbno); - error = xfs_dir2_free_try_read(tp, dp, - xfs_dir2_db_to_da(args->geo, fbno), - &fbp); - if (error) - return error; - - /* - * If there wasn't a freespace block, the read will - * return a NULL fbp. Allocate and initialize a new one. - */ - if (!fbp) { - error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, - &fbno); - if (error) - return error; - - if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) { - xfs_alert(mp, - "%s: dir ino %llu needed freesp block %lld for\n" - " data block %lld, got %lld ifbno %llu lastfbno %d", - __func__, (unsigned long long)dp->i_ino, - (long long)dp->d_ops->db_to_fdb( - args->geo, dbno), - (long long)dbno, (long long)fbno, - (unsigned long long)ifbno, lastfbno); - if (fblk) { - xfs_alert(mp, - " fblk 0x%p blkno %llu index %d magic 0x%x", - fblk, - (unsigned long long)fblk->blkno, - fblk->index, - fblk->magic); - } else { - xfs_alert(mp, " ... fblk is NULL"); - } - XFS_ERROR_REPORT("xfs_dir2_node_addname_int", - XFS_ERRLEVEL_LOW, mp); - return EFSCORRUPTED; - } - - /* - * Get a buffer for the new block. - */ - error = xfs_dir3_free_get_buf(args, fbno, &fbp); - if (error) - return error; - free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - - /* - * Remember the first slot as our empty slot. - */ - freehdr.firstdb = - (fbno - xfs_dir2_byte_to_db(args->geo, - XFS_DIR2_FREE_OFFSET)) * - dp->d_ops->free_max_bests(args->geo); - } else { - free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - } - - /* - * Set the freespace block index from the data block number. - */ - findex = dp->d_ops->db_to_fdindex(args->geo, dbno); - /* - * If it's after the end of the current entries in the - * freespace block, extend that table. - */ - if (findex >= freehdr.nvalid) { - ASSERT(findex < dp->d_ops->free_max_bests(args->geo)); - freehdr.nvalid = findex + 1; - /* - * Tag new entry so nused will go up. - */ - bests[findex] = cpu_to_be16(NULLDATAOFF); - } - /* - * If this entry was for an empty data block - * (this should always be true) then update the header. - */ - if (bests[findex] == cpu_to_be16(NULLDATAOFF)) { - freehdr.nused++; - dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_header(args, fbp); - } - /* - * Update the real value in the table. - * We haven't allocated the data entry yet so this will - * change again. - */ - hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); - bests[findex] = bf[0].length; - logfree = 1; - } - /* - * We had a data block so we don't have to make a new one. - */ - else { - /* - * If just checking, we succeeded. - */ - if (args->op_flags & XFS_DA_OP_JUSTCHECK) - return 0; - - /* - * Read the data block in. - */ - error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(args->geo, dbno), - -1, &dbp); - if (error) - return error; - hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); - logfree = 0; - } - ASSERT(be16_to_cpu(bf[0].length) >= length); - /* - * Point to the existing unused space. - */ - dup = (xfs_dir2_data_unused_t *) - ((char *)hdr + be16_to_cpu(bf[0].offset)); - needscan = needlog = 0; - /* - * Mark the first part of the unused space, inuse for us. - */ - xfs_dir2_data_use_free(args, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, - &needlog, &needscan); - /* - * Fill in the new entry and log it. - */ - dep = (xfs_dir2_data_entry_t *)dup; - dep->inumber = cpu_to_be64(args->inumber); - dep->namelen = args->namelen; - memcpy(dep->name, args->name, dep->namelen); - dp->d_ops->data_put_ftype(dep, args->filetype); - tagp = dp->d_ops->data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(args, dbp, dep); - /* - * Rescan the block for bestfree if needed. - */ - if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); - /* - * Log the data block header if needed. - */ - if (needlog) - xfs_dir2_data_log_header(args, dbp); - /* - * If the freespace entry is now wrong, update it. - */ - bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */ - if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) { - bests[findex] = bf[0].length; - logfree = 1; - } - /* - * Log the freespace entry if needed. - */ - if (logfree) - xfs_dir2_free_log_bests(args, fbp, findex, findex); - /* - * Return the data block and offset in args, then drop the data block. - */ - args->blkno = (xfs_dablk_t)dbno; - args->index = be16_to_cpu(*tagp); - return 0; -} - -/* - * Lookup an entry in a node-format directory. - * All the real work happens in xfs_da3_node_lookup_int. - * The only real output is the inode number of the entry. - */ -int /* error */ -xfs_dir2_node_lookup( - xfs_da_args_t *args) /* operation arguments */ -{ - int error; /* error return value */ - int i; /* btree level */ - int rval; /* operation return value */ - xfs_da_state_t *state; /* btree cursor */ - - trace_xfs_dir2_node_lookup(args); - - /* - * Allocate and initialize the btree cursor. - */ - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - /* - * Fill in the path to the entry in the cursor. - */ - error = xfs_da3_node_lookup_int(state, &rval); - if (error) - rval = error; - else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) { - /* If a CI match, dup the actual name and return EEXIST */ - xfs_dir2_data_entry_t *dep; - - dep = (xfs_dir2_data_entry_t *) - ((char *)state->extrablk.bp->b_addr + - state->extrablk.index); - rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen); - } - /* - * Release the btree blocks and leaf block. - */ - for (i = 0; i < state->path.active; i++) { - xfs_trans_brelse(args->trans, state->path.blk[i].bp); - state->path.blk[i].bp = NULL; - } - /* - * Release the data block if we have it. - */ - if (state->extravalid && state->extrablk.bp) { - xfs_trans_brelse(args->trans, state->extrablk.bp); - state->extrablk.bp = NULL; - } - xfs_da_state_free(state); - return rval; -} - -/* - * Remove an entry from a node-format directory. - */ -int /* error */ -xfs_dir2_node_removename( - struct xfs_da_args *args) /* operation arguments */ -{ - struct xfs_da_state_blk *blk; /* leaf block */ - int error; /* error return value */ - int rval; /* operation return value */ - struct xfs_da_state *state; /* btree cursor */ - - trace_xfs_dir2_node_removename(args); - - /* - * Allocate and initialize the btree cursor. - */ - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - - /* Look up the entry we're deleting, set up the cursor. */ - error = xfs_da3_node_lookup_int(state, &rval); - if (error) - goto out_free; - - /* Didn't find it, upper layer screwed up. */ - if (rval != EEXIST) { - error = rval; - goto out_free; - } - - blk = &state->path.blk[state->path.active - 1]; - ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); - ASSERT(state->extravalid); - /* - * Remove the leaf and data entries. - * Extrablk refers to the data block. - */ - error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, - &state->extrablk, &rval); - if (error) - goto out_free; - /* - * Fix the hash values up the btree. - */ - xfs_da3_fixhashpath(state, &state->path); - /* - * If we need to join leaf blocks, do it. - */ - if (rval && state->path.active > 1) - error = xfs_da3_join(state); - /* - * If no errors so far, try conversion to leaf format. - */ - if (!error) - error = xfs_dir2_node_to_leaf(state); -out_free: - xfs_da_state_free(state); - return error; -} - -/* - * Replace an entry's inode number in a node-format directory. - */ -int /* error */ -xfs_dir2_node_replace( - xfs_da_args_t *args) /* operation arguments */ -{ - xfs_da_state_blk_t *blk; /* leaf block */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_dir2_data_entry_t *dep; /* data entry changed */ - int error; /* error return value */ - int i; /* btree level */ - xfs_ino_t inum; /* new inode number */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry being changed */ - int rval; /* internal return value */ - xfs_da_state_t *state; /* btree cursor */ - - trace_xfs_dir2_node_replace(args); - - /* - * Allocate and initialize the btree cursor. - */ - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - inum = args->inumber; - /* - * Lookup the entry to change in the btree. - */ - error = xfs_da3_node_lookup_int(state, &rval); - if (error) { - rval = error; - } - /* - * It should be found, since the vnodeops layer has looked it up - * and locked it. But paranoia is good. - */ - if (rval == EEXIST) { - struct xfs_dir2_leaf_entry *ents; - /* - * Find the leaf entry. - */ - blk = &state->path.blk[state->path.active - 1]; - ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); - leaf = blk->bp->b_addr; - ents = args->dp->d_ops->leaf_ents_p(leaf); - lep = &ents[blk->index]; - ASSERT(state->extravalid); - /* - * Point to the data entry. - */ - hdr = state->extrablk.bp->b_addr; - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); - dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + - xfs_dir2_dataptr_to_off(args->geo, - be32_to_cpu(lep->address))); - ASSERT(inum != be64_to_cpu(dep->inumber)); - /* - * Fill in the new inode number and log the entry. - */ - dep->inumber = cpu_to_be64(inum); - args->dp->d_ops->data_put_ftype(dep, args->filetype); - xfs_dir2_data_log_entry(args, state->extrablk.bp, dep); - rval = 0; - } - /* - * Didn't find it, and we're holding a data block. Drop it. - */ - else if (state->extravalid) { - xfs_trans_brelse(args->trans, state->extrablk.bp); - state->extrablk.bp = NULL; - } - /* - * Release all the buffers in the cursor. - */ - for (i = 0; i < state->path.active; i++) { - xfs_trans_brelse(args->trans, state->path.blk[i].bp); - state->path.blk[i].bp = NULL; - } - xfs_da_state_free(state); - return rval; -} - -/* - * Trim off a trailing empty freespace block. - * Return (in rvalp) 1 if we did it, 0 if not. - */ -int /* error */ -xfs_dir2_node_trim_free( - xfs_da_args_t *args, /* operation arguments */ - xfs_fileoff_t fo, /* free block number */ - int *rvalp) /* out: did something */ -{ - struct xfs_buf *bp; /* freespace buffer */ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return code */ - xfs_dir2_free_t *free; /* freespace structure */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir3_icfree_hdr freehdr; - - dp = args->dp; - mp = dp->i_mount; - tp = args->trans; - /* - * Read the freespace block. - */ - error = xfs_dir2_free_try_read(tp, dp, fo, &bp); - if (error) - return error; - /* - * There can be holes in freespace. If fo is a hole, there's - * nothing to do. - */ - if (!bp) - return 0; - free = bp->b_addr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); - - /* - * If there are used entries, there's nothing to do. - */ - if (freehdr.nused > 0) { - xfs_trans_brelse(tp, bp); - *rvalp = 0; - return 0; - } - /* - * Blow the block away. - */ - error = xfs_dir2_shrink_inode(args, - xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo), bp); - if (error) { - /* - * Can't fail with ENOSPC since that only happens with no - * space reservation, when breaking up an extent into two - * pieces. This is the last block of an extent. - */ - ASSERT(error != ENOSPC); - xfs_trans_brelse(tp, bp); - return error; - } - /* - * Return that we succeeded. - */ - *rvalp = 1; - return 0; -} diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h deleted file mode 100644 index 27ce0794d196..000000000000 --- a/fs/xfs/xfs_dir2_priv.h +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_PRIV_H__ -#define __XFS_DIR2_PRIV_H__ - -struct dir_context; - -/* - * Directory offset/block conversion functions. - * - * DB blocks here are logical directory block numbers, not filesystem blocks. - */ - -/* - * Convert dataptr to byte in file space - */ -static inline xfs_dir2_off_t -xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp) -{ - return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; -} - -/* - * Convert byte in file space to dataptr. It had better be aligned. - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by) -{ - return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); -} - -/* - * Convert byte in space to (DB) block - */ -static inline xfs_dir2_db_t -xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by) -{ - return (xfs_dir2_db_t)(by >> geo->blklog); -} - -/* - * Convert dataptr to a block number - */ -static inline xfs_dir2_db_t -xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp)); -} - -/* - * Convert byte in space to offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by) -{ - return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1)); -} - -/* - * Convert dataptr to a byte offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp)); -} - -/* - * Convert block and offset to byte in space - */ -static inline xfs_dir2_off_t -xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return ((xfs_dir2_off_t)db << geo->blklog) + o; -} - -/* - * Convert block (DB) to block (dablk) - */ -static inline xfs_dablk_t -xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db) -{ - return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog)); -} - -/* - * Convert byte in space to (DA) block - */ -static inline xfs_dablk_t -xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by) -{ - return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by)); -} - -/* - * Convert block and offset to dataptr - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o)); -} - -/* - * Convert block (dablk) to block (DB) - */ -static inline xfs_dir2_db_t -xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da) -{ - return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog)); -} - -/* - * Convert block (dablk) to byte offset in space - */ -static inline xfs_dir2_off_t -xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da) -{ - return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0); -} - -/* - * Directory tail pointer accessor functions. Based on block geometry. - */ -static inline struct xfs_dir2_block_tail * -xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) -{ - return ((struct xfs_dir2_block_tail *) - ((char *)hdr + geo->blksize)) - 1; -} - -static inline struct xfs_dir2_leaf_tail * -xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) -{ - return (struct xfs_dir2_leaf_tail *) - ((char *)lp + geo->blksize - - sizeof(struct xfs_dir2_leaf_tail)); -} - -/* xfs_dir2.c */ -extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); -extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, - xfs_dir2_db_t *dbp); -extern int xfs_dir_cilookup_result(struct xfs_da_args *args, - const unsigned char *name, int len); - -#define S_SHIFT 12 -extern const unsigned char xfs_mode_to_ftype[]; - -extern unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, - __uint8_t filetype); - - -/* xfs_dir2_block.c */ -extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_buf **bpp); -extern int xfs_dir2_block_addname(struct xfs_da_args *args); -extern int xfs_dir2_block_lookup(struct xfs_da_args *args); -extern int xfs_dir2_block_removename(struct xfs_da_args *args); -extern int xfs_dir2_block_replace(struct xfs_da_args *args); -extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, - struct xfs_buf *lbp, struct xfs_buf *dbp); - -/* xfs_dir2_data.c */ -#ifdef DEBUG -#define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp); -#else -#define xfs_dir3_data_check(dp,bp) -#endif - -extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); -extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, - xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); -extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, - xfs_daddr_t mapped_bno); - -extern struct xfs_dir2_data_free * -xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, - struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup, - int *loghead); -extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, - struct xfs_buf **bpp); - -/* xfs_dir2_leaf.c */ -extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp, - xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); -extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, - struct xfs_buf *dbp); -extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); -extern void xfs_dir3_leaf_compact(struct xfs_da_args *args, - struct xfs_dir3_icleaf_hdr *leafhdr, struct xfs_buf *bp); -extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, - struct xfs_dir2_leaf_entry *ents, int *indexp, - int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); -extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, - struct xfs_buf **bpp, __uint16_t magic); -extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args, - struct xfs_buf *bp, int first, int last); -extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args, - struct xfs_buf *bp); -extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); -extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); -extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); -extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, - struct xfs_buf *lbp); -extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, - struct xfs_buf *lbp, xfs_dir2_db_t db); -extern struct xfs_dir2_leaf_entry * -xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr, - struct xfs_dir2_leaf_entry *ents, int index, int compact, - int lowstale, int highstale, int *lfloglow, int *lfloghigh); -extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); - -extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, - struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); - -/* xfs_dir2_node.c */ -extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, - struct xfs_buf *lbp); -extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp, - struct xfs_buf *bp, int *count); -extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp, - struct xfs_da_args *args, int *indexp, - struct xfs_da_state *state); -extern int xfs_dir2_leafn_order(struct xfs_inode *dp, struct xfs_buf *leaf1_bp, - struct xfs_buf *leaf2_bp); -extern int xfs_dir2_leafn_split(struct xfs_da_state *state, - struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); -extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); -extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk, - struct xfs_da_state_blk *save_blk); -extern int xfs_dir2_node_addname(struct xfs_da_args *args); -extern int xfs_dir2_node_lookup(struct xfs_da_args *args); -extern int xfs_dir2_node_removename(struct xfs_da_args *args); -extern int xfs_dir2_node_replace(struct xfs_da_args *args); -extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, - int *rvalp); -extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp, - xfs_dablk_t fbno, struct xfs_buf **bpp); - -/* xfs_dir2_sf.c */ -extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, - struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp); -extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp, - int size, xfs_dir2_sf_hdr_t *sfhp); -extern int xfs_dir2_sf_addname(struct xfs_da_args *args); -extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); -extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); -extern int xfs_dir2_sf_removename(struct xfs_da_args *args); -extern int xfs_dir2_sf_replace(struct xfs_da_args *args); - -/* xfs_dir2_readdir.c */ -extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, - size_t bufsize); - -#endif /* __XFS_DIR2_PRIV_H__ */ diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c deleted file mode 100644 index ab3563b87995..000000000000 --- a/fs/xfs/xfs_dir2_sf.c +++ /dev/null @@ -1,1184 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_inode.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_error.h" -#include "xfs_dir2.h" -#include "xfs_dir2_priv.h" -#include "xfs_trace.h" -#include "xfs_dinode.h" - -/* - * Prototypes for internal functions. - */ -static void xfs_dir2_sf_addname_easy(xfs_da_args_t *args, - xfs_dir2_sf_entry_t *sfep, - xfs_dir2_data_aoff_t offset, - int new_isize); -static void xfs_dir2_sf_addname_hard(xfs_da_args_t *args, int objchange, - int new_isize); -static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange, - xfs_dir2_sf_entry_t **sfepp, - xfs_dir2_data_aoff_t *offsetp); -#ifdef DEBUG -static void xfs_dir2_sf_check(xfs_da_args_t *args); -#else -#define xfs_dir2_sf_check(args) -#endif /* DEBUG */ -#if XFS_BIG_INUMS -static void xfs_dir2_sf_toino4(xfs_da_args_t *args); -static void xfs_dir2_sf_toino8(xfs_da_args_t *args); -#endif /* XFS_BIG_INUMS */ - -/* - * Given a block directory (dp/block), calculate its size as a shortform (sf) - * directory and a header for the sf directory, if it will fit it the - * space currently present in the inode. If it won't fit, the output - * size is too big (but not accurate). - */ -int /* size for sf form */ -xfs_dir2_block_sfsize( - xfs_inode_t *dp, /* incore inode pointer */ - xfs_dir2_data_hdr_t *hdr, /* block directory data */ - xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */ -{ - xfs_dir2_dataptr_t addr; /* data entry address */ - xfs_dir2_leaf_entry_t *blp; /* leaf area of the block */ - xfs_dir2_block_tail_t *btp; /* tail area of the block */ - int count; /* shortform entry count */ - xfs_dir2_data_entry_t *dep; /* data entry in the block */ - int i; /* block entry index */ - int i8count; /* count of big-inode entries */ - int isdot; /* entry is "." */ - int isdotdot; /* entry is ".." */ - xfs_mount_t *mp; /* mount structure pointer */ - int namelen; /* total name bytes */ - xfs_ino_t parent = 0; /* parent inode number */ - int size=0; /* total computed size */ - int has_ftype; - struct xfs_da_geometry *geo; - - mp = dp->i_mount; - geo = mp->m_dir_geo; - - /* - * if there is a filetype field, add the extra byte to the namelen - * for each entry that we see. - */ - has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0; - - count = i8count = namelen = 0; - btp = xfs_dir2_block_tail_p(geo, hdr); - blp = xfs_dir2_block_leaf_p(btp); - - /* - * Iterate over the block's data entries by using the leaf pointers. - */ - for (i = 0; i < be32_to_cpu(btp->count); i++) { - if ((addr = be32_to_cpu(blp[i].address)) == XFS_DIR2_NULL_DATAPTR) - continue; - /* - * Calculate the pointer to the entry at hand. - */ - dep = (xfs_dir2_data_entry_t *)((char *)hdr + - xfs_dir2_dataptr_to_off(geo, addr)); - /* - * Detect . and .., so we can special-case them. - * . is not included in sf directories. - * .. is included by just the parent inode number. - */ - isdot = dep->namelen == 1 && dep->name[0] == '.'; - isdotdot = - dep->namelen == 2 && - dep->name[0] == '.' && dep->name[1] == '.'; -#if XFS_BIG_INUMS - if (!isdot) - i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM; -#endif - /* take into account the file type field */ - if (!isdot && !isdotdot) { - count++; - namelen += dep->namelen + has_ftype; - } else if (isdotdot) - parent = be64_to_cpu(dep->inumber); - /* - * Calculate the new size, see if we should give up yet. - */ - size = xfs_dir2_sf_hdr_size(i8count) + /* header */ - count + /* namelen */ - count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */ - namelen + /* name */ - (i8count ? /* inumber */ - (uint)sizeof(xfs_dir2_ino8_t) * count : - (uint)sizeof(xfs_dir2_ino4_t) * count); - if (size > XFS_IFORK_DSIZE(dp)) - return size; /* size value is a failure */ - } - /* - * Create the output header, if it worked. - */ - sfhp->count = count; - sfhp->i8count = i8count; - dp->d_ops->sf_put_parent_ino(sfhp, parent); - return size; -} - -/* - * Convert a block format directory to shortform. - * Caller has already checked that it will fit, and built us a header. - */ -int /* error */ -xfs_dir2_block_to_sf( - xfs_da_args_t *args, /* operation arguments */ - struct xfs_buf *bp, - int size, /* shortform directory size */ - xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ -{ - xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_block_tail_t *btp; /* block tail pointer */ - xfs_dir2_data_entry_t *dep; /* data entry pointer */ - xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_data_unused_t *dup; /* unused data pointer */ - char *endptr; /* end of data entries */ - int error; /* error return value */ - int logflags; /* inode logging flags */ - xfs_mount_t *mp; /* filesystem mount point */ - char *ptr; /* current data pointer */ - xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ - xfs_dir2_sf_hdr_t *dst; /* temporary data buffer */ - - trace_xfs_dir2_block_to_sf(args); - - dp = args->dp; - mp = dp->i_mount; - - /* - * allocate a temporary destination buffer the size of the inode - * to format the data into. Once we have formatted the data, we - * can free the block and copy the formatted data into the inode literal - * area. - */ - dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP); - hdr = bp->b_addr; - - /* - * Copy the header into the newly allocate local space. - */ - sfp = (xfs_dir2_sf_hdr_t *)dst; - memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); - - /* - * Set up to loop over the block's entries. - */ - btp = xfs_dir2_block_tail_p(args->geo, hdr); - ptr = (char *)dp->d_ops->data_entry_p(hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); - sfep = xfs_dir2_sf_firstentry(sfp); - /* - * Loop over the active and unused entries. - * Stop when we reach the leaf/tail portion of the block. - */ - while (ptr < endptr) { - /* - * If it's unused, just skip over it. - */ - dup = (xfs_dir2_data_unused_t *)ptr; - if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - ptr += be16_to_cpu(dup->length); - continue; - } - dep = (xfs_dir2_data_entry_t *)ptr; - /* - * Skip . - */ - if (dep->namelen == 1 && dep->name[0] == '.') - ASSERT(be64_to_cpu(dep->inumber) == dp->i_ino); - /* - * Skip .., but make sure the inode number is right. - */ - else if (dep->namelen == 2 && - dep->name[0] == '.' && dep->name[1] == '.') - ASSERT(be64_to_cpu(dep->inumber) == - dp->d_ops->sf_get_parent_ino(sfp)); - /* - * Normal entry, copy it into shortform. - */ - else { - sfep->namelen = dep->namelen; - xfs_dir2_sf_put_offset(sfep, - (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)hdr)); - memcpy(sfep->name, dep->name, dep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, - be64_to_cpu(dep->inumber)); - dp->d_ops->sf_put_ftype(sfep, - dp->d_ops->data_get_ftype(dep)); - - sfep = dp->d_ops->sf_nextentry(sfp, sfep); - } - ptr += dp->d_ops->data_entsize(dep->namelen); - } - ASSERT((char *)sfep - (char *)sfp == size); - - /* now we are done with the block, we can shrink the inode */ - logflags = XFS_ILOG_CORE; - error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp); - if (error) { - ASSERT(error != ENOSPC); - goto out; - } - - /* - * The buffer is now unconditionally gone, whether - * xfs_dir2_shrink_inode worked or not. - * - * Convert the inode to local format and copy the data in. - */ - dp->i_df.if_flags &= ~XFS_IFEXTENTS; - dp->i_df.if_flags |= XFS_IFINLINE; - dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; - ASSERT(dp->i_df.if_bytes == 0); - xfs_idata_realloc(dp, size, XFS_DATA_FORK); - - logflags |= XFS_ILOG_DDATA; - memcpy(dp->i_df.if_u1.if_data, dst, size); - dp->i_d.di_size = size; - xfs_dir2_sf_check(args); -out: - xfs_trans_log_inode(args->trans, dp, logflags); - kmem_free(dst); - return error; -} - -/* - * Add a name to a shortform directory. - * There are two algorithms, "easy" and "hard" which we decide on - * before changing anything. - * Convert to block form if necessary, if the new entry won't fit. - */ -int /* error */ -xfs_dir2_sf_addname( - xfs_da_args_t *args) /* operation arguments */ -{ - xfs_inode_t *dp; /* incore directory inode */ - int error; /* error return value */ - int incr_isize; /* total change in size */ - int new_isize; /* di_size after adding name */ - int objchange; /* changing to 8-byte inodes */ - xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ - int pick; /* which algorithm to use */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ - - trace_xfs_dir2_sf_addname(args); - - ASSERT(xfs_dir2_sf_lookup(args) == ENOENT); - dp = args->dp; - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Make sure the shortform value has some of its header. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return EIO; - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); - /* - * Compute entry (and change in) size. - */ - incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen); - objchange = 0; -#if XFS_BIG_INUMS - /* - * Do we have to change to 8 byte inodes? - */ - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { - /* - * Yes, adjust the inode size. old count + (parent + new) - */ - incr_isize += - (sfp->count + 2) * - ((uint)sizeof(xfs_dir2_ino8_t) - - (uint)sizeof(xfs_dir2_ino4_t)); - objchange = 1; - } -#endif - new_isize = (int)dp->i_d.di_size + incr_isize; - /* - * Won't fit as shortform any more (due to size), - * or the pick routine says it won't (due to offset values). - */ - if (new_isize > XFS_IFORK_DSIZE(dp) || - (pick = - xfs_dir2_sf_addname_pick(args, objchange, &sfep, &offset)) == 0) { - /* - * Just checking or no space reservation, it doesn't fit. - */ - if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) - return ENOSPC; - /* - * Convert to block form then add the name. - */ - error = xfs_dir2_sf_to_block(args); - if (error) - return error; - return xfs_dir2_block_addname(args); - } - /* - * Just checking, it fits. - */ - if (args->op_flags & XFS_DA_OP_JUSTCHECK) - return 0; - /* - * Do it the easy way - just add it at the end. - */ - if (pick == 1) - xfs_dir2_sf_addname_easy(args, sfep, offset, new_isize); - /* - * Do it the hard way - look for a place to insert the new entry. - * Convert to 8 byte inode numbers first if necessary. - */ - else { - ASSERT(pick == 2); -#if XFS_BIG_INUMS - if (objchange) - xfs_dir2_sf_toino8(args); -#endif - xfs_dir2_sf_addname_hard(args, objchange, new_isize); - } - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); - return 0; -} - -/* - * Add the new entry the "easy" way. - * This is copying the old directory and adding the new entry at the end. - * Since it's sorted by "offset" we need room after the last offset - * that's already there, and then room to convert to a block directory. - * This is already checked by the pick routine. - */ -static void -xfs_dir2_sf_addname_easy( - xfs_da_args_t *args, /* operation arguments */ - xfs_dir2_sf_entry_t *sfep, /* pointer to new entry */ - xfs_dir2_data_aoff_t offset, /* offset to use for new ent */ - int new_isize) /* new directory size */ -{ - int byteoff; /* byte offset in sf dir */ - xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - - dp = args->dp; - - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - byteoff = (int)((char *)sfep - (char *)sfp); - /* - * Grow the in-inode space. - */ - xfs_idata_realloc(dp, dp->d_ops->sf_entsize(sfp, args->namelen), - XFS_DATA_FORK); - /* - * Need to set up again due to realloc of the inode data. - */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff); - /* - * Fill in the new entry. - */ - sfep->namelen = args->namelen; - xfs_dir2_sf_put_offset(sfep, offset); - memcpy(sfep->name, args->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); - dp->d_ops->sf_put_ftype(sfep, args->filetype); - - /* - * Update the header and inode. - */ - sfp->count++; -#if XFS_BIG_INUMS - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) - sfp->i8count++; -#endif - dp->i_d.di_size = new_isize; - xfs_dir2_sf_check(args); -} - -/* - * Add the new entry the "hard" way. - * The caller has already converted to 8 byte inode numbers if necessary, - * in which case we need to leave the i8count at 1. - * Find a hole that the new entry will fit into, and copy - * the first part of the entries, the new entry, and the last part of - * the entries. - */ -/* ARGSUSED */ -static void -xfs_dir2_sf_addname_hard( - xfs_da_args_t *args, /* operation arguments */ - int objchange, /* changing inode number size */ - int new_isize) /* new directory size */ -{ - int add_datasize; /* data size need for new ent */ - char *buf; /* buffer for old */ - xfs_inode_t *dp; /* incore directory inode */ - int eof; /* reached end of old dir */ - int nbytes; /* temp for byte copies */ - xfs_dir2_data_aoff_t new_offset; /* next offset value */ - xfs_dir2_data_aoff_t offset; /* current offset value */ - int old_isize; /* previous di_size */ - xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */ - xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */ - xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ - xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */ - struct xfs_mount *mp; - - /* - * Copy the old directory to the stack buffer. - */ - dp = args->dp; - mp = dp->i_mount; - - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - old_isize = (int)dp->i_d.di_size; - buf = kmem_alloc(old_isize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_hdr_t *)buf; - memcpy(oldsfp, sfp, old_isize); - /* - * Loop over the old directory finding the place we're going - * to insert the new entry. - * If it's going to end up at the end then oldsfep will point there. - */ - for (offset = dp->d_ops->data_first_offset, - oldsfep = xfs_dir2_sf_firstentry(oldsfp), - add_datasize = dp->d_ops->data_entsize(args->namelen), - eof = (char *)oldsfep == &buf[old_isize]; - !eof; - offset = new_offset + dp->d_ops->data_entsize(oldsfep->namelen), - oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep), - eof = (char *)oldsfep == &buf[old_isize]) { - new_offset = xfs_dir2_sf_get_offset(oldsfep); - if (offset + add_datasize <= new_offset) - break; - } - /* - * Get rid of the old directory, then allocate space for - * the new one. We do this so xfs_idata_realloc won't copy - * the data. - */ - xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK); - xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK); - /* - * Reset the pointer since the buffer was reallocated. - */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - /* - * Copy the first part of the directory, including the header. - */ - nbytes = (int)((char *)oldsfep - (char *)oldsfp); - memcpy(sfp, oldsfp, nbytes); - sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes); - /* - * Fill in the new entry, and update the header counts. - */ - sfep->namelen = args->namelen; - xfs_dir2_sf_put_offset(sfep, offset); - memcpy(sfep->name, args->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); - dp->d_ops->sf_put_ftype(sfep, args->filetype); - sfp->count++; -#if XFS_BIG_INUMS - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) - sfp->i8count++; -#endif - /* - * If there's more left to copy, do that. - */ - if (!eof) { - sfep = dp->d_ops->sf_nextentry(sfp, sfep); - memcpy(sfep, oldsfep, old_isize - nbytes); - } - kmem_free(buf); - dp->i_d.di_size = new_isize; - xfs_dir2_sf_check(args); -} - -/* - * Decide if the new entry will fit at all. - * If it will fit, pick between adding the new entry to the end (easy) - * or somewhere else (hard). - * Return 0 (won't fit), 1 (easy), 2 (hard). - */ -/*ARGSUSED*/ -static int /* pick result */ -xfs_dir2_sf_addname_pick( - xfs_da_args_t *args, /* operation arguments */ - int objchange, /* inode # size changes */ - xfs_dir2_sf_entry_t **sfepp, /* out(1): new entry ptr */ - xfs_dir2_data_aoff_t *offsetp) /* out(1): new offset */ -{ - xfs_inode_t *dp; /* incore directory inode */ - int holefit; /* found hole it will fit in */ - int i; /* entry number */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_dir2_data_aoff_t offset; /* data block offset */ - xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - int size; /* entry's data size */ - int used; /* data bytes used */ - - dp = args->dp; - mp = dp->i_mount; - - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - size = dp->d_ops->data_entsize(args->namelen); - offset = dp->d_ops->data_first_offset; - sfep = xfs_dir2_sf_firstentry(sfp); - holefit = 0; - /* - * Loop over sf entries. - * Keep track of data offset and whether we've seen a place - * to insert the new entry. - */ - for (i = 0; i < sfp->count; i++) { - if (!holefit) - holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); - offset = xfs_dir2_sf_get_offset(sfep) + - dp->d_ops->data_entsize(sfep->namelen); - sfep = dp->d_ops->sf_nextentry(sfp, sfep); - } - /* - * Calculate data bytes used excluding the new entry, if this - * was a data block (block form directory). - */ - used = offset + - (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + - (uint)sizeof(xfs_dir2_block_tail_t); - /* - * If it won't fit in a block form then we can't insert it, - * we'll go back, convert to block, then try the insert and convert - * to leaf. - */ - if (used + (holefit ? 0 : size) > args->geo->blksize) - return 0; - /* - * If changing the inode number size, do it the hard way. - */ -#if XFS_BIG_INUMS - if (objchange) { - return 2; - } -#else - ASSERT(objchange == 0); -#endif - /* - * If it won't fit at the end then do it the hard way (use the hole). - */ - if (used + size > args->geo->blksize) - return 2; - /* - * Do it the easy way. - */ - *sfepp = sfep; - *offsetp = offset; - return 1; -} - -#ifdef DEBUG -/* - * Check consistency of shortform directory, assert if bad. - */ -static void -xfs_dir2_sf_check( - xfs_da_args_t *args) /* operation arguments */ -{ - xfs_inode_t *dp; /* incore directory inode */ - int i; /* entry number */ - int i8count; /* number of big inode#s */ - xfs_ino_t ino; /* entry inode number */ - int offset; /* data offset */ - xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - struct xfs_mount *mp; - - dp = args->dp; - mp = dp->i_mount; - - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - offset = dp->d_ops->data_first_offset; - ino = dp->d_ops->sf_get_parent_ino(sfp); - i8count = ino > XFS_DIR2_MAX_SHORT_INUM; - - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { - ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); - ino = dp->d_ops->sf_get_ino(sfp, sfep); - i8count += ino > XFS_DIR2_MAX_SHORT_INUM; - offset = - xfs_dir2_sf_get_offset(sfep) + - dp->d_ops->data_entsize(sfep->namelen); - ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX); - } - ASSERT(i8count == sfp->i8count); - ASSERT(XFS_BIG_INUMS || i8count == 0); - ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); - ASSERT(offset + - (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + - (uint)sizeof(xfs_dir2_block_tail_t) <= args->geo->blksize); -} -#endif /* DEBUG */ - -/* - * Create a new (shortform) directory. - */ -int /* error, always 0 */ -xfs_dir2_sf_create( - xfs_da_args_t *args, /* operation arguments */ - xfs_ino_t pino) /* parent inode number */ -{ - xfs_inode_t *dp; /* incore directory inode */ - int i8count; /* parent inode is an 8-byte number */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - int size; /* directory size */ - - trace_xfs_dir2_sf_create(args); - - dp = args->dp; - - ASSERT(dp != NULL); - ASSERT(dp->i_d.di_size == 0); - /* - * If it's currently a zero-length extent file, - * convert it to local format. - */ - if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) { - dp->i_df.if_flags &= ~XFS_IFEXTENTS; /* just in case */ - dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); - dp->i_df.if_flags |= XFS_IFINLINE; - } - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - ASSERT(dp->i_df.if_bytes == 0); - i8count = pino > XFS_DIR2_MAX_SHORT_INUM; - size = xfs_dir2_sf_hdr_size(i8count); - /* - * Make a buffer for the data. - */ - xfs_idata_realloc(dp, size, XFS_DATA_FORK); - /* - * Fill in the header, - */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - sfp->i8count = i8count; - /* - * Now can put in the inode number, since i8count is set. - */ - dp->d_ops->sf_put_parent_ino(sfp, pino); - sfp->count = 0; - dp->i_d.di_size = size; - xfs_dir2_sf_check(args); - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); - return 0; -} - -/* - * Lookup an entry in a shortform directory. - * Returns EEXIST if found, ENOENT if not found. - */ -int /* error */ -xfs_dir2_sf_lookup( - xfs_da_args_t *args) /* operation arguments */ -{ - xfs_inode_t *dp; /* incore directory inode */ - int i; /* entry index */ - int error; - xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - enum xfs_dacmp cmp; /* comparison result */ - xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ - - trace_xfs_dir2_sf_lookup(args); - - xfs_dir2_sf_check(args); - dp = args->dp; - - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Bail out if the directory is way too short. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return EIO; - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); - /* - * Special case for . - */ - if (args->namelen == 1 && args->name[0] == '.') { - args->inumber = dp->i_ino; - args->cmpresult = XFS_CMP_EXACT; - args->filetype = XFS_DIR3_FT_DIR; - return EEXIST; - } - /* - * Special case for .. - */ - if (args->namelen == 2 && - args->name[0] == '.' && args->name[1] == '.') { - args->inumber = dp->d_ops->sf_get_parent_ino(sfp); - args->cmpresult = XFS_CMP_EXACT; - args->filetype = XFS_DIR3_FT_DIR; - return EEXIST; - } - /* - * Loop over all the entries trying to match ours. - */ - ci_sfep = NULL; - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { - /* - * Compare name and if it's an exact match, return the inode - * number. If it's the first case-insensitive match, store the - * inode number and continue looking for an exact match. - */ - cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name, - sfep->namelen); - if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { - args->cmpresult = cmp; - args->inumber = dp->d_ops->sf_get_ino(sfp, sfep); - args->filetype = dp->d_ops->sf_get_ftype(sfep); - if (cmp == XFS_CMP_EXACT) - return EEXIST; - ci_sfep = sfep; - } - } - ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); - /* - * Here, we can only be doing a lookup (not a rename or replace). - * If a case-insensitive match was not found, return ENOENT. - */ - if (!ci_sfep) - return ENOENT; - /* otherwise process the CI match as required by the caller */ - error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen); - return error; -} - -/* - * Remove an entry from a shortform directory. - */ -int /* error */ -xfs_dir2_sf_removename( - xfs_da_args_t *args) -{ - int byteoff; /* offset of removed entry */ - xfs_inode_t *dp; /* incore directory inode */ - int entsize; /* this entry's size */ - int i; /* shortform entry index */ - int newsize; /* new inode size */ - int oldsize; /* old inode size */ - xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - - trace_xfs_dir2_sf_removename(args); - - dp = args->dp; - - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - oldsize = (int)dp->i_d.di_size; - /* - * Bail out if the directory is way too short. - */ - if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return EIO; - } - ASSERT(dp->i_df.if_bytes == oldsize); - ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count)); - /* - * Loop over the old directory entries. - * Find the one we're deleting. - */ - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { - if (xfs_da_compname(args, sfep->name, sfep->namelen) == - XFS_CMP_EXACT) { - ASSERT(dp->d_ops->sf_get_ino(sfp, sfep) == - args->inumber); - break; - } - } - /* - * Didn't find it. - */ - if (i == sfp->count) - return ENOENT; - /* - * Calculate sizes. - */ - byteoff = (int)((char *)sfep - (char *)sfp); - entsize = dp->d_ops->sf_entsize(sfp, args->namelen); - newsize = oldsize - entsize; - /* - * Copy the part if any after the removed entry, sliding it down. - */ - if (byteoff + entsize < oldsize) - memmove((char *)sfp + byteoff, (char *)sfp + byteoff + entsize, - oldsize - (byteoff + entsize)); - /* - * Fix up the header and file size. - */ - sfp->count--; - dp->i_d.di_size = newsize; - /* - * Reallocate, making it smaller. - */ - xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; -#if XFS_BIG_INUMS - /* - * Are we changing inode number size? - */ - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) { - if (sfp->i8count == 1) - xfs_dir2_sf_toino4(args); - else - sfp->i8count--; - } -#endif - xfs_dir2_sf_check(args); - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); - return 0; -} - -/* - * Replace the inode number of an entry in a shortform directory. - */ -int /* error */ -xfs_dir2_sf_replace( - xfs_da_args_t *args) /* operation arguments */ -{ - xfs_inode_t *dp; /* incore directory inode */ - int i; /* entry index */ -#if XFS_BIG_INUMS || defined(DEBUG) - xfs_ino_t ino=0; /* entry old inode number */ -#endif -#if XFS_BIG_INUMS - int i8elevated; /* sf_toino8 set i8count=1 */ -#endif - xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - - trace_xfs_dir2_sf_replace(args); - - dp = args->dp; - - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Bail out if the shortform directory is way too small. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return EIO; - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); -#if XFS_BIG_INUMS - /* - * New inode number is large, and need to convert to 8-byte inodes. - */ - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { - int error; /* error return value */ - int newsize; /* new inode size */ - - newsize = - dp->i_df.if_bytes + - (sfp->count + 1) * - ((uint)sizeof(xfs_dir2_ino8_t) - - (uint)sizeof(xfs_dir2_ino4_t)); - /* - * Won't fit as shortform, convert to block then do replace. - */ - if (newsize > XFS_IFORK_DSIZE(dp)) { - error = xfs_dir2_sf_to_block(args); - if (error) { - return error; - } - return xfs_dir2_block_replace(args); - } - /* - * Still fits, convert to 8-byte now. - */ - xfs_dir2_sf_toino8(args); - i8elevated = 1; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - } else - i8elevated = 0; -#endif - ASSERT(args->namelen != 1 || args->name[0] != '.'); - /* - * Replace ..'s entry. - */ - if (args->namelen == 2 && - args->name[0] == '.' && args->name[1] == '.') { -#if XFS_BIG_INUMS || defined(DEBUG) - ino = dp->d_ops->sf_get_parent_ino(sfp); - ASSERT(args->inumber != ino); -#endif - dp->d_ops->sf_put_parent_ino(sfp, args->inumber); - } - /* - * Normal entry, look for the name. - */ - else { - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { - if (xfs_da_compname(args, sfep->name, sfep->namelen) == - XFS_CMP_EXACT) { -#if XFS_BIG_INUMS || defined(DEBUG) - ino = dp->d_ops->sf_get_ino(sfp, sfep); - ASSERT(args->inumber != ino); -#endif - dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); - dp->d_ops->sf_put_ftype(sfep, args->filetype); - break; - } - } - /* - * Didn't find it. - */ - if (i == sfp->count) { - ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); -#if XFS_BIG_INUMS - if (i8elevated) - xfs_dir2_sf_toino4(args); -#endif - return ENOENT; - } - } -#if XFS_BIG_INUMS - /* - * See if the old number was large, the new number is small. - */ - if (ino > XFS_DIR2_MAX_SHORT_INUM && - args->inumber <= XFS_DIR2_MAX_SHORT_INUM) { - /* - * And the old count was one, so need to convert to small. - */ - if (sfp->i8count == 1) - xfs_dir2_sf_toino4(args); - else - sfp->i8count--; - } - /* - * See if the old number was small, the new number is large. - */ - if (ino <= XFS_DIR2_MAX_SHORT_INUM && - args->inumber > XFS_DIR2_MAX_SHORT_INUM) { - /* - * add to the i8count unless we just converted to 8-byte - * inodes (which does an implied i8count = 1) - */ - ASSERT(sfp->i8count != 0); - if (!i8elevated) - sfp->i8count++; - } -#endif - xfs_dir2_sf_check(args); - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); - return 0; -} - -#if XFS_BIG_INUMS -/* - * Convert from 8-byte inode numbers to 4-byte inode numbers. - * The last 8-byte inode number is gone, but the count is still 1. - */ -static void -xfs_dir2_sf_toino4( - xfs_da_args_t *args) /* operation arguments */ -{ - char *buf; /* old dir's buffer */ - xfs_inode_t *dp; /* incore directory inode */ - int i; /* entry index */ - int newsize; /* new inode size */ - xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ - int oldsize; /* old inode size */ - xfs_dir2_sf_entry_t *sfep; /* new sf entry */ - xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ - struct xfs_mount *mp; - - trace_xfs_dir2_sf_toino4(args); - - dp = args->dp; - mp = dp->i_mount; - - /* - * Copy the old directory to the buffer. - * Then nuke it from the inode, and add the new buffer to the inode. - * Don't want xfs_idata_realloc copying the data here. - */ - oldsize = dp->i_df.if_bytes; - buf = kmem_alloc(oldsize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsfp->i8count == 1); - memcpy(buf, oldsfp, oldsize); - /* - * Compute the new inode size. - */ - newsize = - oldsize - - (oldsfp->count + 1) * - ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); - xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); - xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); - /* - * Reset our pointers, the data has moved. - */ - oldsfp = (xfs_dir2_sf_hdr_t *)buf; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - /* - * Fill in the new header. - */ - sfp->count = oldsfp->count; - sfp->i8count = 0; - dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp)); - /* - * Copy the entries field by field. - */ - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), - oldsfep = xfs_dir2_sf_firstentry(oldsfp); - i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), - oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) { - sfep->namelen = oldsfep->namelen; - sfep->offset = oldsfep->offset; - memcpy(sfep->name, oldsfep->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, - dp->d_ops->sf_get_ino(oldsfp, oldsfep)); - dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep)); - } - /* - * Clean up the inode. - */ - kmem_free(buf); - dp->i_d.di_size = newsize; - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); -} - -/* - * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers. - * The new entry w/ an 8-byte inode number is not there yet; we leave with - * i8count set to 1, but no corresponding 8-byte entry. - */ -static void -xfs_dir2_sf_toino8( - xfs_da_args_t *args) /* operation arguments */ -{ - char *buf; /* old dir's buffer */ - xfs_inode_t *dp; /* incore directory inode */ - int i; /* entry index */ - int newsize; /* new inode size */ - xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ - int oldsize; /* old inode size */ - xfs_dir2_sf_entry_t *sfep; /* new sf entry */ - xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ - struct xfs_mount *mp; - - trace_xfs_dir2_sf_toino8(args); - - dp = args->dp; - mp = dp->i_mount; - - /* - * Copy the old directory to the buffer. - * Then nuke it from the inode, and add the new buffer to the inode. - * Don't want xfs_idata_realloc copying the data here. - */ - oldsize = dp->i_df.if_bytes; - buf = kmem_alloc(oldsize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsfp->i8count == 0); - memcpy(buf, oldsfp, oldsize); - /* - * Compute the new inode size (nb: entry count + 1 for parent) - */ - newsize = - oldsize + - (oldsfp->count + 1) * - ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); - xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); - xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); - /* - * Reset our pointers, the data has moved. - */ - oldsfp = (xfs_dir2_sf_hdr_t *)buf; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - /* - * Fill in the new header. - */ - sfp->count = oldsfp->count; - sfp->i8count = 1; - dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp)); - /* - * Copy the entries field by field. - */ - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), - oldsfep = xfs_dir2_sf_firstentry(oldsfp); - i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), - oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) { - sfep->namelen = oldsfep->namelen; - sfep->offset = oldsfep->offset; - memcpy(sfep->name, oldsfep->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, - dp->d_ops->sf_get_ino(oldsfp, oldsfep)); - dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep)); - } - /* - * Clean up the inode. - */ - kmem_free(buf); - dp->i_d.di_size = newsize; - xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); -} -#endif /* XFS_BIG_INUMS */ diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c deleted file mode 100644 index c2ac0c611ad8..000000000000 --- a/fs/xfs/xfs_dquot_buf.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_quota.h" -#include "xfs_trans.h" -#include "xfs_qm.h" -#include "xfs_error.h" -#include "xfs_cksum.h" -#include "xfs_trace.h" - -int -xfs_calc_dquots_per_chunk( - unsigned int nbblks) /* basic block units */ -{ - unsigned int ndquots; - - ASSERT(nbblks > 0); - ndquots = BBTOB(nbblks); - do_div(ndquots, sizeof(xfs_dqblk_t)); - - return ndquots; -} - -/* - * Do some primitive error checking on ondisk dquot data structures. - */ -int -xfs_dqcheck( - struct xfs_mount *mp, - xfs_disk_dquot_t *ddq, - xfs_dqid_t id, - uint type, /* used only when IO_dorepair is true */ - uint flags, - char *str) -{ - xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; - int errs = 0; - - /* - * We can encounter an uninitialized dquot buffer for 2 reasons: - * 1. If we crash while deleting the quotainode(s), and those blks got - * used for user data. This is because we take the path of regular - * file deletion; however, the size field of quotainodes is never - * updated, so all the tricks that we play in itruncate_finish - * don't quite matter. - * - * 2. We don't play the quota buffers when there's a quotaoff logitem. - * But the allocation will be replayed so we'll end up with an - * uninitialized quota block. - * - * This is all fine; things are still consistent, and we haven't lost - * any quota information. Just don't complain about bad dquot blks. - */ - if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", - str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); - errs++; - } - if (ddq->d_version != XFS_DQUOT_VERSION) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", - str, id, ddq->d_version, XFS_DQUOT_VERSION); - errs++; - } - - if (ddq->d_flags != XFS_DQ_USER && - ddq->d_flags != XFS_DQ_PROJ && - ddq->d_flags != XFS_DQ_GROUP) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, unknown flags 0x%x", - str, id, ddq->d_flags); - errs++; - } - - if (id != -1 && id != be32_to_cpu(ddq->d_id)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : ondisk-dquot 0x%p, ID mismatch: " - "0x%x expected, found id 0x%x", - str, ddq, id, be32_to_cpu(ddq->d_id)); - errs++; - } - - if (!errs && ddq->d_id) { - if (ddq->d_blk_softlimit && - be64_to_cpu(ddq->d_bcount) > - be64_to_cpu(ddq->d_blk_softlimit)) { - if (!ddq->d_btimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_ino_softlimit && - be64_to_cpu(ddq->d_icount) > - be64_to_cpu(ddq->d_ino_softlimit)) { - if (!ddq->d_itimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_rtb_softlimit && - be64_to_cpu(ddq->d_rtbcount) > - be64_to_cpu(ddq->d_rtb_softlimit)) { - if (!ddq->d_rtbtimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - } - - if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) - return errs; - - if (flags & XFS_QMOPT_DOWARN) - xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); - - /* - * Typically, a repair is only requested by quotacheck. - */ - ASSERT(id != -1); - ASSERT(flags & XFS_QMOPT_DQREPAIR); - memset(d, 0, sizeof(xfs_dqblk_t)); - - d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); - d->dd_diskdq.d_version = XFS_DQUOT_VERSION; - d->dd_diskdq.d_flags = type; - d->dd_diskdq.d_id = cpu_to_be32(id); - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); - xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), - XFS_DQUOT_CRC_OFF); - } - - return errs; -} - -STATIC bool -xfs_dquot_buf_verify_crc( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; - int ndquots; - int i; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return true; - - /* - * if we are in log recovery, the quota subsystem has not been - * initialised so we have no quotainfo structure. In that case, we need - * to manually calculate the number of dquots in the buffer. - */ - if (mp->m_quotainfo) - ndquots = mp->m_quotainfo->qi_dqperchunk; - else - ndquots = xfs_calc_dquots_per_chunk( - XFS_BB_TO_FSB(mp, bp->b_length)); - - for (i = 0; i < ndquots; i++, d++) { - if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), - XFS_DQUOT_CRC_OFF)) - return false; - if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) - return false; - } - return true; -} - -STATIC bool -xfs_dquot_buf_verify( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; - xfs_dqid_t id = 0; - int ndquots; - int i; - - /* - * if we are in log recovery, the quota subsystem has not been - * initialised so we have no quotainfo structure. In that case, we need - * to manually calculate the number of dquots in the buffer. - */ - if (mp->m_quotainfo) - ndquots = mp->m_quotainfo->qi_dqperchunk; - else - ndquots = xfs_calc_dquots_per_chunk(bp->b_length); - - /* - * On the first read of the buffer, verify that each dquot is valid. - * We don't know what the id of the dquot is supposed to be, just that - * they should be increasing monotonically within the buffer. If the - * first id is corrupt, then it will fail on the second dquot in the - * buffer so corruptions could point to the wrong dquot in this case. - */ - for (i = 0; i < ndquots; i++) { - struct xfs_disk_dquot *ddq; - int error; - - ddq = &d[i].dd_diskdq; - - if (i == 0) - id = be32_to_cpu(ddq->d_id); - - error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, - "xfs_dquot_buf_verify"); - if (error) - return false; - } - return true; -} - -static void -xfs_dquot_buf_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (!xfs_dquot_buf_verify_crc(mp, bp)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (!xfs_dquot_buf_verify(mp, bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); -} - -/* - * we don't calculate the CRC here as that is done when the dquot is flushed to - * the buffer after the update is done. This ensures that the dquot in the - * buffer always has an up-to-date CRC value. - */ -static void -xfs_dquot_buf_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (!xfs_dquot_buf_verify(mp, bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } -} - -const struct xfs_buf_ops xfs_dquot_buf_ops = { - .verify_read = xfs_dquot_buf_read_verify, - .verify_write = xfs_dquot_buf_write_verify, -}; - diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c deleted file mode 100644 index 16fb63a9bc5e..000000000000 --- a/fs/xfs/xfs_ialloc.c +++ /dev/null @@ -1,2189 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_inum.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" -#include "xfs_ialloc_btree.h" -#include "xfs_alloc.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_bmap.h" -#include "xfs_cksum.h" -#include "xfs_trans.h" -#include "xfs_buf_item.h" -#include "xfs_icreate_item.h" -#include "xfs_icache.h" -#include "xfs_dinode.h" -#include "xfs_trace.h" - - -/* - * Allocation group level functions. - */ -static inline int -xfs_ialloc_cluster_alignment( - xfs_alloc_arg_t *args) -{ - if (xfs_sb_version_hasalign(&args->mp->m_sb) && - args->mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size)) - return args->mp->m_sb.sb_inoalignmt; - return 1; -} - -/* - * Lookup a record by ino in the btree given by cur. - */ -int /* error */ -xfs_inobt_lookup( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - xfs_lookup_t dir, /* <=, >=, == */ - int *stat) /* success/failure */ -{ - cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = 0; - cur->bc_rec.i.ir_free = 0; - return xfs_btree_lookup(cur, dir, stat); -} - -/* - * Update the record referred to by cur to the value given. - * This either works (return 0) or gets an EFSCORRUPTED error. - */ -STATIC int /* error */ -xfs_inobt_update( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_inobt_rec_incore_t *irec) /* btree record */ -{ - union xfs_btree_rec rec; - - rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); - rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); - rec.inobt.ir_free = cpu_to_be64(irec->ir_free); - return xfs_btree_update(cur, &rec); -} - -/* - * Get the data from the pointed-to record. - */ -int /* error */ -xfs_inobt_get_rec( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_inobt_rec_incore_t *irec, /* btree record */ - int *stat) /* output: success/failure */ -{ - union xfs_btree_rec *rec; - int error; - - error = xfs_btree_get_rec(cur, &rec, stat); - if (!error && *stat == 1) { - irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); - irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); - irec->ir_free = be64_to_cpu(rec->inobt.ir_free); - } - return error; -} - -/* - * Insert a single inobt record. Cursor must already point to desired location. - */ -STATIC int -xfs_inobt_insert_rec( - struct xfs_btree_cur *cur, - __int32_t freecount, - xfs_inofree_t free, - int *stat) -{ - cur->bc_rec.i.ir_freecount = freecount; - cur->bc_rec.i.ir_free = free; - return xfs_btree_insert(cur, stat); -} - -/* - * Insert records describing a newly allocated inode chunk into the inobt. - */ -STATIC int -xfs_inobt_insert( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_buf *agbp, - xfs_agino_t newino, - xfs_agino_t newlen, - xfs_btnum_t btnum) -{ - struct xfs_btree_cur *cur; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); - xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); - xfs_agino_t thisino; - int i; - int error; - - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); - - for (thisino = newino; - thisino < newino + newlen; - thisino += XFS_INODES_PER_CHUNK) { - error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; - } - ASSERT(i == 0); - - error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK, - XFS_INOBT_ALL_FREE, &i); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; - } - ASSERT(i == 1); - } - - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - - return 0; -} - -/* - * Verify that the number of free inodes in the AGI is correct. - */ -#ifdef DEBUG -STATIC int -xfs_check_agi_freecount( - struct xfs_btree_cur *cur, - struct xfs_agi *agi) -{ - if (cur->bc_nlevels == 1) { - xfs_inobt_rec_incore_t rec; - int freecount = 0; - int error; - int i; - - error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); - if (error) - return error; - - do { - error = xfs_inobt_get_rec(cur, &rec, &i); - if (error) - return error; - - if (i) { - freecount += rec.ir_freecount; - error = xfs_btree_increment(cur, 0, &i); - if (error) - return error; - } - } while (i == 1); - - if (!XFS_FORCED_SHUTDOWN(cur->bc_mp)) - ASSERT(freecount == be32_to_cpu(agi->agi_freecount)); - } - return 0; -} -#else -#define xfs_check_agi_freecount(cur, agi) 0 -#endif - -/* - * Initialise a new set of inodes. When called without a transaction context - * (e.g. from recovery) we initiate a delayed write of the inode buffers rather - * than logging them (which in a transaction context puts them into the AIL - * for writeback rather than the xfsbufd queue). - */ -int -xfs_ialloc_inode_init( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct list_head *buffer_list, - xfs_agnumber_t agno, - xfs_agblock_t agbno, - xfs_agblock_t length, - unsigned int gen) -{ - struct xfs_buf *fbuf; - struct xfs_dinode *free; - int nbufs, blks_per_cluster, inodes_per_cluster; - int version; - int i, j; - xfs_daddr_t d; - xfs_ino_t ino = 0; - - /* - * Loop over the new block(s), filling in the inodes. For small block - * sizes, manipulate the inodes in buffers which are multiples of the - * blocks size. - */ - blks_per_cluster = xfs_icluster_size_fsb(mp); - inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; - nbufs = length / blks_per_cluster; - - /* - * Figure out what version number to use in the inodes we create. If - * the superblock version has caught up to the one that supports the new - * inode format, then use the new inode version. Otherwise use the old - * version so that old kernels will continue to be able to use the file - * system. - * - * For v3 inodes, we also need to write the inode number into the inode, - * so calculate the first inode number of the chunk here as - * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not - * across multiple filesystem blocks (such as a cluster) and so cannot - * be used in the cluster buffer loop below. - * - * Further, because we are writing the inode directly into the buffer - * and calculating a CRC on the entire inode, we have ot log the entire - * inode so that the entire range the CRC covers is present in the log. - * That means for v3 inode we log the entire buffer rather than just the - * inode cores. - */ - if (xfs_sb_version_hascrc(&mp->m_sb)) { - version = 3; - ino = XFS_AGINO_TO_INO(mp, agno, - XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); - - /* - * log the initialisation that is about to take place as an - * logical operation. This means the transaction does not - * need to log the physical changes to the inode buffers as log - * recovery will know what initialisation is actually needed. - * Hence we only need to log the buffers as "ordered" buffers so - * they track in the AIL as if they were physically logged. - */ - if (tp) - xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos, - mp->m_sb.sb_inodesize, length, gen); - } else - version = 2; - - for (j = 0; j < nbufs; j++) { - /* - * Get the block. - */ - d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); - fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, - mp->m_bsize * blks_per_cluster, - XBF_UNMAPPED); - if (!fbuf) - return ENOMEM; - - /* Initialize the inode buffers and log them appropriately. */ - fbuf->b_ops = &xfs_inode_buf_ops; - xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); - for (i = 0; i < inodes_per_cluster; i++) { - int ioffset = i << mp->m_sb.sb_inodelog; - uint isize = xfs_dinode_size(version); - - free = xfs_make_iptr(mp, fbuf, i); - free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); - free->di_version = version; - free->di_gen = cpu_to_be32(gen); - free->di_next_unlinked = cpu_to_be32(NULLAGINO); - - if (version == 3) { - free->di_ino = cpu_to_be64(ino); - ino++; - uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); - xfs_dinode_calc_crc(mp, free); - } else if (tp) { - /* just log the inode core */ - xfs_trans_log_buf(tp, fbuf, ioffset, - ioffset + isize - 1); - } - } - - if (tp) { - /* - * Mark the buffer as an inode allocation buffer so it - * sticks in AIL at the point of this allocation - * transaction. This ensures the they are on disk before - * the tail of the log can be moved past this - * transaction (i.e. by preventing relogging from moving - * it forward in the log). - */ - xfs_trans_inode_alloc_buf(tp, fbuf); - if (version == 3) { - /* - * Mark the buffer as ordered so that they are - * not physically logged in the transaction but - * still tracked in the AIL as part of the - * transaction and pin the log appropriately. - */ - xfs_trans_ordered_buf(tp, fbuf); - xfs_trans_log_buf(tp, fbuf, 0, - BBTOB(fbuf->b_length) - 1); - } - } else { - fbuf->b_flags |= XBF_DONE; - xfs_buf_delwri_queue(fbuf, buffer_list); - xfs_buf_relse(fbuf); - } - } - return 0; -} - -/* - * Allocate new inodes in the allocation group specified by agbp. - * Return 0 for success, else error code. - */ -STATIC int /* error code or 0 */ -xfs_ialloc_ag_alloc( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* alloc group buffer */ - int *alloc) -{ - xfs_agi_t *agi; /* allocation group header */ - xfs_alloc_arg_t args; /* allocation argument structure */ - xfs_agnumber_t agno; - int error; - xfs_agino_t newino; /* new first inode's number */ - xfs_agino_t newlen; /* new number of inodes */ - int isaligned = 0; /* inode allocation at stripe unit */ - /* boundary */ - struct xfs_perag *pag; - - memset(&args, 0, sizeof(args)); - args.tp = tp; - args.mp = tp->t_mountp; - - /* - * Locking will ensure that we don't have two callers in here - * at one time. - */ - newlen = args.mp->m_ialloc_inos; - if (args.mp->m_maxicount && - args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) - return ENOSPC; - args.minlen = args.maxlen = args.mp->m_ialloc_blks; - /* - * First try to allocate inodes contiguous with the last-allocated - * chunk of inodes. If the filesystem is striped, this will fill - * an entire stripe unit with inodes. - */ - agi = XFS_BUF_TO_AGI(agbp); - newino = be32_to_cpu(agi->agi_newino); - agno = be32_to_cpu(agi->agi_seqno); - args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + - args.mp->m_ialloc_blks; - if (likely(newino != NULLAGINO && - (args.agbno < be32_to_cpu(agi->agi_length)))) { - args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); - args.type = XFS_ALLOCTYPE_THIS_BNO; - args.prod = 1; - - /* - * We need to take into account alignment here to ensure that - * we don't modify the free list if we fail to have an exact - * block. If we don't have an exact match, and every oher - * attempt allocation attempt fails, we'll end up cancelling - * a dirty transaction and shutting down. - * - * For an exact allocation, alignment must be 1, - * however we need to take cluster alignment into account when - * fixing up the freelist. Use the minalignslop field to - * indicate that extra blocks might be required for alignment, - * but not to use them in the actual exact allocation. - */ - args.alignment = 1; - args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; - - /* Allow space for the inode btree to split. */ - args.minleft = args.mp->m_in_maxlevels - 1; - if ((error = xfs_alloc_vextent(&args))) - return error; - - /* - * This request might have dirtied the transaction if the AG can - * satisfy the request, but the exact block was not available. - * If the allocation did fail, subsequent requests will relax - * the exact agbno requirement and increase the alignment - * instead. It is critical that the total size of the request - * (len + alignment + slop) does not increase from this point - * on, so reset minalignslop to ensure it is not included in - * subsequent requests. - */ - args.minalignslop = 0; - } else - args.fsbno = NULLFSBLOCK; - - if (unlikely(args.fsbno == NULLFSBLOCK)) { - /* - * Set the alignment for the allocation. - * If stripe alignment is turned on then align at stripe unit - * boundary. - * If the cluster size is smaller than a filesystem block - * then we're doing I/O for inodes in filesystem block size - * pieces, so don't need alignment anyway. - */ - isaligned = 0; - if (args.mp->m_sinoalign) { - ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); - args.alignment = args.mp->m_dalign; - isaligned = 1; - } else - args.alignment = xfs_ialloc_cluster_alignment(&args); - /* - * Need to figure out where to allocate the inode blocks. - * Ideally they should be spaced out through the a.g. - * For now, just allocate blocks up front. - */ - args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); - /* - * Allocate a fixed-size extent of inodes. - */ - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.prod = 1; - /* - * Allow space for the inode btree to split. - */ - args.minleft = args.mp->m_in_maxlevels - 1; - if ((error = xfs_alloc_vextent(&args))) - return error; - } - - /* - * If stripe alignment is turned on, then try again with cluster - * alignment. - */ - if (isaligned && args.fsbno == NULLFSBLOCK) { - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); - args.alignment = xfs_ialloc_cluster_alignment(&args); - if ((error = xfs_alloc_vextent(&args))) - return error; - } - - if (args.fsbno == NULLFSBLOCK) { - *alloc = 0; - return 0; - } - ASSERT(args.len == args.minlen); - - /* - * Stamp and write the inode buffers. - * - * Seed the new inode cluster with a random generation number. This - * prevents short-term reuse of generation numbers if a chunk is - * freed and then immediately reallocated. We use random numbers - * rather than a linear progression to prevent the next generation - * number from being easily guessable. - */ - error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno, - args.len, prandom_u32()); - - if (error) - return error; - /* - * Convert the results. - */ - newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); - be32_add_cpu(&agi->agi_count, newlen); - be32_add_cpu(&agi->agi_freecount, newlen); - pag = xfs_perag_get(args.mp, agno); - pag->pagi_freecount += newlen; - xfs_perag_put(pag); - agi->agi_newino = cpu_to_be32(newino); - - /* - * Insert records describing the new inode chunk into the btrees. - */ - error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, - XFS_BTNUM_INO); - if (error) - return error; - - if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { - error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, - XFS_BTNUM_FINO); - if (error) - return error; - } - /* - * Log allocation group header fields - */ - xfs_ialloc_log_agi(tp, agbp, - XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO); - /* - * Modify/log superblock values for inode count and inode free count. - */ - xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); - xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); - *alloc = 1; - return 0; -} - -STATIC xfs_agnumber_t -xfs_ialloc_next_ag( - xfs_mount_t *mp) -{ - xfs_agnumber_t agno; - - spin_lock(&mp->m_agirotor_lock); - agno = mp->m_agirotor; - if (++mp->m_agirotor >= mp->m_maxagi) - mp->m_agirotor = 0; - spin_unlock(&mp->m_agirotor_lock); - - return agno; -} - -/* - * Select an allocation group to look for a free inode in, based on the parent - * inode and the mode. Return the allocation group buffer. - */ -STATIC xfs_agnumber_t -xfs_ialloc_ag_select( - xfs_trans_t *tp, /* transaction pointer */ - xfs_ino_t parent, /* parent directory inode number */ - umode_t mode, /* bits set to indicate file type */ - int okalloc) /* ok to allocate more space */ -{ - xfs_agnumber_t agcount; /* number of ag's in the filesystem */ - xfs_agnumber_t agno; /* current ag number */ - int flags; /* alloc buffer locking flags */ - xfs_extlen_t ineed; /* blocks needed for inode allocation */ - xfs_extlen_t longest = 0; /* longest extent available */ - xfs_mount_t *mp; /* mount point structure */ - int needspace; /* file mode implies space allocated */ - xfs_perag_t *pag; /* per allocation group data */ - xfs_agnumber_t pagno; /* parent (starting) ag number */ - int error; - - /* - * Files of these types need at least one block if length > 0 - * (and they won't fit in the inode, but that's hard to figure out). - */ - needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); - mp = tp->t_mountp; - agcount = mp->m_maxagi; - if (S_ISDIR(mode)) - pagno = xfs_ialloc_next_ag(mp); - else { - pagno = XFS_INO_TO_AGNO(mp, parent); - if (pagno >= agcount) - pagno = 0; - } - - ASSERT(pagno < agcount); - - /* - * Loop through allocation groups, looking for one with a little - * free space in it. Note we don't look for free inodes, exactly. - * Instead, we include whether there is a need to allocate inodes - * to mean that blocks must be allocated for them, - * if none are currently free. - */ - agno = pagno; - flags = XFS_ALLOC_FLAG_TRYLOCK; - for (;;) { - pag = xfs_perag_get(mp, agno); - if (!pag->pagi_inodeok) { - xfs_ialloc_next_ag(mp); - goto nextag; - } - - if (!pag->pagi_init) { - error = xfs_ialloc_pagi_init(mp, tp, agno); - if (error) - goto nextag; - } - - if (pag->pagi_freecount) { - xfs_perag_put(pag); - return agno; - } - - if (!okalloc) - goto nextag; - - if (!pag->pagf_init) { - error = xfs_alloc_pagf_init(mp, tp, agno, flags); - if (error) - goto nextag; - } - - /* - * Is there enough free space for the file plus a block of - * inodes? (if we need to allocate some)? - */ - ineed = mp->m_ialloc_blks; - longest = pag->pagf_longest; - if (!longest) - longest = pag->pagf_flcount > 0; - - if (pag->pagf_freeblks >= needspace + ineed && - longest >= ineed) { - xfs_perag_put(pag); - return agno; - } -nextag: - xfs_perag_put(pag); - /* - * No point in iterating over the rest, if we're shutting - * down. - */ - if (XFS_FORCED_SHUTDOWN(mp)) - return NULLAGNUMBER; - agno++; - if (agno >= agcount) - agno = 0; - if (agno == pagno) { - if (flags == 0) - return NULLAGNUMBER; - flags = 0; - } - } -} - -/* - * Try to retrieve the next record to the left/right from the current one. - */ -STATIC int -xfs_ialloc_next_rec( - struct xfs_btree_cur *cur, - xfs_inobt_rec_incore_t *rec, - int *done, - int left) -{ - int error; - int i; - - if (left) - error = xfs_btree_decrement(cur, 0, &i); - else - error = xfs_btree_increment(cur, 0, &i); - - if (error) - return error; - *done = !i; - if (i) { - error = xfs_inobt_get_rec(cur, rec, &i); - if (error) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - } - - return 0; -} - -STATIC int -xfs_ialloc_get_rec( - struct xfs_btree_cur *cur, - xfs_agino_t agino, - xfs_inobt_rec_incore_t *rec, - int *done) -{ - int error; - int i; - - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i); - if (error) - return error; - *done = !i; - if (i) { - error = xfs_inobt_get_rec(cur, rec, &i); - if (error) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - } - - return 0; -} - -/* - * Allocate an inode using the inobt-only algorithm. - */ -STATIC int -xfs_dialloc_ag_inobt( - struct xfs_trans *tp, - struct xfs_buf *agbp, - xfs_ino_t parent, - xfs_ino_t *inop) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); - xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); - xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); - xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); - struct xfs_perag *pag; - struct xfs_btree_cur *cur, *tcur; - struct xfs_inobt_rec_incore rec, trec; - xfs_ino_t ino; - int error; - int offset; - int i, j; - - pag = xfs_perag_get(mp, agno); - - ASSERT(pag->pagi_init); - ASSERT(pag->pagi_inodeok); - ASSERT(pag->pagi_freecount > 0); - - restart_pagno: - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); - /* - * If pagino is 0 (this is the root inode allocation) use newino. - * This must work because we've just allocated some. - */ - if (!pagino) - pagino = be32_to_cpu(agi->agi_newino); - - error = xfs_check_agi_freecount(cur, agi); - if (error) - goto error0; - - /* - * If in the same AG as the parent, try to get near the parent. - */ - if (pagno == agno) { - int doneleft; /* done, to the left */ - int doneright; /* done, to the right */ - int searchdistance = 10; - - error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); - if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - - error = xfs_inobt_get_rec(cur, &rec, &j); - if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(j == 1, error0); - - if (rec.ir_freecount > 0) { - /* - * Found a free inode in the same chunk - * as the parent, done. - */ - goto alloc_inode; - } - - - /* - * In the same AG as parent, but parent's chunk is full. - */ - - /* duplicate the cursor, search left & right simultaneously */ - error = xfs_btree_dup_cursor(cur, &tcur); - if (error) - goto error0; - - /* - * Skip to last blocks looked up if same parent inode. - */ - if (pagino != NULLAGINO && - pag->pagl_pagino == pagino && - pag->pagl_leftrec != NULLAGINO && - pag->pagl_rightrec != NULLAGINO) { - error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, - &trec, &doneleft); - if (error) - goto error1; - - error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, - &rec, &doneright); - if (error) - goto error1; - } else { - /* search left with tcur, back up 1 record */ - error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); - if (error) - goto error1; - - /* search right with cur, go forward 1 record. */ - error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); - if (error) - goto error1; - } - - /* - * Loop until we find an inode chunk with a free inode. - */ - while (!doneleft || !doneright) { - int useleft; /* using left inode chunk this time */ - - if (!--searchdistance) { - /* - * Not in range - save last search - * location and allocate a new inode - */ - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - pag->pagl_leftrec = trec.ir_startino; - pag->pagl_rightrec = rec.ir_startino; - pag->pagl_pagino = pagino; - goto newino; - } - - /* figure out the closer block if both are valid. */ - if (!doneleft && !doneright) { - useleft = pagino - - (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) < - rec.ir_startino - pagino; - } else { - useleft = !doneleft; - } - - /* free inodes to the left? */ - if (useleft && trec.ir_freecount) { - rec = trec; - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - cur = tcur; - - pag->pagl_leftrec = trec.ir_startino; - pag->pagl_rightrec = rec.ir_startino; - pag->pagl_pagino = pagino; - goto alloc_inode; - } - - /* free inodes to the right? */ - if (!useleft && rec.ir_freecount) { - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - - pag->pagl_leftrec = trec.ir_startino; - pag->pagl_rightrec = rec.ir_startino; - pag->pagl_pagino = pagino; - goto alloc_inode; - } - - /* get next record to check */ - if (useleft) { - error = xfs_ialloc_next_rec(tcur, &trec, - &doneleft, 1); - } else { - error = xfs_ialloc_next_rec(cur, &rec, - &doneright, 0); - } - if (error) - goto error1; - } - - /* - * We've reached the end of the btree. because - * we are only searching a small chunk of the - * btree each search, there is obviously free - * inodes closer to the parent inode than we - * are now. restart the search again. - */ - pag->pagl_pagino = NULLAGINO; - pag->pagl_leftrec = NULLAGINO; - pag->pagl_rightrec = NULLAGINO; - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - goto restart_pagno; - } - - /* - * In a different AG from the parent. - * See if the most recently allocated block has any free. - */ -newino: - if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { - error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), - XFS_LOOKUP_EQ, &i); - if (error) - goto error0; - - if (i == 1) { - error = xfs_inobt_get_rec(cur, &rec, &j); - if (error) - goto error0; - - if (j == 1 && rec.ir_freecount > 0) { - /* - * The last chunk allocated in the group - * still has a free inode. - */ - goto alloc_inode; - } - } - } - - /* - * None left in the last group, search the whole AG - */ - error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); - if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - - for (;;) { - error = xfs_inobt_get_rec(cur, &rec, &i); - if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if (rec.ir_freecount > 0) - break; - error = xfs_btree_increment(cur, 0, &i); - if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } - -alloc_inode: - offset = xfs_lowbit64(rec.ir_free); - ASSERT(offset >= 0); - ASSERT(offset < XFS_INODES_PER_CHUNK); - ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % - XFS_INODES_PER_CHUNK) == 0); - ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); - rec.ir_free &= ~XFS_INOBT_MASK(offset); - rec.ir_freecount--; - error = xfs_inobt_update(cur, &rec); - if (error) - goto error0; - be32_add_cpu(&agi->agi_freecount, -1); - xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - pag->pagi_freecount--; - - error = xfs_check_agi_freecount(cur, agi); - if (error) - goto error0; - - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); - xfs_perag_put(pag); - *inop = ino; - return 0; -error1: - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); -error0: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - xfs_perag_put(pag); - return error; -} - -/* - * Use the free inode btree to allocate an inode based on distance from the - * parent. Note that the provided cursor may be deleted and replaced. - */ -STATIC int -xfs_dialloc_ag_finobt_near( - xfs_agino_t pagino, - struct xfs_btree_cur **ocur, - struct xfs_inobt_rec_incore *rec) -{ - struct xfs_btree_cur *lcur = *ocur; /* left search cursor */ - struct xfs_btree_cur *rcur; /* right search cursor */ - struct xfs_inobt_rec_incore rrec; - int error; - int i, j; - - error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i); - if (error) - return error; - - if (i == 1) { - error = xfs_inobt_get_rec(lcur, rec, &i); - if (error) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - - /* - * See if we've landed in the parent inode record. The finobt - * only tracks chunks with at least one free inode, so record - * existence is enough. - */ - if (pagino >= rec->ir_startino && - pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) - return 0; - } - - error = xfs_btree_dup_cursor(lcur, &rcur); - if (error) - return error; - - error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j); - if (error) - goto error_rcur; - if (j == 1) { - error = xfs_inobt_get_rec(rcur, &rrec, &j); - if (error) - goto error_rcur; - XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); - } - - XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); - if (i == 1 && j == 1) { - /* - * Both the left and right records are valid. Choose the closer - * inode chunk to the target. - */ - if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) > - (rrec.ir_startino - pagino)) { - *rec = rrec; - xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); - *ocur = rcur; - } else { - xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); - } - } else if (j == 1) { - /* only the right record is valid */ - *rec = rrec; - xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); - *ocur = rcur; - } else if (i == 1) { - /* only the left record is valid */ - xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); - } - - return 0; - -error_rcur: - xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR); - return error; -} - -/* - * Use the free inode btree to find a free inode based on a newino hint. If - * the hint is NULL, find the first free inode in the AG. - */ -STATIC int -xfs_dialloc_ag_finobt_newino( - struct xfs_agi *agi, - struct xfs_btree_cur *cur, - struct xfs_inobt_rec_incore *rec) -{ - int error; - int i; - - if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { - error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, - &i); - if (error) - return error; - if (i == 1) { - error = xfs_inobt_get_rec(cur, rec, &i); - if (error) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - - return 0; - } - } - - /* - * Find the first inode available in the AG. - */ - error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); - if (error) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - - error = xfs_inobt_get_rec(cur, rec, &i); - if (error) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - - return 0; -} - -/* - * Update the inobt based on a modification made to the finobt. Also ensure that - * the records from both trees are equivalent post-modification. - */ -STATIC int -xfs_dialloc_ag_update_inobt( - struct xfs_btree_cur *cur, /* inobt cursor */ - struct xfs_inobt_rec_incore *frec, /* finobt record */ - int offset) /* inode offset */ -{ - struct xfs_inobt_rec_incore rec; - int error; - int i; - - error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); - if (error) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - - error = xfs_inobt_get_rec(cur, &rec, &i); - if (error) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % - XFS_INODES_PER_CHUNK) == 0); - - rec.ir_free &= ~XFS_INOBT_MASK(offset); - rec.ir_freecount--; - - XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && - (rec.ir_freecount == frec->ir_freecount)); - - error = xfs_inobt_update(cur, &rec); - if (error) - return error; - - return 0; -} - -/* - * Allocate an inode using the free inode btree, if available. Otherwise, fall - * back to the inobt search algorithm. - * - * The caller selected an AG for us, and made sure that free inodes are - * available. - */ -STATIC int -xfs_dialloc_ag( - struct xfs_trans *tp, - struct xfs_buf *agbp, - xfs_ino_t parent, - xfs_ino_t *inop) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); - xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); - xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); - xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); - struct xfs_perag *pag; - struct xfs_btree_cur *cur; /* finobt cursor */ - struct xfs_btree_cur *icur; /* inobt cursor */ - struct xfs_inobt_rec_incore rec; - xfs_ino_t ino; - int error; - int offset; - int i; - - if (!xfs_sb_version_hasfinobt(&mp->m_sb)) - return xfs_dialloc_ag_inobt(tp, agbp, parent, inop); - - pag = xfs_perag_get(mp, agno); - - /* - * If pagino is 0 (this is the root inode allocation) use newino. - * This must work because we've just allocated some. - */ - if (!pagino) - pagino = be32_to_cpu(agi->agi_newino); - - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); - - error = xfs_check_agi_freecount(cur, agi); - if (error) - goto error_cur; - - /* - * The search algorithm depends on whether we're in the same AG as the - * parent. If so, find the closest available inode to the parent. If - * not, consider the agi hint or find the first free inode in the AG. - */ - if (agno == pagno) - error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); - else - error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); - if (error) - goto error_cur; - - offset = xfs_lowbit64(rec.ir_free); - ASSERT(offset >= 0); - ASSERT(offset < XFS_INODES_PER_CHUNK); - ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % - XFS_INODES_PER_CHUNK) == 0); - ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); - - /* - * Modify or remove the finobt record. - */ - rec.ir_free &= ~XFS_INOBT_MASK(offset); - rec.ir_freecount--; - if (rec.ir_freecount) - error = xfs_inobt_update(cur, &rec); - else - error = xfs_btree_delete(cur, &i); - if (error) - goto error_cur; - - /* - * The finobt has now been updated appropriately. We haven't updated the - * agi and superblock yet, so we can create an inobt cursor and validate - * the original freecount. If all is well, make the equivalent update to - * the inobt using the finobt record and offset information. - */ - icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); - - error = xfs_check_agi_freecount(icur, agi); - if (error) - goto error_icur; - - error = xfs_dialloc_ag_update_inobt(icur, &rec, offset); - if (error) - goto error_icur; - - /* - * Both trees have now been updated. We must update the perag and - * superblock before we can check the freecount for each btree. - */ - be32_add_cpu(&agi->agi_freecount, -1); - xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - pag->pagi_freecount--; - - xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); - - error = xfs_check_agi_freecount(icur, agi); - if (error) - goto error_icur; - error = xfs_check_agi_freecount(cur, agi); - if (error) - goto error_icur; - - xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - xfs_perag_put(pag); - *inop = ino; - return 0; - -error_icur: - xfs_btree_del_cursor(icur, XFS_BTREE_ERROR); -error_cur: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - xfs_perag_put(pag); - return error; -} - -/* - * Allocate an inode on disk. - * - * Mode is used to tell whether the new inode will need space, and whether it - * is a directory. - * - * This function is designed to be called twice if it has to do an allocation - * to make more free inodes. On the first call, *IO_agbp should be set to NULL. - * If an inode is available without having to performn an allocation, an inode - * number is returned. In this case, *IO_agbp is set to NULL. If an allocation - * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp. - * The caller should then commit the current transaction, allocate a - * new transaction, and call xfs_dialloc() again, passing in the previous value - * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI - * buffer is locked across the two calls, the second call is guaranteed to have - * a free inode available. - * - * Once we successfully pick an inode its number is returned and the on-disk - * data structures are updated. The inode itself is not read in, since doing so - * would break ordering constraints with xfs_reclaim. - */ -int -xfs_dialloc( - struct xfs_trans *tp, - xfs_ino_t parent, - umode_t mode, - int okalloc, - struct xfs_buf **IO_agbp, - xfs_ino_t *inop) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_buf *agbp; - xfs_agnumber_t agno; - int error; - int ialloced; - int noroom = 0; - xfs_agnumber_t start_agno; - struct xfs_perag *pag; - - if (*IO_agbp) { - /* - * If the caller passes in a pointer to the AGI buffer, - * continue where we left off before. In this case, we - * know that the allocation group has free inodes. - */ - agbp = *IO_agbp; - goto out_alloc; - } - - /* - * We do not have an agbp, so select an initial allocation - * group for inode allocation. - */ - start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); - if (start_agno == NULLAGNUMBER) { - *inop = NULLFSINO; - return 0; - } - - /* - * If we have already hit the ceiling of inode blocks then clear - * okalloc so we scan all available agi structures for a free - * inode. - */ - if (mp->m_maxicount && - mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { - noroom = 1; - okalloc = 0; - } - - /* - * Loop until we find an allocation group that either has free inodes - * or in which we can allocate some inodes. Iterate through the - * allocation groups upward, wrapping at the end. - */ - agno = start_agno; - for (;;) { - pag = xfs_perag_get(mp, agno); - if (!pag->pagi_inodeok) { - xfs_ialloc_next_ag(mp); - goto nextag; - } - - if (!pag->pagi_init) { - error = xfs_ialloc_pagi_init(mp, tp, agno); - if (error) - goto out_error; - } - - /* - * Do a first racy fast path check if this AG is usable. - */ - if (!pag->pagi_freecount && !okalloc) - goto nextag; - - /* - * Then read in the AGI buffer and recheck with the AGI buffer - * lock held. - */ - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - if (error) - goto out_error; - - if (pag->pagi_freecount) { - xfs_perag_put(pag); - goto out_alloc; - } - - if (!okalloc) - goto nextag_relse_buffer; - - - error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); - if (error) { - xfs_trans_brelse(tp, agbp); - - if (error != ENOSPC) - goto out_error; - - xfs_perag_put(pag); - *inop = NULLFSINO; - return 0; - } - - if (ialloced) { - /* - * We successfully allocated some inodes, return - * the current context to the caller so that it - * can commit the current transaction and call - * us again where we left off. - */ - ASSERT(pag->pagi_freecount > 0); - xfs_perag_put(pag); - - *IO_agbp = agbp; - *inop = NULLFSINO; - return 0; - } - -nextag_relse_buffer: - xfs_trans_brelse(tp, agbp); -nextag: - xfs_perag_put(pag); - if (++agno == mp->m_sb.sb_agcount) - agno = 0; - if (agno == start_agno) { - *inop = NULLFSINO; - return noroom ? ENOSPC : 0; - } - } - -out_alloc: - *IO_agbp = NULL; - return xfs_dialloc_ag(tp, agbp, parent, inop); -out_error: - xfs_perag_put(pag); - return error; -} - -STATIC int -xfs_difree_inobt( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_buf *agbp, - xfs_agino_t agino, - struct xfs_bmap_free *flist, - int *deleted, - xfs_ino_t *first_ino, - struct xfs_inobt_rec_incore *orec) -{ - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); - xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); - struct xfs_perag *pag; - struct xfs_btree_cur *cur; - struct xfs_inobt_rec_incore rec; - int ilen; - int error; - int i; - int off; - - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); - ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length)); - - /* - * Initialize the cursor. - */ - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); - - error = xfs_check_agi_freecount(cur, agi); - if (error) - goto error0; - - /* - * Look for the entry describing this inode. - */ - if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { - xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.", - __func__, error); - goto error0; - } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - error = xfs_inobt_get_rec(cur, &rec, &i); - if (error) { - xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", - __func__, error); - goto error0; - } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * Get the offset in the inode chunk. - */ - off = agino - rec.ir_startino; - ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); - ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off))); - /* - * Mark the inode free & increment the count. - */ - rec.ir_free |= XFS_INOBT_MASK(off); - rec.ir_freecount++; - - /* - * When an inode cluster is free, it becomes eligible for removal - */ - if (!(mp->m_flags & XFS_MOUNT_IKEEP) && - (rec.ir_freecount == mp->m_ialloc_inos)) { - - *deleted = 1; - *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); - - /* - * Remove the inode cluster from the AGI B+Tree, adjust the - * AGI and Superblock inode counts, and mark the disk space - * to be freed when the transaction is committed. - */ - ilen = mp->m_ialloc_inos; - be32_add_cpu(&agi->agi_count, -ilen); - be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); - xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); - pag = xfs_perag_get(mp, agno); - pag->pagi_freecount -= ilen - 1; - xfs_perag_put(pag); - xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); - xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); - - if ((error = xfs_btree_delete(cur, &i))) { - xfs_warn(mp, "%s: xfs_btree_delete returned error %d.", - __func__, error); - goto error0; - } - - xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, - XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)), - mp->m_ialloc_blks, flist, mp); - } else { - *deleted = 0; - - error = xfs_inobt_update(cur, &rec); - if (error) { - xfs_warn(mp, "%s: xfs_inobt_update returned error %d.", - __func__, error); - goto error0; - } - - /* - * Change the inode free counts and log the ag/sb changes. - */ - be32_add_cpu(&agi->agi_freecount, 1); - xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - pag = xfs_perag_get(mp, agno); - pag->pagi_freecount++; - xfs_perag_put(pag); - xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); - } - - error = xfs_check_agi_freecount(cur, agi); - if (error) - goto error0; - - *orec = rec; - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - return 0; - -error0: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; -} - -/* - * Free an inode in the free inode btree. - */ -STATIC int -xfs_difree_finobt( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_buf *agbp, - xfs_agino_t agino, - struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ -{ - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); - xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); - struct xfs_btree_cur *cur; - struct xfs_inobt_rec_incore rec; - int offset = agino - ibtrec->ir_startino; - int error; - int i; - - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); - - error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); - if (error) - goto error; - if (i == 0) { - /* - * If the record does not exist in the finobt, we must have just - * freed an inode in a previously fully allocated chunk. If not, - * something is out of sync. - */ - XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error); - - error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, - ibtrec->ir_free, &i); - if (error) - goto error; - ASSERT(i == 1); - - goto out; - } - - /* - * Read and update the existing record. We could just copy the ibtrec - * across here, but that would defeat the purpose of having redundant - * metadata. By making the modifications independently, we can catch - * corruptions that we wouldn't see if we just copied from one record - * to another. - */ - error = xfs_inobt_get_rec(cur, &rec, &i); - if (error) - goto error; - XFS_WANT_CORRUPTED_GOTO(i == 1, error); - - rec.ir_free |= XFS_INOBT_MASK(offset); - rec.ir_freecount++; - - XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) && - (rec.ir_freecount == ibtrec->ir_freecount), - error); - - /* - * The content of inobt records should always match between the inobt - * and finobt. The lifecycle of records in the finobt is different from - * the inobt in that the finobt only tracks records with at least one - * free inode. Hence, if all of the inodes are free and we aren't - * keeping inode chunks permanently on disk, remove the record. - * Otherwise, update the record with the new information. - */ - if (rec.ir_freecount == mp->m_ialloc_inos && - !(mp->m_flags & XFS_MOUNT_IKEEP)) { - error = xfs_btree_delete(cur, &i); - if (error) - goto error; - ASSERT(i == 1); - } else { - error = xfs_inobt_update(cur, &rec); - if (error) - goto error; - } - -out: - error = xfs_check_agi_freecount(cur, agi); - if (error) - goto error; - - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - return 0; - -error: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; -} - -/* - * Free disk inode. Carefully avoids touching the incore inode, all - * manipulations incore are the caller's responsibility. - * The on-disk inode is not changed by this operation, only the - * btree (free inode mask) is changed. - */ -int -xfs_difree( - struct xfs_trans *tp, /* transaction pointer */ - xfs_ino_t inode, /* inode to be freed */ - struct xfs_bmap_free *flist, /* extents to free */ - int *deleted,/* set if inode cluster was deleted */ - xfs_ino_t *first_ino)/* first inode in deleted cluster */ -{ - /* REFERENCED */ - xfs_agblock_t agbno; /* block number containing inode */ - struct xfs_buf *agbp; /* buffer for allocation group header */ - xfs_agino_t agino; /* allocation group inode number */ - xfs_agnumber_t agno; /* allocation group number */ - int error; /* error return value */ - struct xfs_mount *mp; /* mount structure for filesystem */ - struct xfs_inobt_rec_incore rec;/* btree record */ - - mp = tp->t_mountp; - - /* - * Break up inode number into its components. - */ - agno = XFS_INO_TO_AGNO(mp, inode); - if (agno >= mp->m_sb.sb_agcount) { - xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", - __func__, agno, mp->m_sb.sb_agcount); - ASSERT(0); - return EINVAL; - } - agino = XFS_INO_TO_AGINO(mp, inode); - if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { - xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", - __func__, (unsigned long long)inode, - (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); - ASSERT(0); - return EINVAL; - } - agbno = XFS_AGINO_TO_AGBNO(mp, agino); - if (agbno >= mp->m_sb.sb_agblocks) { - xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", - __func__, agbno, mp->m_sb.sb_agblocks); - ASSERT(0); - return EINVAL; - } - /* - * Get the allocation group header. - */ - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - if (error) { - xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", - __func__, error); - return error; - } - - /* - * Fix up the inode allocation btree. - */ - error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino, - &rec); - if (error) - goto error0; - - /* - * Fix up the free inode btree. - */ - if (xfs_sb_version_hasfinobt(&mp->m_sb)) { - error = xfs_difree_finobt(mp, tp, agbp, agino, &rec); - if (error) - goto error0; - } - - return 0; - -error0: - return error; -} - -STATIC int -xfs_imap_lookup( - struct xfs_mount *mp, - struct xfs_trans *tp, - xfs_agnumber_t agno, - xfs_agino_t agino, - xfs_agblock_t agbno, - xfs_agblock_t *chunk_agbno, - xfs_agblock_t *offset_agbno, - int flags) -{ - struct xfs_inobt_rec_incore rec; - struct xfs_btree_cur *cur; - struct xfs_buf *agbp; - int error; - int i; - - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - if (error) { - xfs_alert(mp, - "%s: xfs_ialloc_read_agi() returned error %d, agno %d", - __func__, error, agno); - return error; - } - - /* - * Lookup the inode record for the given agino. If the record cannot be - * found, then it's an invalid inode number and we should abort. Once - * we have a record, we need to ensure it contains the inode number - * we are looking up. - */ - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); - if (!error) { - if (i) - error = xfs_inobt_get_rec(cur, &rec, &i); - if (!error && i == 0) - error = EINVAL; - } - - xfs_trans_brelse(tp, agbp); - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - if (error) - return error; - - /* check that the returned record contains the required inode */ - if (rec.ir_startino > agino || - rec.ir_startino + mp->m_ialloc_inos <= agino) - return EINVAL; - - /* for untrusted inodes check it is allocated first */ - if ((flags & XFS_IGET_UNTRUSTED) && - (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) - return EINVAL; - - *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); - *offset_agbno = agbno - *chunk_agbno; - return 0; -} - -/* - * Return the location of the inode in imap, for mapping it into a buffer. - */ -int -xfs_imap( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_ino_t ino, /* inode to locate */ - struct xfs_imap *imap, /* location map structure */ - uint flags) /* flags for inode btree lookup */ -{ - xfs_agblock_t agbno; /* block number of inode in the alloc group */ - xfs_agino_t agino; /* inode number within alloc group */ - xfs_agnumber_t agno; /* allocation group number */ - int blks_per_cluster; /* num blocks per inode cluster */ - xfs_agblock_t chunk_agbno; /* first block in inode chunk */ - xfs_agblock_t cluster_agbno; /* first block in inode cluster */ - int error; /* error code */ - int offset; /* index of inode in its buffer */ - xfs_agblock_t offset_agbno; /* blks from chunk start to inode */ - - ASSERT(ino != NULLFSINO); - - /* - * Split up the inode number into its parts. - */ - agno = XFS_INO_TO_AGNO(mp, ino); - agino = XFS_INO_TO_AGINO(mp, ino); - agbno = XFS_AGINO_TO_AGBNO(mp, agino); - if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || - ino != XFS_AGINO_TO_INO(mp, agno, agino)) { -#ifdef DEBUG - /* - * Don't output diagnostic information for untrusted inodes - * as they can be invalid without implying corruption. - */ - if (flags & XFS_IGET_UNTRUSTED) - return EINVAL; - if (agno >= mp->m_sb.sb_agcount) { - xfs_alert(mp, - "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", - __func__, agno, mp->m_sb.sb_agcount); - } - if (agbno >= mp->m_sb.sb_agblocks) { - xfs_alert(mp, - "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", - __func__, (unsigned long long)agbno, - (unsigned long)mp->m_sb.sb_agblocks); - } - if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { - xfs_alert(mp, - "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", - __func__, ino, - XFS_AGINO_TO_INO(mp, agno, agino)); - } - xfs_stack_trace(); -#endif /* DEBUG */ - return EINVAL; - } - - blks_per_cluster = xfs_icluster_size_fsb(mp); - - /* - * For bulkstat and handle lookups, we have an untrusted inode number - * that we have to verify is valid. We cannot do this just by reading - * the inode buffer as it may have been unlinked and removed leaving - * inodes in stale state on disk. Hence we have to do a btree lookup - * in all cases where an untrusted inode number is passed. - */ - if (flags & XFS_IGET_UNTRUSTED) { - error = xfs_imap_lookup(mp, tp, agno, agino, agbno, - &chunk_agbno, &offset_agbno, flags); - if (error) - return error; - goto out_map; - } - - /* - * If the inode cluster size is the same as the blocksize or - * smaller we get to the buffer by simple arithmetics. - */ - if (blks_per_cluster == 1) { - offset = XFS_INO_TO_OFFSET(mp, ino); - ASSERT(offset < mp->m_sb.sb_inopblock); - - imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); - imap->im_len = XFS_FSB_TO_BB(mp, 1); - imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); - return 0; - } - - /* - * If the inode chunks are aligned then use simple maths to - * find the location. Otherwise we have to do a btree - * lookup to find the location. - */ - if (mp->m_inoalign_mask) { - offset_agbno = agbno & mp->m_inoalign_mask; - chunk_agbno = agbno - offset_agbno; - } else { - error = xfs_imap_lookup(mp, tp, agno, agino, agbno, - &chunk_agbno, &offset_agbno, flags); - if (error) - return error; - } - -out_map: - ASSERT(agbno >= chunk_agbno); - cluster_agbno = chunk_agbno + - ((offset_agbno / blks_per_cluster) * blks_per_cluster); - offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + - XFS_INO_TO_OFFSET(mp, ino); - - imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); - imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); - imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); - - /* - * If the inode number maps to a block outside the bounds - * of the file system then return NULL rather than calling - * read_buf and panicing when we get an error from the - * driver. - */ - if ((imap->im_blkno + imap->im_len) > - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { - xfs_alert(mp, - "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)", - __func__, (unsigned long long) imap->im_blkno, - (unsigned long long) imap->im_len, - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); - return EINVAL; - } - return 0; -} - -/* - * Compute and fill in value of m_in_maxlevels. - */ -void -xfs_ialloc_compute_maxlevels( - xfs_mount_t *mp) /* file system mount structure */ -{ - int level; - uint maxblocks; - uint maxleafents; - int minleafrecs; - int minnoderecs; - - maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> - XFS_INODES_PER_CHUNK_LOG; - minleafrecs = mp->m_alloc_mnr[0]; - minnoderecs = mp->m_alloc_mnr[1]; - maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; - for (level = 1; maxblocks > 1; level++) - maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; - mp->m_in_maxlevels = level; -} - -/* - * Log specified fields for the ag hdr (inode section). The growth of the agi - * structure over time requires that we interpret the buffer as two logical - * regions delineated by the end of the unlinked list. This is due to the size - * of the hash table and its location in the middle of the agi. - * - * For example, a request to log a field before agi_unlinked and a field after - * agi_unlinked could cause us to log the entire hash table and use an excessive - * amount of log space. To avoid this behavior, log the region up through - * agi_unlinked in one call and the region after agi_unlinked through the end of - * the structure in another. - */ -void -xfs_ialloc_log_agi( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* allocation group header buffer */ - int fields) /* bitmask of fields to log */ -{ - int first; /* first byte number */ - int last; /* last byte number */ - static const short offsets[] = { /* field starting offsets */ - /* keep in sync with bit definitions */ - offsetof(xfs_agi_t, agi_magicnum), - offsetof(xfs_agi_t, agi_versionnum), - offsetof(xfs_agi_t, agi_seqno), - offsetof(xfs_agi_t, agi_length), - offsetof(xfs_agi_t, agi_count), - offsetof(xfs_agi_t, agi_root), - offsetof(xfs_agi_t, agi_level), - offsetof(xfs_agi_t, agi_freecount), - offsetof(xfs_agi_t, agi_newino), - offsetof(xfs_agi_t, agi_dirino), - offsetof(xfs_agi_t, agi_unlinked), - offsetof(xfs_agi_t, agi_free_root), - offsetof(xfs_agi_t, agi_free_level), - sizeof(xfs_agi_t) - }; -#ifdef DEBUG - xfs_agi_t *agi; /* allocation group header */ - - agi = XFS_BUF_TO_AGI(bp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); -#endif - - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); - - /* - * Compute byte offsets for the first and last fields in the first - * region and log the agi buffer. This only logs up through - * agi_unlinked. - */ - if (fields & XFS_AGI_ALL_BITS_R1) { - xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1, - &first, &last); - xfs_trans_log_buf(tp, bp, first, last); - } - - /* - * Mask off the bits in the first region and calculate the first and - * last field offsets for any bits in the second region. - */ - fields &= ~XFS_AGI_ALL_BITS_R1; - if (fields) { - xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2, - &first, &last); - xfs_trans_log_buf(tp, bp, first, last); - } -} - -#ifdef DEBUG -STATIC void -xfs_check_agi_unlinked( - struct xfs_agi *agi) -{ - int i; - - for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) - ASSERT(agi->agi_unlinked[i]); -} -#else -#define xfs_check_agi_unlinked(agi) -#endif - -static bool -xfs_agi_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); - - if (xfs_sb_version_hascrc(&mp->m_sb) && - !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid)) - return false; - /* - * Validate the magic number of the agi block. - */ - if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC)) - return false; - if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) - return false; - - /* - * during growfs operations, the perag is not fully initialised, - * so we can't use it for any useful checking. growfs ensures we can't - * use it by using uncached buffers that don't have the perag attached - * so we can detect and avoid this problem. - */ - if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) - return false; - - xfs_check_agi_unlinked(agi); - return true; -} - -static void -xfs_agi_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, - XFS_ERRTAG_IALLOC_READ_AGI, - XFS_RANDOM_IALLOC_READ_AGI)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); -} - -static void -xfs_agi_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - - if (!xfs_agi_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (bip) - XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF); -} - -const struct xfs_buf_ops xfs_agi_buf_ops = { - .verify_read = xfs_agi_read_verify, - .verify_write = xfs_agi_write_verify, -}; - -/* - * Read in the allocation group header (inode allocation section) - */ -int -xfs_read_agi( - struct xfs_mount *mp, /* file system mount structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - struct xfs_buf **bpp) /* allocation group hdr buf */ -{ - int error; - - trace_xfs_read_agi(mp, agno); - - ASSERT(agno != NULLAGNUMBER); - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); - if (error) - return error; - - xfs_buf_set_ref(*bpp, XFS_AGI_REF); - return 0; -} - -int -xfs_ialloc_read_agi( - struct xfs_mount *mp, /* file system mount structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - struct xfs_buf **bpp) /* allocation group hdr buf */ -{ - struct xfs_agi *agi; /* allocation group header */ - struct xfs_perag *pag; /* per allocation group data */ - int error; - - trace_xfs_ialloc_read_agi(mp, agno); - - error = xfs_read_agi(mp, tp, agno, bpp); - if (error) - return error; - - agi = XFS_BUF_TO_AGI(*bpp); - pag = xfs_perag_get(mp, agno); - if (!pag->pagi_init) { - pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); - pag->pagi_count = be32_to_cpu(agi->agi_count); - pag->pagi_init = 1; - } - - /* - * It's possible for these to be out of sync if - * we are in the middle of a forced shutdown. - */ - ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); - xfs_perag_put(pag); - return 0; -} - -/* - * Read in the agi to initialise the per-ag data in the mount structure - */ -int -xfs_ialloc_pagi_init( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno) /* allocation group number */ -{ - xfs_buf_t *bp = NULL; - int error; - - error = xfs_ialloc_read_agi(mp, tp, agno, &bp); - if (error) - return error; - if (bp) - xfs_trans_brelse(tp, bp); - return 0; -} diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c deleted file mode 100644 index 726f83a681a5..000000000000 --- a/fs/xfs/xfs_ialloc_btree.c +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" -#include "xfs_ialloc_btree.h" -#include "xfs_alloc.h" -#include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_cksum.h" -#include "xfs_trans.h" - - -STATIC int -xfs_inobt_get_minrecs( - struct xfs_btree_cur *cur, - int level) -{ - return cur->bc_mp->m_inobt_mnr[level != 0]; -} - -STATIC struct xfs_btree_cur * -xfs_inobt_dup_cursor( - struct xfs_btree_cur *cur) -{ - return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno, - cur->bc_btnum); -} - -STATIC void -xfs_inobt_set_root( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *nptr, - int inc) /* level change */ -{ - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); - - agi->agi_root = nptr->s; - be32_add_cpu(&agi->agi_level, inc); - xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); -} - -STATIC void -xfs_finobt_set_root( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *nptr, - int inc) /* level change */ -{ - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); - - agi->agi_free_root = nptr->s; - be32_add_cpu(&agi->agi_free_level, inc); - xfs_ialloc_log_agi(cur->bc_tp, agbp, - XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); -} - -STATIC int -xfs_inobt_alloc_block( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *start, - union xfs_btree_ptr *new, - int *stat) -{ - xfs_alloc_arg_t args; /* block allocation args */ - int error; /* error return value */ - xfs_agblock_t sbno = be32_to_cpu(start->s); - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - - memset(&args, 0, sizeof(args)); - args.tp = cur->bc_tp; - args.mp = cur->bc_mp; - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); - args.minlen = 1; - args.maxlen = 1; - args.prod = 1; - args.type = XFS_ALLOCTYPE_NEAR_BNO; - - error = xfs_alloc_vextent(&args); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; - } - if (args.fsbno == NULLFSBLOCK) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - *stat = 0; - return 0; - } - ASSERT(args.len == 1); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - - new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); - *stat = 1; - return 0; -} - -STATIC int -xfs_inobt_free_block( - struct xfs_btree_cur *cur, - struct xfs_buf *bp) -{ - xfs_fsblock_t fsbno; - int error; - - fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)); - error = xfs_free_extent(cur->bc_tp, fsbno, 1); - if (error) - return error; - - xfs_trans_binval(cur->bc_tp, bp); - return error; -} - -STATIC int -xfs_inobt_get_maxrecs( - struct xfs_btree_cur *cur, - int level) -{ - return cur->bc_mp->m_inobt_mxr[level != 0]; -} - -STATIC void -xfs_inobt_init_key_from_rec( - union xfs_btree_key *key, - union xfs_btree_rec *rec) -{ - key->inobt.ir_startino = rec->inobt.ir_startino; -} - -STATIC void -xfs_inobt_init_rec_from_key( - union xfs_btree_key *key, - union xfs_btree_rec *rec) -{ - rec->inobt.ir_startino = key->inobt.ir_startino; -} - -STATIC void -xfs_inobt_init_rec_from_cur( - struct xfs_btree_cur *cur, - union xfs_btree_rec *rec) -{ - rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); - rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount); - rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); -} - -/* - * initial value of ptr for lookup - */ -STATIC void -xfs_inobt_init_ptr_from_cur( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr) -{ - struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); - - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); - - ptr->s = agi->agi_root; -} - -STATIC void -xfs_finobt_init_ptr_from_cur( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr) -{ - struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); - - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); - ptr->s = agi->agi_free_root; -} - -STATIC __int64_t -xfs_inobt_key_diff( - struct xfs_btree_cur *cur, - union xfs_btree_key *key) -{ - return (__int64_t)be32_to_cpu(key->inobt.ir_startino) - - cur->bc_rec.i.ir_startino; -} - -static int -xfs_inobt_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_perag *pag = bp->b_pag; - unsigned int level; - - /* - * During growfs operations, we can't verify the exact owner as the - * perag is not fully initialised and hence not attached to the buffer. - * - * Similarly, during log recovery we will have a perag structure - * attached, but the agi information will not yet have been initialised - * from the on disk AGI. We don't currently use any of this information, - * but beware of the landmine (i.e. need to check pag->pagi_init) if we - * ever do. - */ - switch (block->bb_magic) { - case cpu_to_be32(XFS_IBT_CRC_MAGIC): - case cpu_to_be32(XFS_FIBT_CRC_MAGIC): - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) - return false; - if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) - return false; - if (pag && - be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) - return false; - /* fall through */ - case cpu_to_be32(XFS_IBT_MAGIC): - case cpu_to_be32(XFS_FIBT_MAGIC): - break; - default: - return 0; - } - - /* numrecs and level verification */ - level = be16_to_cpu(block->bb_level); - if (level >= mp->m_in_maxlevels) - return false; - if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0]) - return false; - - /* sibling pointer verification */ - if (!block->bb_u.s.bb_leftsib || - (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) - return false; - if (!block->bb_u.s.bb_rightsib || - (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) - return false; - - return true; -} - -static void -xfs_inobt_read_verify( - struct xfs_buf *bp) -{ - if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (!xfs_inobt_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) { - trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } -} - -static void -xfs_inobt_write_verify( - struct xfs_buf *bp) -{ - if (!xfs_inobt_verify(bp)) { - trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - xfs_btree_sblock_calc_crc(bp); - -} - -const struct xfs_buf_ops xfs_inobt_buf_ops = { - .verify_read = xfs_inobt_read_verify, - .verify_write = xfs_inobt_write_verify, -}; - -#if defined(DEBUG) || defined(XFS_WARN) -STATIC int -xfs_inobt_keys_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_key *k1, - union xfs_btree_key *k2) -{ - return be32_to_cpu(k1->inobt.ir_startino) < - be32_to_cpu(k2->inobt.ir_startino); -} - -STATIC int -xfs_inobt_recs_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_rec *r1, - union xfs_btree_rec *r2) -{ - return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <= - be32_to_cpu(r2->inobt.ir_startino); -} -#endif /* DEBUG */ - -static const struct xfs_btree_ops xfs_inobt_ops = { - .rec_len = sizeof(xfs_inobt_rec_t), - .key_len = sizeof(xfs_inobt_key_t), - - .dup_cursor = xfs_inobt_dup_cursor, - .set_root = xfs_inobt_set_root, - .alloc_block = xfs_inobt_alloc_block, - .free_block = xfs_inobt_free_block, - .get_minrecs = xfs_inobt_get_minrecs, - .get_maxrecs = xfs_inobt_get_maxrecs, - .init_key_from_rec = xfs_inobt_init_key_from_rec, - .init_rec_from_key = xfs_inobt_init_rec_from_key, - .init_rec_from_cur = xfs_inobt_init_rec_from_cur, - .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, - .key_diff = xfs_inobt_key_diff, - .buf_ops = &xfs_inobt_buf_ops, -#if defined(DEBUG) || defined(XFS_WARN) - .keys_inorder = xfs_inobt_keys_inorder, - .recs_inorder = xfs_inobt_recs_inorder, -#endif -}; - -static const struct xfs_btree_ops xfs_finobt_ops = { - .rec_len = sizeof(xfs_inobt_rec_t), - .key_len = sizeof(xfs_inobt_key_t), - - .dup_cursor = xfs_inobt_dup_cursor, - .set_root = xfs_finobt_set_root, - .alloc_block = xfs_inobt_alloc_block, - .free_block = xfs_inobt_free_block, - .get_minrecs = xfs_inobt_get_minrecs, - .get_maxrecs = xfs_inobt_get_maxrecs, - .init_key_from_rec = xfs_inobt_init_key_from_rec, - .init_rec_from_key = xfs_inobt_init_rec_from_key, - .init_rec_from_cur = xfs_inobt_init_rec_from_cur, - .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, - .key_diff = xfs_inobt_key_diff, - .buf_ops = &xfs_inobt_buf_ops, -#if defined(DEBUG) || defined(XFS_WARN) - .keys_inorder = xfs_inobt_keys_inorder, - .recs_inorder = xfs_inobt_recs_inorder, -#endif -}; - -/* - * Allocate a new inode btree cursor. - */ -struct xfs_btree_cur * /* new inode btree cursor */ -xfs_inobt_init_cursor( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* buffer for agi structure */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_btnum_t btnum) /* ialloc or free ino btree */ -{ - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); - struct xfs_btree_cur *cur; - - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); - - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_btnum = btnum; - if (btnum == XFS_BTNUM_INO) { - cur->bc_nlevels = be32_to_cpu(agi->agi_level); - cur->bc_ops = &xfs_inobt_ops; - } else { - cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); - cur->bc_ops = &xfs_finobt_ops; - } - - cur->bc_blocklog = mp->m_sb.sb_blocklog; - - if (xfs_sb_version_hascrc(&mp->m_sb)) - cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; - - return cur; -} - -/* - * Calculate number of records in an inobt btree block. - */ -int -xfs_inobt_maxrecs( - struct xfs_mount *mp, - int blocklen, - int leaf) -{ - blocklen -= XFS_INOBT_BLOCK_LEN(mp); - - if (leaf) - return blocklen / sizeof(xfs_inobt_rec_t); - return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); -} diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c deleted file mode 100644 index 1e5366d7745e..000000000000 --- a/fs/xfs/xfs_inode_buf.c +++ /dev/null @@ -1,479 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_error.h" -#include "xfs_cksum.h" -#include "xfs_icache.h" -#include "xfs_trans.h" -#include "xfs_ialloc.h" -#include "xfs_dinode.h" - -/* - * Check that none of the inode's in the buffer have a next - * unlinked field of 0. - */ -#if defined(DEBUG) -void -xfs_inobp_check( - xfs_mount_t *mp, - xfs_buf_t *bp) -{ - int i; - int j; - xfs_dinode_t *dip; - - j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; - - for (i = 0; i < j; i++) { - dip = (xfs_dinode_t *)xfs_buf_offset(bp, - i * mp->m_sb.sb_inodesize); - if (!dip->di_next_unlinked) { - xfs_alert(mp, - "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.", - i, (long long)bp->b_bn); - } - } -} -#endif - -/* - * If we are doing readahead on an inode buffer, we might be in log recovery - * reading an inode allocation buffer that hasn't yet been replayed, and hence - * has not had the inode cores stamped into it. Hence for readahead, the buffer - * may be potentially invalid. - * - * If the readahead buffer is invalid, we don't want to mark it with an error, - * but we do want to clear the DONE status of the buffer so that a followup read - * will re-read it from disk. This will ensure that we don't get an unnecessary - * warnings during log recovery and we don't get unnecssary panics on debug - * kernels. - */ -static void -xfs_inode_buf_verify( - struct xfs_buf *bp, - bool readahead) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - int i; - int ni; - - /* - * Validate the magic number and version of every inode in the buffer - */ - ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; - for (i = 0; i < ni; i++) { - int di_ok; - xfs_dinode_t *dip; - - dip = (struct xfs_dinode *)xfs_buf_offset(bp, - (i << mp->m_sb.sb_inodelog)); - di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && - XFS_DINODE_GOOD_VERSION(dip->di_version); - if (unlikely(XFS_TEST_ERROR(!di_ok, mp, - XFS_ERRTAG_ITOBP_INOTOBP, - XFS_RANDOM_ITOBP_INOTOBP))) { - if (readahead) { - bp->b_flags &= ~XBF_DONE; - return; - } - - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); -#ifdef DEBUG - xfs_alert(mp, - "bad inode magic/vsn daddr %lld #%d (magic=%x)", - (unsigned long long)bp->b_bn, i, - be16_to_cpu(dip->di_magic)); -#endif - } - } - xfs_inobp_check(mp, bp); -} - - -static void -xfs_inode_buf_read_verify( - struct xfs_buf *bp) -{ - xfs_inode_buf_verify(bp, false); -} - -static void -xfs_inode_buf_readahead_verify( - struct xfs_buf *bp) -{ - xfs_inode_buf_verify(bp, true); -} - -static void -xfs_inode_buf_write_verify( - struct xfs_buf *bp) -{ - xfs_inode_buf_verify(bp, false); -} - -const struct xfs_buf_ops xfs_inode_buf_ops = { - .verify_read = xfs_inode_buf_read_verify, - .verify_write = xfs_inode_buf_write_verify, -}; - -const struct xfs_buf_ops xfs_inode_buf_ra_ops = { - .verify_read = xfs_inode_buf_readahead_verify, - .verify_write = xfs_inode_buf_write_verify, -}; - - -/* - * This routine is called to map an inode to the buffer containing the on-disk - * version of the inode. It returns a pointer to the buffer containing the - * on-disk inode in the bpp parameter, and in the dipp parameter it returns a - * pointer to the on-disk inode within that buffer. - * - * If a non-zero error is returned, then the contents of bpp and dipp are - * undefined. - */ -int -xfs_imap_to_bp( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_imap *imap, - struct xfs_dinode **dipp, - struct xfs_buf **bpp, - uint buf_flags, - uint iget_flags) -{ - struct xfs_buf *bp; - int error; - - buf_flags |= XBF_UNMAPPED; - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, - (int)imap->im_len, buf_flags, &bp, - &xfs_inode_buf_ops); - if (error) { - if (error == EAGAIN) { - ASSERT(buf_flags & XBF_TRYLOCK); - return error; - } - - if (error == EFSCORRUPTED && - (iget_flags & XFS_IGET_UNTRUSTED)) - return EINVAL; - - xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", - __func__, error); - return error; - } - - *bpp = bp; - *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); - return 0; -} - -void -xfs_dinode_from_disk( - xfs_icdinode_t *to, - xfs_dinode_t *from) -{ - to->di_magic = be16_to_cpu(from->di_magic); - to->di_mode = be16_to_cpu(from->di_mode); - to->di_version = from ->di_version; - to->di_format = from->di_format; - to->di_onlink = be16_to_cpu(from->di_onlink); - to->di_uid = be32_to_cpu(from->di_uid); - to->di_gid = be32_to_cpu(from->di_gid); - to->di_nlink = be32_to_cpu(from->di_nlink); - to->di_projid_lo = be16_to_cpu(from->di_projid_lo); - to->di_projid_hi = be16_to_cpu(from->di_projid_hi); - memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); - to->di_flushiter = be16_to_cpu(from->di_flushiter); - to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); - to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec); - to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec); - to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec); - to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec); - to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec); - to->di_size = be64_to_cpu(from->di_size); - to->di_nblocks = be64_to_cpu(from->di_nblocks); - to->di_extsize = be32_to_cpu(from->di_extsize); - to->di_nextents = be32_to_cpu(from->di_nextents); - to->di_anextents = be16_to_cpu(from->di_anextents); - to->di_forkoff = from->di_forkoff; - to->di_aformat = from->di_aformat; - to->di_dmevmask = be32_to_cpu(from->di_dmevmask); - to->di_dmstate = be16_to_cpu(from->di_dmstate); - to->di_flags = be16_to_cpu(from->di_flags); - to->di_gen = be32_to_cpu(from->di_gen); - - if (to->di_version == 3) { - to->di_changecount = be64_to_cpu(from->di_changecount); - to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); - to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); - to->di_flags2 = be64_to_cpu(from->di_flags2); - to->di_ino = be64_to_cpu(from->di_ino); - to->di_lsn = be64_to_cpu(from->di_lsn); - memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); - uuid_copy(&to->di_uuid, &from->di_uuid); - } -} - -void -xfs_dinode_to_disk( - xfs_dinode_t *to, - xfs_icdinode_t *from) -{ - to->di_magic = cpu_to_be16(from->di_magic); - to->di_mode = cpu_to_be16(from->di_mode); - to->di_version = from ->di_version; - to->di_format = from->di_format; - to->di_onlink = cpu_to_be16(from->di_onlink); - to->di_uid = cpu_to_be32(from->di_uid); - to->di_gid = cpu_to_be32(from->di_gid); - to->di_nlink = cpu_to_be32(from->di_nlink); - to->di_projid_lo = cpu_to_be16(from->di_projid_lo); - to->di_projid_hi = cpu_to_be16(from->di_projid_hi); - memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); - to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); - to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); - to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); - to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); - to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); - to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); - to->di_size = cpu_to_be64(from->di_size); - to->di_nblocks = cpu_to_be64(from->di_nblocks); - to->di_extsize = cpu_to_be32(from->di_extsize); - to->di_nextents = cpu_to_be32(from->di_nextents); - to->di_anextents = cpu_to_be16(from->di_anextents); - to->di_forkoff = from->di_forkoff; - to->di_aformat = from->di_aformat; - to->di_dmevmask = cpu_to_be32(from->di_dmevmask); - to->di_dmstate = cpu_to_be16(from->di_dmstate); - to->di_flags = cpu_to_be16(from->di_flags); - to->di_gen = cpu_to_be32(from->di_gen); - - if (from->di_version == 3) { - to->di_changecount = cpu_to_be64(from->di_changecount); - to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); - to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); - to->di_flags2 = cpu_to_be64(from->di_flags2); - to->di_ino = cpu_to_be64(from->di_ino); - to->di_lsn = cpu_to_be64(from->di_lsn); - memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); - uuid_copy(&to->di_uuid, &from->di_uuid); - to->di_flushiter = 0; - } else { - to->di_flushiter = cpu_to_be16(from->di_flushiter); - } -} - -static bool -xfs_dinode_verify( - struct xfs_mount *mp, - struct xfs_inode *ip, - struct xfs_dinode *dip) -{ - if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) - return false; - - /* only version 3 or greater inodes are extensively verified here */ - if (dip->di_version < 3) - return true; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, - XFS_DINODE_CRC_OFF)) - return false; - if (be64_to_cpu(dip->di_ino) != ip->i_ino) - return false; - if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid)) - return false; - return true; -} - -void -xfs_dinode_calc_crc( - struct xfs_mount *mp, - struct xfs_dinode *dip) -{ - __uint32_t crc; - - if (dip->di_version < 3) - return; - - ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); - crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, - XFS_DINODE_CRC_OFF); - dip->di_crc = xfs_end_cksum(crc); -} - -/* - * Read the disk inode attributes into the in-core inode structure. - * - * For version 5 superblocks, if we are initialising a new inode and we are not - * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new - * inode core with a random generation number. If we are keeping inodes around, - * we need to read the inode cluster to get the existing generation number off - * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode - * format) then log recovery is dependent on the di_flushiter field being - * initialised from the current on-disk value and hence we must also read the - * inode off disk. - */ -int -xfs_iread( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_inode_t *ip, - uint iget_flags) -{ - xfs_buf_t *bp; - xfs_dinode_t *dip; - int error; - - /* - * Fill in the location information in the in-core inode. - */ - error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); - if (error) - return error; - - /* shortcut IO on inode allocation if possible */ - if ((iget_flags & XFS_IGET_CREATE) && - xfs_sb_version_hascrc(&mp->m_sb) && - !(mp->m_flags & XFS_MOUNT_IKEEP)) { - /* initialise the on-disk inode core */ - memset(&ip->i_d, 0, sizeof(ip->i_d)); - ip->i_d.di_magic = XFS_DINODE_MAGIC; - ip->i_d.di_gen = prandom_u32(); - if (xfs_sb_version_hascrc(&mp->m_sb)) { - ip->i_d.di_version = 3; - ip->i_d.di_ino = ip->i_ino; - uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); - } else - ip->i_d.di_version = 2; - return 0; - } - - /* - * Get pointers to the on-disk inode and the buffer containing it. - */ - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); - if (error) - return error; - - /* even unallocated inodes are verified */ - if (!xfs_dinode_verify(mp, ip, dip)) { - xfs_alert(mp, "%s: validation failed for inode %lld failed", - __func__, ip->i_ino); - - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); - error = EFSCORRUPTED; - goto out_brelse; - } - - /* - * If the on-disk inode is already linked to a directory - * entry, copy all of the inode into the in-core inode. - * xfs_iformat_fork() handles copying in the inode format - * specific information. - * Otherwise, just get the truly permanent information. - */ - if (dip->di_mode) { - xfs_dinode_from_disk(&ip->i_d, dip); - error = xfs_iformat_fork(ip, dip); - if (error) { -#ifdef DEBUG - xfs_alert(mp, "%s: xfs_iformat() returned error %d", - __func__, error); -#endif /* DEBUG */ - goto out_brelse; - } - } else { - /* - * Partial initialisation of the in-core inode. Just the bits - * that xfs_ialloc won't overwrite or relies on being correct. - */ - ip->i_d.di_magic = be16_to_cpu(dip->di_magic); - ip->i_d.di_version = dip->di_version; - ip->i_d.di_gen = be32_to_cpu(dip->di_gen); - ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); - - if (dip->di_version == 3) { - ip->i_d.di_ino = be64_to_cpu(dip->di_ino); - uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid); - } - - /* - * Make sure to pull in the mode here as well in - * case the inode is released without being used. - * This ensures that xfs_inactive() will see that - * the inode is already free and not try to mess - * with the uninitialized part of it. - */ - ip->i_d.di_mode = 0; - } - - /* - * Automatically convert version 1 inode formats in memory to version 2 - * inode format. If the inode is modified, it will get logged and - * rewritten as a version 2 inode. We can do this because we set the - * superblock feature bit for v2 inodes unconditionally during mount - * and it means the reast of the code can assume the inode version is 2 - * or higher. - */ - if (ip->i_d.di_version == 1) { - ip->i_d.di_version = 2; - memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - ip->i_d.di_nlink = ip->i_d.di_onlink; - ip->i_d.di_onlink = 0; - xfs_set_projid(ip, 0); - } - - ip->i_delayed_blks = 0; - - /* - * Mark the buffer containing the inode as something to keep - * around for a while. This helps to keep recently accessed - * meta-data in-core longer. - */ - xfs_buf_set_ref(bp, XFS_INO_REF); - - /* - * Use xfs_trans_brelse() to release the buffer containing the on-disk - * inode, because it was acquired with xfs_trans_read_buf() in - * xfs_imap_to_bp() above. If tp is NULL, this is just a normal - * brelse(). If we're within a transaction, then xfs_trans_brelse() - * will only release the buffer if it is not dirty within the - * transaction. It will be OK to release the buffer in this case, - * because inodes on disk are never destroyed and we will be locking the - * new in-core inode before putting it in the cache where other - * processes can find it. Thus we don't have to worry about the inode - * being changed just because we released the buffer. - */ - out_brelse: - xfs_trans_brelse(tp, bp); - return error; -} diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c deleted file mode 100644 index 2a124e97f082..000000000000 --- a/fs/xfs/xfs_inode_fork.c +++ /dev/null @@ -1,1906 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include - -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_inum.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_bmap_btree.h" -#include "xfs_bmap.h" -#include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" - -kmem_zone_t *xfs_ifork_zone; - -STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); -STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); -STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); - -#ifdef DEBUG -/* - * Make sure that the extents in the given memory buffer - * are valid. - */ -void -xfs_validate_extents( - xfs_ifork_t *ifp, - int nrecs, - xfs_exntfmt_t fmt) -{ - xfs_bmbt_irec_t irec; - xfs_bmbt_rec_host_t rec; - int i; - - for (i = 0; i < nrecs; i++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); - rec.l0 = get_unaligned(&ep->l0); - rec.l1 = get_unaligned(&ep->l1); - xfs_bmbt_get_all(&rec, &irec); - if (fmt == XFS_EXTFMT_NOSTATE) - ASSERT(irec.br_state == XFS_EXT_NORM); - } -} -#else /* DEBUG */ -#define xfs_validate_extents(ifp, nrecs, fmt) -#endif /* DEBUG */ - - -/* - * Move inode type and inode format specific information from the - * on-disk inode to the in-core inode. For fifos, devs, and sockets - * this means set if_rdev to the proper value. For files, directories, - * and symlinks this means to bring in the in-line data or extent - * pointers. For a file in B-tree format, only the root is immediately - * brought in-core. The rest will be in-lined in if_extents when it - * is first referenced (see xfs_iread_extents()). - */ -int -xfs_iformat_fork( - xfs_inode_t *ip, - xfs_dinode_t *dip) -{ - xfs_attr_shortform_t *atp; - int size; - int error = 0; - xfs_fsize_t di_size; - - if (unlikely(be32_to_cpu(dip->di_nextents) + - be16_to_cpu(dip->di_anextents) > - be64_to_cpu(dip->di_nblocks))) { - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", - (unsigned long long)ip->i_ino, - (int)(be32_to_cpu(dip->di_nextents) + - be16_to_cpu(dip->di_anextents)), - (unsigned long long) - be64_to_cpu(dip->di_nblocks)); - XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return EFSCORRUPTED; - } - - if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { - xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", - (unsigned long long)ip->i_ino, - dip->di_forkoff); - XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return EFSCORRUPTED; - } - - if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && - !ip->i_mount->m_rtdev_targp)) { - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, has realtime flag set.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return EFSCORRUPTED; - } - - switch (ip->i_d.di_mode & S_IFMT) { - case S_IFIFO: - case S_IFCHR: - case S_IFBLK: - case S_IFSOCK: - if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { - XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return EFSCORRUPTED; - } - ip->i_d.di_size = 0; - ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); - break; - - case S_IFREG: - case S_IFLNK: - case S_IFDIR: - switch (dip->di_format) { - case XFS_DINODE_FMT_LOCAL: - /* - * no local regular files yet - */ - if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (local format for regular file).", - (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(4)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return EFSCORRUPTED; - } - - di_size = be64_to_cpu(dip->di_size); - if (unlikely(di_size < 0 || - di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad size %Ld for local inode).", - (unsigned long long) ip->i_ino, - (long long) di_size); - XFS_CORRUPTION_ERROR("xfs_iformat(5)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return EFSCORRUPTED; - } - - size = (int)di_size; - error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); - break; - case XFS_DINODE_FMT_EXTENTS: - error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); - break; - case XFS_DINODE_FMT_BTREE: - error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); - break; - default: - XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, - ip->i_mount); - return EFSCORRUPTED; - } - break; - - default: - XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); - return EFSCORRUPTED; - } - if (error) { - return error; - } - if (!XFS_DFORK_Q(dip)) - return 0; - - ASSERT(ip->i_afp == NULL); - ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); - - switch (dip->di_aformat) { - case XFS_DINODE_FMT_LOCAL: - atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); - size = be16_to_cpu(atp->hdr.totsize); - - if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad attr fork size %Ld).", - (unsigned long long) ip->i_ino, - (long long) size); - XFS_CORRUPTION_ERROR("xfs_iformat(8)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return EFSCORRUPTED; - } - - error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); - break; - case XFS_DINODE_FMT_EXTENTS: - error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); - break; - case XFS_DINODE_FMT_BTREE: - error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); - break; - default: - error = EFSCORRUPTED; - break; - } - if (error) { - kmem_zone_free(xfs_ifork_zone, ip->i_afp); - ip->i_afp = NULL; - xfs_idestroy_fork(ip, XFS_DATA_FORK); - } - return error; -} - -/* - * The file is in-lined in the on-disk inode. - * If it fits into if_inline_data, then copy - * it there, otherwise allocate a buffer for it - * and copy the data there. Either way, set - * if_data to point at the data. - * If we allocate a buffer for the data, make - * sure that its size is a multiple of 4 and - * record the real size in i_real_bytes. - */ -STATIC int -xfs_iformat_local( - xfs_inode_t *ip, - xfs_dinode_t *dip, - int whichfork, - int size) -{ - xfs_ifork_t *ifp; - int real_size; - - /* - * If the size is unreasonable, then something - * is wrong and we just bail out rather than crash in - * kmem_alloc() or memcpy() below. - */ - if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad size %d for local fork, size = %d).", - (unsigned long long) ip->i_ino, size, - XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); - XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return EFSCORRUPTED; - } - ifp = XFS_IFORK_PTR(ip, whichfork); - real_size = 0; - if (size == 0) - ifp->if_u1.if_data = NULL; - else if (size <= sizeof(ifp->if_u2.if_inline_data)) - ifp->if_u1.if_data = ifp->if_u2.if_inline_data; - else { - real_size = roundup(size, 4); - ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); - } - ifp->if_bytes = size; - ifp->if_real_bytes = real_size; - if (size) - memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); - ifp->if_flags &= ~XFS_IFEXTENTS; - ifp->if_flags |= XFS_IFINLINE; - return 0; -} - -/* - * The file consists of a set of extents all - * of which fit into the on-disk inode. - * If there are few enough extents to fit into - * the if_inline_ext, then copy them there. - * Otherwise allocate a buffer for them and copy - * them into it. Either way, set if_extents - * to point at the extents. - */ -STATIC int -xfs_iformat_extents( - xfs_inode_t *ip, - xfs_dinode_t *dip, - int whichfork) -{ - xfs_bmbt_rec_t *dp; - xfs_ifork_t *ifp; - int nex; - int size; - int i; - - ifp = XFS_IFORK_PTR(ip, whichfork); - nex = XFS_DFORK_NEXTENTS(dip, whichfork); - size = nex * (uint)sizeof(xfs_bmbt_rec_t); - - /* - * If the number of extents is unreasonable, then something - * is wrong and we just bail out rather than crash in - * kmem_alloc() or memcpy() below. - */ - if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { - xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", - (unsigned long long) ip->i_ino, nex); - XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return EFSCORRUPTED; - } - - ifp->if_real_bytes = 0; - if (nex == 0) - ifp->if_u1.if_extents = NULL; - else if (nex <= XFS_INLINE_EXTS) - ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - else - xfs_iext_add(ifp, 0, nex); - - ifp->if_bytes = size; - if (size) { - dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); - xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); - for (i = 0; i < nex; i++, dp++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); - ep->l0 = get_unaligned_be64(&dp->l0); - ep->l1 = get_unaligned_be64(&dp->l1); - } - XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); - if (whichfork != XFS_DATA_FORK || - XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) - if (unlikely(xfs_check_nostate_extents( - ifp, 0, nex))) { - XFS_ERROR_REPORT("xfs_iformat_extents(2)", - XFS_ERRLEVEL_LOW, - ip->i_mount); - return EFSCORRUPTED; - } - } - ifp->if_flags |= XFS_IFEXTENTS; - return 0; -} - -/* - * The file has too many extents to fit into - * the inode, so they are in B-tree format. - * Allocate a buffer for the root of the B-tree - * and copy the root into it. The i_extents - * field will remain NULL until all of the - * extents are read in (when they are needed). - */ -STATIC int -xfs_iformat_btree( - xfs_inode_t *ip, - xfs_dinode_t *dip, - int whichfork) -{ - struct xfs_mount *mp = ip->i_mount; - xfs_bmdr_block_t *dfp; - xfs_ifork_t *ifp; - /* REFERENCED */ - int nrecs; - int size; - - ifp = XFS_IFORK_PTR(ip, whichfork); - dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); - size = XFS_BMAP_BROOT_SPACE(mp, dfp); - nrecs = be16_to_cpu(dfp->bb_numrecs); - - /* - * blow out if -- fork has less extents than can fit in - * fork (fork shouldn't be a btree format), root btree - * block has more records than can fit into the fork, - * or the number of extents is greater than the number of - * blocks. - */ - if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= - XFS_IFORK_MAXEXT(ip, whichfork) || - XFS_BMDR_SPACE_CALC(nrecs) > - XFS_DFORK_SIZE(dip, mp, whichfork) || - XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { - xfs_warn(mp, "corrupt inode %Lu (btree).", - (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, - mp, dip); - return EFSCORRUPTED; - } - - ifp->if_broot_bytes = size; - ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS); - ASSERT(ifp->if_broot != NULL); - /* - * Copy and convert from the on-disk structure - * to the in-memory structure. - */ - xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), - ifp->if_broot, size); - ifp->if_flags &= ~XFS_IFEXTENTS; - ifp->if_flags |= XFS_IFBROOT; - - return 0; -} - -/* - * Read in extents from a btree-format inode. - * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. - */ -int -xfs_iread_extents( - xfs_trans_t *tp, - xfs_inode_t *ip, - int whichfork) -{ - int error; - xfs_ifork_t *ifp; - xfs_extnum_t nextents; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { - XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, - ip->i_mount); - return EFSCORRUPTED; - } - nextents = XFS_IFORK_NEXTENTS(ip, whichfork); - ifp = XFS_IFORK_PTR(ip, whichfork); - - /* - * We know that the size is valid (it's checked in iformat_btree) - */ - ifp->if_bytes = ifp->if_real_bytes = 0; - ifp->if_flags |= XFS_IFEXTENTS; - xfs_iext_add(ifp, 0, nextents); - error = xfs_bmap_read_extents(tp, ip, whichfork); - if (error) { - xfs_iext_destroy(ifp); - ifp->if_flags &= ~XFS_IFEXTENTS; - return error; - } - xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); - return 0; -} -/* - * Reallocate the space for if_broot based on the number of records - * being added or deleted as indicated in rec_diff. Move the records - * and pointers in if_broot to fit the new size. When shrinking this - * will eliminate holes between the records and pointers created by - * the caller. When growing this will create holes to be filled in - * by the caller. - * - * The caller must not request to add more records than would fit in - * the on-disk inode root. If the if_broot is currently NULL, then - * if we are adding records, one will be allocated. The caller must also - * not request that the number of records go below zero, although - * it can go to zero. - * - * ip -- the inode whose if_broot area is changing - * ext_diff -- the change in the number of records, positive or negative, - * requested for the if_broot array. - */ -void -xfs_iroot_realloc( - xfs_inode_t *ip, - int rec_diff, - int whichfork) -{ - struct xfs_mount *mp = ip->i_mount; - int cur_max; - xfs_ifork_t *ifp; - struct xfs_btree_block *new_broot; - int new_max; - size_t new_size; - char *np; - char *op; - - /* - * Handle the degenerate case quietly. - */ - if (rec_diff == 0) { - return; - } - - ifp = XFS_IFORK_PTR(ip, whichfork); - if (rec_diff > 0) { - /* - * If there wasn't any memory allocated before, just - * allocate it now and get out. - */ - if (ifp->if_broot_bytes == 0) { - new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff); - ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); - ifp->if_broot_bytes = (int)new_size; - return; - } - - /* - * If there is already an existing if_broot, then we need - * to realloc() it and shift the pointers to their new - * location. The records don't change location because - * they are kept butted up against the btree block header. - */ - cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); - new_max = cur_max + rec_diff; - new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); - ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, - XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max), - KM_SLEEP | KM_NOFS); - op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, - ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, - (int)new_size); - ifp->if_broot_bytes = (int)new_size; - ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= - XFS_IFORK_SIZE(ip, whichfork)); - memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); - return; - } - - /* - * rec_diff is less than 0. In this case, we are shrinking the - * if_broot buffer. It must already exist. If we go to zero - * records, just get rid of the root and clear the status bit. - */ - ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); - cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); - new_max = cur_max + rec_diff; - ASSERT(new_max >= 0); - if (new_max > 0) - new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); - else - new_size = 0; - if (new_size > 0) { - new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); - /* - * First copy over the btree block header. - */ - memcpy(new_broot, ifp->if_broot, - XFS_BMBT_BLOCK_LEN(ip->i_mount)); - } else { - new_broot = NULL; - ifp->if_flags &= ~XFS_IFBROOT; - } - - /* - * Only copy the records and pointers if there are any. - */ - if (new_max > 0) { - /* - * First copy the records. - */ - op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); - np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); - memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); - - /* - * Then copy the pointers. - */ - op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, - ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, - (int)new_size); - memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); - } - kmem_free(ifp->if_broot); - ifp->if_broot = new_broot; - ifp->if_broot_bytes = (int)new_size; - if (ifp->if_broot) - ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= - XFS_IFORK_SIZE(ip, whichfork)); - return; -} - - -/* - * This is called when the amount of space needed for if_data - * is increased or decreased. The change in size is indicated by - * the number of bytes that need to be added or deleted in the - * byte_diff parameter. - * - * If the amount of space needed has decreased below the size of the - * inline buffer, then switch to using the inline buffer. Otherwise, - * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer - * to what is needed. - * - * ip -- the inode whose if_data area is changing - * byte_diff -- the change in the number of bytes, positive or negative, - * requested for the if_data array. - */ -void -xfs_idata_realloc( - xfs_inode_t *ip, - int byte_diff, - int whichfork) -{ - xfs_ifork_t *ifp; - int new_size; - int real_size; - - if (byte_diff == 0) { - return; - } - - ifp = XFS_IFORK_PTR(ip, whichfork); - new_size = (int)ifp->if_bytes + byte_diff; - ASSERT(new_size >= 0); - - if (new_size == 0) { - if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { - kmem_free(ifp->if_u1.if_data); - } - ifp->if_u1.if_data = NULL; - real_size = 0; - } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { - /* - * If the valid extents/data can fit in if_inline_ext/data, - * copy them from the malloc'd vector and free it. - */ - if (ifp->if_u1.if_data == NULL) { - ifp->if_u1.if_data = ifp->if_u2.if_inline_data; - } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { - ASSERT(ifp->if_real_bytes != 0); - memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, - new_size); - kmem_free(ifp->if_u1.if_data); - ifp->if_u1.if_data = ifp->if_u2.if_inline_data; - } - real_size = 0; - } else { - /* - * Stuck with malloc/realloc. - * For inline data, the underlying buffer must be - * a multiple of 4 bytes in size so that it can be - * logged and stay on word boundaries. We enforce - * that here. - */ - real_size = roundup(new_size, 4); - if (ifp->if_u1.if_data == NULL) { - ASSERT(ifp->if_real_bytes == 0); - ifp->if_u1.if_data = kmem_alloc(real_size, - KM_SLEEP | KM_NOFS); - } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { - /* - * Only do the realloc if the underlying size - * is really changing. - */ - if (ifp->if_real_bytes != real_size) { - ifp->if_u1.if_data = - kmem_realloc(ifp->if_u1.if_data, - real_size, - ifp->if_real_bytes, - KM_SLEEP | KM_NOFS); - } - } else { - ASSERT(ifp->if_real_bytes == 0); - ifp->if_u1.if_data = kmem_alloc(real_size, - KM_SLEEP | KM_NOFS); - memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, - ifp->if_bytes); - } - } - ifp->if_real_bytes = real_size; - ifp->if_bytes = new_size; - ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); -} - -void -xfs_idestroy_fork( - xfs_inode_t *ip, - int whichfork) -{ - xfs_ifork_t *ifp; - - ifp = XFS_IFORK_PTR(ip, whichfork); - if (ifp->if_broot != NULL) { - kmem_free(ifp->if_broot); - ifp->if_broot = NULL; - } - - /* - * If the format is local, then we can't have an extents - * array so just look for an inline data array. If we're - * not local then we may or may not have an extents list, - * so check and free it up if we do. - */ - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { - if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && - (ifp->if_u1.if_data != NULL)) { - ASSERT(ifp->if_real_bytes != 0); - kmem_free(ifp->if_u1.if_data); - ifp->if_u1.if_data = NULL; - ifp->if_real_bytes = 0; - } - } else if ((ifp->if_flags & XFS_IFEXTENTS) && - ((ifp->if_flags & XFS_IFEXTIREC) || - ((ifp->if_u1.if_extents != NULL) && - (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { - ASSERT(ifp->if_real_bytes != 0); - xfs_iext_destroy(ifp); - } - ASSERT(ifp->if_u1.if_extents == NULL || - ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); - ASSERT(ifp->if_real_bytes == 0); - if (whichfork == XFS_ATTR_FORK) { - kmem_zone_free(xfs_ifork_zone, ip->i_afp); - ip->i_afp = NULL; - } -} - -/* - * Convert in-core extents to on-disk form - * - * For either the data or attr fork in extent format, we need to endian convert - * the in-core extent as we place them into the on-disk inode. - * - * In the case of the data fork, the in-core and on-disk fork sizes can be - * different due to delayed allocation extents. We only copy on-disk extents - * here, so callers must always use the physical fork size to determine the - * size of the buffer passed to this routine. We will return the size actually - * used. - */ -int -xfs_iextents_copy( - xfs_inode_t *ip, - xfs_bmbt_rec_t *dp, - int whichfork) -{ - int copied; - int i; - xfs_ifork_t *ifp; - int nrecs; - xfs_fsblock_t start_block; - - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); - ASSERT(ifp->if_bytes > 0); - - nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); - ASSERT(nrecs > 0); - - /* - * There are some delayed allocation extents in the - * inode, so copy the extents one at a time and skip - * the delayed ones. There must be at least one - * non-delayed extent. - */ - copied = 0; - for (i = 0; i < nrecs; i++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); - start_block = xfs_bmbt_get_startblock(ep); - if (isnullstartblock(start_block)) { - /* - * It's a delayed allocation extent, so skip it. - */ - continue; - } - - /* Translate to on disk format */ - put_unaligned_be64(ep->l0, &dp->l0); - put_unaligned_be64(ep->l1, &dp->l1); - dp++; - copied++; - } - ASSERT(copied != 0); - xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); - - return (copied * (uint)sizeof(xfs_bmbt_rec_t)); -} - -/* - * Each of the following cases stores data into the same region - * of the on-disk inode, so only one of them can be valid at - * any given time. While it is possible to have conflicting formats - * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is - * in EXTENTS format, this can only happen when the fork has - * changed formats after being modified but before being flushed. - * In these cases, the format always takes precedence, because the - * format indicates the current state of the fork. - */ -void -xfs_iflush_fork( - xfs_inode_t *ip, - xfs_dinode_t *dip, - xfs_inode_log_item_t *iip, - int whichfork) -{ - char *cp; - xfs_ifork_t *ifp; - xfs_mount_t *mp; - static const short brootflag[2] = - { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; - static const short dataflag[2] = - { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; - static const short extflag[2] = - { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; - - if (!iip) - return; - ifp = XFS_IFORK_PTR(ip, whichfork); - /* - * This can happen if we gave up in iformat in an error path, - * for the attribute fork. - */ - if (!ifp) { - ASSERT(whichfork == XFS_ATTR_FORK); - return; - } - cp = XFS_DFORK_PTR(dip, whichfork); - mp = ip->i_mount; - switch (XFS_IFORK_FORMAT(ip, whichfork)) { - case XFS_DINODE_FMT_LOCAL: - if ((iip->ili_fields & dataflag[whichfork]) && - (ifp->if_bytes > 0)) { - ASSERT(ifp->if_u1.if_data != NULL); - ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); - memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); - } - break; - - case XFS_DINODE_FMT_EXTENTS: - ASSERT((ifp->if_flags & XFS_IFEXTENTS) || - !(iip->ili_fields & extflag[whichfork])); - if ((iip->ili_fields & extflag[whichfork]) && - (ifp->if_bytes > 0)) { - ASSERT(xfs_iext_get_ext(ifp, 0)); - ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); - (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, - whichfork); - } - break; - - case XFS_DINODE_FMT_BTREE: - if ((iip->ili_fields & brootflag[whichfork]) && - (ifp->if_broot_bytes > 0)) { - ASSERT(ifp->if_broot != NULL); - ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= - XFS_IFORK_SIZE(ip, whichfork)); - xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, - (xfs_bmdr_block_t *)cp, - XFS_DFORK_SIZE(dip, mp, whichfork)); - } - break; - - case XFS_DINODE_FMT_DEV: - if (iip->ili_fields & XFS_ILOG_DEV) { - ASSERT(whichfork == XFS_DATA_FORK); - xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); - } - break; - - case XFS_DINODE_FMT_UUID: - if (iip->ili_fields & XFS_ILOG_UUID) { - ASSERT(whichfork == XFS_DATA_FORK); - memcpy(XFS_DFORK_DPTR(dip), - &ip->i_df.if_u2.if_uuid, - sizeof(uuid_t)); - } - break; - - default: - ASSERT(0); - break; - } -} - -/* - * Return a pointer to the extent record at file index idx. - */ -xfs_bmbt_rec_host_t * -xfs_iext_get_ext( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t idx) /* index of target extent */ -{ - ASSERT(idx >= 0); - ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); - - if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { - return ifp->if_u1.if_ext_irec->er_extbuf; - } else if (ifp->if_flags & XFS_IFEXTIREC) { - xfs_ext_irec_t *erp; /* irec pointer */ - int erp_idx = 0; /* irec index */ - xfs_extnum_t page_idx = idx; /* ext index in target list */ - - erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); - return &erp->er_extbuf[page_idx]; - } else if (ifp->if_bytes) { - return &ifp->if_u1.if_extents[idx]; - } else { - return NULL; - } -} - -/* - * Insert new item(s) into the extent records for incore inode - * fork 'ifp'. 'count' new items are inserted at index 'idx'. - */ -void -xfs_iext_insert( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* starting index of new items */ - xfs_extnum_t count, /* number of inserted items */ - xfs_bmbt_irec_t *new, /* items to insert */ - int state) /* type of extent conversion */ -{ - xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; - xfs_extnum_t i; /* extent record index */ - - trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_); - - ASSERT(ifp->if_flags & XFS_IFEXTENTS); - xfs_iext_add(ifp, idx, count); - for (i = idx; i < idx + count; i++, new++) - xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); -} - -/* - * This is called when the amount of space required for incore file - * extents needs to be increased. The ext_diff parameter stores the - * number of new extents being added and the idx parameter contains - * the extent index where the new extents will be added. If the new - * extents are being appended, then we just need to (re)allocate and - * initialize the space. Otherwise, if the new extents are being - * inserted into the middle of the existing entries, a bit more work - * is required to make room for the new extents to be inserted. The - * caller is responsible for filling in the new extent entries upon - * return. - */ -void -xfs_iext_add( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t idx, /* index to begin adding exts */ - int ext_diff) /* number of extents to add */ -{ - int byte_diff; /* new bytes being added */ - int new_size; /* size of extents after adding */ - xfs_extnum_t nextents; /* number of extents in file */ - - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - ASSERT((idx >= 0) && (idx <= nextents)); - byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); - new_size = ifp->if_bytes + byte_diff; - /* - * If the new number of extents (nextents + ext_diff) - * fits inside the inode, then continue to use the inline - * extent buffer. - */ - if (nextents + ext_diff <= XFS_INLINE_EXTS) { - if (idx < nextents) { - memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], - &ifp->if_u2.if_inline_ext[idx], - (nextents - idx) * sizeof(xfs_bmbt_rec_t)); - memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); - } - ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - ifp->if_real_bytes = 0; - } - /* - * Otherwise use a linear (direct) extent list. - * If the extents are currently inside the inode, - * xfs_iext_realloc_direct will switch us from - * inline to direct extent allocation mode. - */ - else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { - xfs_iext_realloc_direct(ifp, new_size); - if (idx < nextents) { - memmove(&ifp->if_u1.if_extents[idx + ext_diff], - &ifp->if_u1.if_extents[idx], - (nextents - idx) * sizeof(xfs_bmbt_rec_t)); - memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); - } - } - /* Indirection array */ - else { - xfs_ext_irec_t *erp; - int erp_idx = 0; - int page_idx = idx; - - ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); - if (ifp->if_flags & XFS_IFEXTIREC) { - erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); - } else { - xfs_iext_irec_init(ifp); - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - erp = ifp->if_u1.if_ext_irec; - } - /* Extents fit in target extent page */ - if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { - if (page_idx < erp->er_extcount) { - memmove(&erp->er_extbuf[page_idx + ext_diff], - &erp->er_extbuf[page_idx], - (erp->er_extcount - page_idx) * - sizeof(xfs_bmbt_rec_t)); - memset(&erp->er_extbuf[page_idx], 0, byte_diff); - } - erp->er_extcount += ext_diff; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); - } - /* Insert a new extent page */ - else if (erp) { - xfs_iext_add_indirect_multi(ifp, - erp_idx, page_idx, ext_diff); - } - /* - * If extent(s) are being appended to the last page in - * the indirection array and the new extent(s) don't fit - * in the page, then erp is NULL and erp_idx is set to - * the next index needed in the indirection array. - */ - else { - uint count = ext_diff; - - while (count) { - erp = xfs_iext_irec_new(ifp, erp_idx); - erp->er_extcount = min(count, XFS_LINEAR_EXTS); - count -= erp->er_extcount; - if (count) - erp_idx++; - } - } - } - ifp->if_bytes = new_size; -} - -/* - * This is called when incore extents are being added to the indirection - * array and the new extents do not fit in the target extent list. The - * erp_idx parameter contains the irec index for the target extent list - * in the indirection array, and the idx parameter contains the extent - * index within the list. The number of extents being added is stored - * in the count parameter. - * - * |-------| |-------| - * | | | | idx - number of extents before idx - * | idx | | count | - * | | | | count - number of extents being inserted at idx - * |-------| |-------| - * | count | | nex2 | nex2 - number of extents after idx + count - * |-------| |-------| - */ -void -xfs_iext_add_indirect_multi( - xfs_ifork_t *ifp, /* inode fork pointer */ - int erp_idx, /* target extent irec index */ - xfs_extnum_t idx, /* index within target list */ - int count) /* new extents being added */ -{ - int byte_diff; /* new bytes being added */ - xfs_ext_irec_t *erp; /* pointer to irec entry */ - xfs_extnum_t ext_diff; /* number of extents to add */ - xfs_extnum_t ext_cnt; /* new extents still needed */ - xfs_extnum_t nex2; /* extents after idx + count */ - xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ - int nlists; /* number of irec's (lists) */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - nex2 = erp->er_extcount - idx; - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - - /* - * Save second part of target extent list - * (all extents past */ - if (nex2) { - byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); - nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); - memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); - erp->er_extcount -= nex2; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); - memset(&erp->er_extbuf[idx], 0, byte_diff); - } - - /* - * Add the new extents to the end of the target - * list, then allocate new irec record(s) and - * extent buffer(s) as needed to store the rest - * of the new extents. - */ - ext_cnt = count; - ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); - if (ext_diff) { - erp->er_extcount += ext_diff; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); - ext_cnt -= ext_diff; - } - while (ext_cnt) { - erp_idx++; - erp = xfs_iext_irec_new(ifp, erp_idx); - ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); - erp->er_extcount = ext_diff; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); - ext_cnt -= ext_diff; - } - - /* Add nex2 extents back to indirection array */ - if (nex2) { - xfs_extnum_t ext_avail; - int i; - - byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); - ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; - i = 0; - /* - * If nex2 extents fit in the current page, append - * nex2_ep after the new extents. - */ - if (nex2 <= ext_avail) { - i = erp->er_extcount; - } - /* - * Otherwise, check if space is available in the - * next page. - */ - else if ((erp_idx < nlists - 1) && - (nex2 <= (ext_avail = XFS_LINEAR_EXTS - - ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { - erp_idx++; - erp++; - /* Create a hole for nex2 extents */ - memmove(&erp->er_extbuf[nex2], erp->er_extbuf, - erp->er_extcount * sizeof(xfs_bmbt_rec_t)); - } - /* - * Final choice, create a new extent page for - * nex2 extents. - */ - else { - erp_idx++; - erp = xfs_iext_irec_new(ifp, erp_idx); - } - memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); - kmem_free(nex2_ep); - erp->er_extcount += nex2; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); - } -} - -/* - * This is called when the amount of space required for incore file - * extents needs to be decreased. The ext_diff parameter stores the - * number of extents to be removed and the idx parameter contains - * the extent index where the extents will be removed from. - * - * If the amount of space needed has decreased below the linear - * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous - * extent array. Otherwise, use kmem_realloc() to adjust the - * size to what is needed. - */ -void -xfs_iext_remove( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* index to begin removing exts */ - int ext_diff, /* number of extents to remove */ - int state) /* type of extent conversion */ -{ - xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; - xfs_extnum_t nextents; /* number of extents in file */ - int new_size; /* size of extents after removal */ - - trace_xfs_iext_remove(ip, idx, state, _RET_IP_); - - ASSERT(ext_diff > 0); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); - - if (new_size == 0) { - xfs_iext_destroy(ifp); - } else if (ifp->if_flags & XFS_IFEXTIREC) { - xfs_iext_remove_indirect(ifp, idx, ext_diff); - } else if (ifp->if_real_bytes) { - xfs_iext_remove_direct(ifp, idx, ext_diff); - } else { - xfs_iext_remove_inline(ifp, idx, ext_diff); - } - ifp->if_bytes = new_size; -} - -/* - * This removes ext_diff extents from the inline buffer, beginning - * at extent index idx. - */ -void -xfs_iext_remove_inline( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t idx, /* index to begin removing exts */ - int ext_diff) /* number of extents to remove */ -{ - int nextents; /* number of extents in file */ - - ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); - ASSERT(idx < XFS_INLINE_EXTS); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - ASSERT(((nextents - ext_diff) > 0) && - (nextents - ext_diff) < XFS_INLINE_EXTS); - - if (idx + ext_diff < nextents) { - memmove(&ifp->if_u2.if_inline_ext[idx], - &ifp->if_u2.if_inline_ext[idx + ext_diff], - (nextents - (idx + ext_diff)) * - sizeof(xfs_bmbt_rec_t)); - memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], - 0, ext_diff * sizeof(xfs_bmbt_rec_t)); - } else { - memset(&ifp->if_u2.if_inline_ext[idx], 0, - ext_diff * sizeof(xfs_bmbt_rec_t)); - } -} - -/* - * This removes ext_diff extents from a linear (direct) extent list, - * beginning at extent index idx. If the extents are being removed - * from the end of the list (ie. truncate) then we just need to re- - * allocate the list to remove the extra space. Otherwise, if the - * extents are being removed from the middle of the existing extent - * entries, then we first need to move the extent records beginning - * at idx + ext_diff up in the list to overwrite the records being - * removed, then remove the extra space via kmem_realloc. - */ -void -xfs_iext_remove_direct( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t idx, /* index to begin removing exts */ - int ext_diff) /* number of extents to remove */ -{ - xfs_extnum_t nextents; /* number of extents in file */ - int new_size; /* size of extents after removal */ - - ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); - new_size = ifp->if_bytes - - (ext_diff * sizeof(xfs_bmbt_rec_t)); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - - if (new_size == 0) { - xfs_iext_destroy(ifp); - return; - } - /* Move extents up in the list (if needed) */ - if (idx + ext_diff < nextents) { - memmove(&ifp->if_u1.if_extents[idx], - &ifp->if_u1.if_extents[idx + ext_diff], - (nextents - (idx + ext_diff)) * - sizeof(xfs_bmbt_rec_t)); - } - memset(&ifp->if_u1.if_extents[nextents - ext_diff], - 0, ext_diff * sizeof(xfs_bmbt_rec_t)); - /* - * Reallocate the direct extent list. If the extents - * will fit inside the inode then xfs_iext_realloc_direct - * will switch from direct to inline extent allocation - * mode for us. - */ - xfs_iext_realloc_direct(ifp, new_size); - ifp->if_bytes = new_size; -} - -/* - * This is called when incore extents are being removed from the - * indirection array and the extents being removed span multiple extent - * buffers. The idx parameter contains the file extent index where we - * want to begin removing extents, and the count parameter contains - * how many extents need to be removed. - * - * |-------| |-------| - * | nex1 | | | nex1 - number of extents before idx - * |-------| | count | - * | | | | count - number of extents being removed at idx - * | count | |-------| - * | | | nex2 | nex2 - number of extents after idx + count - * |-------| |-------| - */ -void -xfs_iext_remove_indirect( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t idx, /* index to begin removing extents */ - int count) /* number of extents to remove */ -{ - xfs_ext_irec_t *erp; /* indirection array pointer */ - int erp_idx = 0; /* indirection array index */ - xfs_extnum_t ext_cnt; /* extents left to remove */ - xfs_extnum_t ext_diff; /* extents to remove in current list */ - xfs_extnum_t nex1; /* number of extents before idx */ - xfs_extnum_t nex2; /* extents after idx + count */ - int page_idx = idx; /* index in target extent list */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); - ASSERT(erp != NULL); - nex1 = page_idx; - ext_cnt = count; - while (ext_cnt) { - nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); - ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); - /* - * Check for deletion of entire list; - * xfs_iext_irec_remove() updates extent offsets. - */ - if (ext_diff == erp->er_extcount) { - xfs_iext_irec_remove(ifp, erp_idx); - ext_cnt -= ext_diff; - nex1 = 0; - if (ext_cnt) { - ASSERT(erp_idx < ifp->if_real_bytes / - XFS_IEXT_BUFSZ); - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - nex1 = 0; - continue; - } else { - break; - } - } - /* Move extents up (if needed) */ - if (nex2) { - memmove(&erp->er_extbuf[nex1], - &erp->er_extbuf[nex1 + ext_diff], - nex2 * sizeof(xfs_bmbt_rec_t)); - } - /* Zero out rest of page */ - memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - - ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); - /* Update remaining counters */ - erp->er_extcount -= ext_diff; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); - ext_cnt -= ext_diff; - nex1 = 0; - erp_idx++; - erp++; - } - ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); - xfs_iext_irec_compact(ifp); -} - -/* - * Create, destroy, or resize a linear (direct) block of extents. - */ -void -xfs_iext_realloc_direct( - xfs_ifork_t *ifp, /* inode fork pointer */ - int new_size) /* new size of extents after adding */ -{ - int rnew_size; /* real new size of extents */ - - rnew_size = new_size; - - ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || - ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && - (new_size != ifp->if_real_bytes))); - - /* Free extent records */ - if (new_size == 0) { - xfs_iext_destroy(ifp); - } - /* Resize direct extent list and zero any new bytes */ - else if (ifp->if_real_bytes) { - /* Check if extents will fit inside the inode */ - if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { - xfs_iext_direct_to_inline(ifp, new_size / - (uint)sizeof(xfs_bmbt_rec_t)); - ifp->if_bytes = new_size; - return; - } - if (!is_power_of_2(new_size)){ - rnew_size = roundup_pow_of_two(new_size); - } - if (rnew_size != ifp->if_real_bytes) { - ifp->if_u1.if_extents = - kmem_realloc(ifp->if_u1.if_extents, - rnew_size, - ifp->if_real_bytes, KM_NOFS); - } - if (rnew_size > ifp->if_real_bytes) { - memset(&ifp->if_u1.if_extents[ifp->if_bytes / - (uint)sizeof(xfs_bmbt_rec_t)], 0, - rnew_size - ifp->if_real_bytes); - } - } - /* Switch from the inline extent buffer to a direct extent list */ - else { - if (!is_power_of_2(new_size)) { - rnew_size = roundup_pow_of_two(new_size); - } - xfs_iext_inline_to_direct(ifp, rnew_size); - } - ifp->if_real_bytes = rnew_size; - ifp->if_bytes = new_size; -} - -/* - * Switch from linear (direct) extent records to inline buffer. - */ -void -xfs_iext_direct_to_inline( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t nextents) /* number of extents in file */ -{ - ASSERT(ifp->if_flags & XFS_IFEXTENTS); - ASSERT(nextents <= XFS_INLINE_EXTS); - /* - * The inline buffer was zeroed when we switched - * from inline to direct extent allocation mode, - * so we don't need to clear it here. - */ - memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, - nextents * sizeof(xfs_bmbt_rec_t)); - kmem_free(ifp->if_u1.if_extents); - ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - ifp->if_real_bytes = 0; -} - -/* - * Switch from inline buffer to linear (direct) extent records. - * new_size should already be rounded up to the next power of 2 - * by the caller (when appropriate), so use new_size as it is. - * However, since new_size may be rounded up, we can't update - * if_bytes here. It is the caller's responsibility to update - * if_bytes upon return. - */ -void -xfs_iext_inline_to_direct( - xfs_ifork_t *ifp, /* inode fork pointer */ - int new_size) /* number of extents in file */ -{ - ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); - memset(ifp->if_u1.if_extents, 0, new_size); - if (ifp->if_bytes) { - memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, - ifp->if_bytes); - memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * - sizeof(xfs_bmbt_rec_t)); - } - ifp->if_real_bytes = new_size; -} - -/* - * Resize an extent indirection array to new_size bytes. - */ -STATIC void -xfs_iext_realloc_indirect( - xfs_ifork_t *ifp, /* inode fork pointer */ - int new_size) /* new indirection array size */ -{ - int nlists; /* number of irec's (ex lists) */ - int size; /* current indirection array size */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - size = nlists * sizeof(xfs_ext_irec_t); - ASSERT(ifp->if_real_bytes); - ASSERT((new_size >= 0) && (new_size != size)); - if (new_size == 0) { - xfs_iext_destroy(ifp); - } else { - ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) - kmem_realloc(ifp->if_u1.if_ext_irec, - new_size, size, KM_NOFS); - } -} - -/* - * Switch from indirection array to linear (direct) extent allocations. - */ -STATIC void -xfs_iext_indirect_to_direct( - xfs_ifork_t *ifp) /* inode fork pointer */ -{ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - xfs_extnum_t nextents; /* number of extents in file */ - int size; /* size of file extents */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - ASSERT(nextents <= XFS_LINEAR_EXTS); - size = nextents * sizeof(xfs_bmbt_rec_t); - - xfs_iext_irec_compact_pages(ifp); - ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); - - ep = ifp->if_u1.if_ext_irec->er_extbuf; - kmem_free(ifp->if_u1.if_ext_irec); - ifp->if_flags &= ~XFS_IFEXTIREC; - ifp->if_u1.if_extents = ep; - ifp->if_bytes = size; - if (nextents < XFS_LINEAR_EXTS) { - xfs_iext_realloc_direct(ifp, size); - } -} - -/* - * Free incore file extents. - */ -void -xfs_iext_destroy( - xfs_ifork_t *ifp) /* inode fork pointer */ -{ - if (ifp->if_flags & XFS_IFEXTIREC) { - int erp_idx; - int nlists; - - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { - xfs_iext_irec_remove(ifp, erp_idx); - } - ifp->if_flags &= ~XFS_IFEXTIREC; - } else if (ifp->if_real_bytes) { - kmem_free(ifp->if_u1.if_extents); - } else if (ifp->if_bytes) { - memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * - sizeof(xfs_bmbt_rec_t)); - } - ifp->if_u1.if_extents = NULL; - ifp->if_real_bytes = 0; - ifp->if_bytes = 0; -} - -/* - * Return a pointer to the extent record for file system block bno. - */ -xfs_bmbt_rec_host_t * /* pointer to found extent record */ -xfs_iext_bno_to_ext( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_fileoff_t bno, /* block number to search for */ - xfs_extnum_t *idxp) /* index of target extent */ -{ - xfs_bmbt_rec_host_t *base; /* pointer to first extent */ - xfs_filblks_t blockcount = 0; /* number of blocks in extent */ - xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ - xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ - int high; /* upper boundary in search */ - xfs_extnum_t idx = 0; /* index of target extent */ - int low; /* lower boundary in search */ - xfs_extnum_t nextents; /* number of file extents */ - xfs_fileoff_t startoff = 0; /* start offset of extent */ - - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (nextents == 0) { - *idxp = 0; - return NULL; - } - low = 0; - if (ifp->if_flags & XFS_IFEXTIREC) { - /* Find target extent list */ - int erp_idx = 0; - erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); - base = erp->er_extbuf; - high = erp->er_extcount - 1; - } else { - base = ifp->if_u1.if_extents; - high = nextents - 1; - } - /* Binary search extent records */ - while (low <= high) { - idx = (low + high) >> 1; - ep = base + idx; - startoff = xfs_bmbt_get_startoff(ep); - blockcount = xfs_bmbt_get_blockcount(ep); - if (bno < startoff) { - high = idx - 1; - } else if (bno >= startoff + blockcount) { - low = idx + 1; - } else { - /* Convert back to file-based extent index */ - if (ifp->if_flags & XFS_IFEXTIREC) { - idx += erp->er_extoff; - } - *idxp = idx; - return ep; - } - } - /* Convert back to file-based extent index */ - if (ifp->if_flags & XFS_IFEXTIREC) { - idx += erp->er_extoff; - } - if (bno >= startoff + blockcount) { - if (++idx == nextents) { - ep = NULL; - } else { - ep = xfs_iext_get_ext(ifp, idx); - } - } - *idxp = idx; - return ep; -} - -/* - * Return a pointer to the indirection array entry containing the - * extent record for filesystem block bno. Store the index of the - * target irec in *erp_idxp. - */ -xfs_ext_irec_t * /* pointer to found extent record */ -xfs_iext_bno_to_irec( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_fileoff_t bno, /* block number to search for */ - int *erp_idxp) /* irec index of target ext list */ -{ - xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ - xfs_ext_irec_t *erp_next; /* next indirection array entry */ - int erp_idx; /* indirection array index */ - int nlists; /* number of extent irec's (lists) */ - int high; /* binary search upper limit */ - int low; /* binary search lower limit */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - erp_idx = 0; - low = 0; - high = nlists - 1; - while (low <= high) { - erp_idx = (low + high) >> 1; - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; - if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { - high = erp_idx - 1; - } else if (erp_next && bno >= - xfs_bmbt_get_startoff(erp_next->er_extbuf)) { - low = erp_idx + 1; - } else { - break; - } - } - *erp_idxp = erp_idx; - return erp; -} - -/* - * Return a pointer to the indirection array entry containing the - * extent record at file extent index *idxp. Store the index of the - * target irec in *erp_idxp and store the page index of the target - * extent record in *idxp. - */ -xfs_ext_irec_t * -xfs_iext_idx_to_irec( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t *idxp, /* extent index (file -> page) */ - int *erp_idxp, /* pointer to target irec */ - int realloc) /* new bytes were just added */ -{ - xfs_ext_irec_t *prev; /* pointer to previous irec */ - xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ - int erp_idx; /* indirection array index */ - int nlists; /* number of irec's (ex lists) */ - int high; /* binary search upper limit */ - int low; /* binary search lower limit */ - xfs_extnum_t page_idx = *idxp; /* extent index in target list */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - ASSERT(page_idx >= 0); - ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); - ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); - - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - erp_idx = 0; - low = 0; - high = nlists - 1; - - /* Binary search extent irec's */ - while (low <= high) { - erp_idx = (low + high) >> 1; - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - prev = erp_idx > 0 ? erp - 1 : NULL; - if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && - realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { - high = erp_idx - 1; - } else if (page_idx > erp->er_extoff + erp->er_extcount || - (page_idx == erp->er_extoff + erp->er_extcount && - !realloc)) { - low = erp_idx + 1; - } else if (page_idx == erp->er_extoff + erp->er_extcount && - erp->er_extcount == XFS_LINEAR_EXTS) { - ASSERT(realloc); - page_idx = 0; - erp_idx++; - erp = erp_idx < nlists ? erp + 1 : NULL; - break; - } else { - page_idx -= erp->er_extoff; - break; - } - } - *idxp = page_idx; - *erp_idxp = erp_idx; - return erp; -} - -/* - * Allocate and initialize an indirection array once the space needed - * for incore extents increases above XFS_IEXT_BUFSZ. - */ -void -xfs_iext_irec_init( - xfs_ifork_t *ifp) /* inode fork pointer */ -{ - xfs_ext_irec_t *erp; /* indirection array pointer */ - xfs_extnum_t nextents; /* number of extents in file */ - - ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - ASSERT(nextents <= XFS_LINEAR_EXTS); - - erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); - - if (nextents == 0) { - ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); - } else if (!ifp->if_real_bytes) { - xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); - } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { - xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); - } - erp->er_extbuf = ifp->if_u1.if_extents; - erp->er_extcount = nextents; - erp->er_extoff = 0; - - ifp->if_flags |= XFS_IFEXTIREC; - ifp->if_real_bytes = XFS_IEXT_BUFSZ; - ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); - ifp->if_u1.if_ext_irec = erp; - - return; -} - -/* - * Allocate and initialize a new entry in the indirection array. - */ -xfs_ext_irec_t * -xfs_iext_irec_new( - xfs_ifork_t *ifp, /* inode fork pointer */ - int erp_idx) /* index for new irec */ -{ - xfs_ext_irec_t *erp; /* indirection array pointer */ - int i; /* loop counter */ - int nlists; /* number of irec's (ex lists) */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - - /* Resize indirection array */ - xfs_iext_realloc_indirect(ifp, ++nlists * - sizeof(xfs_ext_irec_t)); - /* - * Move records down in the array so the - * new page can use erp_idx. - */ - erp = ifp->if_u1.if_ext_irec; - for (i = nlists - 1; i > erp_idx; i--) { - memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); - } - ASSERT(i == erp_idx); - - /* Initialize new extent record */ - erp = ifp->if_u1.if_ext_irec; - erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); - ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; - memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); - erp[erp_idx].er_extcount = 0; - erp[erp_idx].er_extoff = erp_idx > 0 ? - erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; - return (&erp[erp_idx]); -} - -/* - * Remove a record from the indirection array. - */ -void -xfs_iext_irec_remove( - xfs_ifork_t *ifp, /* inode fork pointer */ - int erp_idx) /* irec index to remove */ -{ - xfs_ext_irec_t *erp; /* indirection array pointer */ - int i; /* loop counter */ - int nlists; /* number of irec's (ex lists) */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - if (erp->er_extbuf) { - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, - -erp->er_extcount); - kmem_free(erp->er_extbuf); - } - /* Compact extent records */ - erp = ifp->if_u1.if_ext_irec; - for (i = erp_idx; i < nlists - 1; i++) { - memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); - } - /* - * Manually free the last extent record from the indirection - * array. A call to xfs_iext_realloc_indirect() with a size - * of zero would result in a call to xfs_iext_destroy() which - * would in turn call this function again, creating a nasty - * infinite loop. - */ - if (--nlists) { - xfs_iext_realloc_indirect(ifp, - nlists * sizeof(xfs_ext_irec_t)); - } else { - kmem_free(ifp->if_u1.if_ext_irec); - } - ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; -} - -/* - * This is called to clean up large amounts of unused memory allocated - * by the indirection array. Before compacting anything though, verify - * that the indirection array is still needed and switch back to the - * linear extent list (or even the inline buffer) if possible. The - * compaction policy is as follows: - * - * Full Compaction: Extents fit into a single page (or inline buffer) - * Partial Compaction: Extents occupy less than 50% of allocated space - * No Compaction: Extents occupy at least 50% of allocated space - */ -void -xfs_iext_irec_compact( - xfs_ifork_t *ifp) /* inode fork pointer */ -{ - xfs_extnum_t nextents; /* number of extents in file */ - int nlists; /* number of irec's (ex lists) */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - - if (nextents == 0) { - xfs_iext_destroy(ifp); - } else if (nextents <= XFS_INLINE_EXTS) { - xfs_iext_indirect_to_direct(ifp); - xfs_iext_direct_to_inline(ifp, nextents); - } else if (nextents <= XFS_LINEAR_EXTS) { - xfs_iext_indirect_to_direct(ifp); - } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { - xfs_iext_irec_compact_pages(ifp); - } -} - -/* - * Combine extents from neighboring extent pages. - */ -void -xfs_iext_irec_compact_pages( - xfs_ifork_t *ifp) /* inode fork pointer */ -{ - xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ - int erp_idx = 0; /* indirection array index */ - int nlists; /* number of irec's (ex lists) */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - while (erp_idx < nlists - 1) { - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - erp_next = erp + 1; - if (erp_next->er_extcount <= - (XFS_LINEAR_EXTS - erp->er_extcount)) { - memcpy(&erp->er_extbuf[erp->er_extcount], - erp_next->er_extbuf, erp_next->er_extcount * - sizeof(xfs_bmbt_rec_t)); - erp->er_extcount += erp_next->er_extcount; - /* - * Free page before removing extent record - * so er_extoffs don't get modified in - * xfs_iext_irec_remove. - */ - kmem_free(erp_next->er_extbuf); - erp_next->er_extbuf = NULL; - xfs_iext_irec_remove(ifp, erp_idx + 1); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - } else { - erp_idx++; - } - } -} - -/* - * This is called to update the er_extoff field in the indirection - * array when extents have been added or removed from one of the - * extent lists. erp_idx contains the irec index to begin updating - * at and ext_diff contains the number of extents that were added - * or removed. - */ -void -xfs_iext_irec_update_extoffs( - xfs_ifork_t *ifp, /* inode fork pointer */ - int erp_idx, /* irec index to update */ - int ext_diff) /* number of new extents */ -{ - int i; /* loop counter */ - int nlists; /* number of irec's (ex lists */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - for (i = erp_idx; i < nlists; i++) { - ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; - } -} diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/xfs_log_rlimit.c deleted file mode 100644 index ee7e0e80246b..000000000000 --- a/fs/xfs/xfs_log_rlimit.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2013 Jie Liu. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_ag.h" -#include "xfs_sb.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_trans_space.h" -#include "xfs_inode.h" -#include "xfs_da_btree.h" -#include "xfs_attr_leaf.h" -#include "xfs_bmap_btree.h" - -/* - * Calculate the maximum length in bytes that would be required for a local - * attribute value as large attributes out of line are not logged. - */ -STATIC int -xfs_log_calc_max_attrsetm_res( - struct xfs_mount *mp) -{ - int size; - int nblks; - - size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) - - MAXNAMELEN - 1; - nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); - nblks += XFS_B_TO_FSB(mp, size); - nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK); - - return M_RES(mp)->tr_attrsetm.tr_logres + - M_RES(mp)->tr_attrsetrt.tr_logres * nblks; -} - -/* - * Iterate over the log space reservation table to figure out and return - * the maximum one in terms of the pre-calculated values which were done - * at mount time. - */ -STATIC void -xfs_log_get_max_trans_res( - struct xfs_mount *mp, - struct xfs_trans_res *max_resp) -{ - struct xfs_trans_res *resp; - struct xfs_trans_res *end_resp; - int log_space = 0; - int attr_space; - - attr_space = xfs_log_calc_max_attrsetm_res(mp); - - resp = (struct xfs_trans_res *)M_RES(mp); - end_resp = (struct xfs_trans_res *)(M_RES(mp) + 1); - for (; resp < end_resp; resp++) { - int tmp = resp->tr_logcount > 1 ? - resp->tr_logres * resp->tr_logcount : - resp->tr_logres; - if (log_space < tmp) { - log_space = tmp; - *max_resp = *resp; /* struct copy */ - } - } - - if (attr_space > log_space) { - *max_resp = M_RES(mp)->tr_attrsetm; /* struct copy */ - max_resp->tr_logres = attr_space; - } -} - -/* - * Calculate the minimum valid log size for the given superblock configuration. - * Used to calculate the minimum log size at mkfs time, and to determine if - * the log is large enough or not at mount time. Returns the minimum size in - * filesystem block size units. - */ -int -xfs_log_calc_minimum_size( - struct xfs_mount *mp) -{ - struct xfs_trans_res tres = {0}; - int max_logres; - int min_logblks = 0; - int lsunit = 0; - - xfs_log_get_max_trans_res(mp, &tres); - - max_logres = xfs_log_calc_unit_res(mp, tres.tr_logres); - if (tres.tr_logcount > 1) - max_logres *= tres.tr_logcount; - - if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) - lsunit = BTOBB(mp->m_sb.sb_logsunit); - - /* - * Two factors should be taken into account for calculating the minimum - * log space. - * 1) The fundamental limitation is that no single transaction can be - * larger than half size of the log. - * - * From mkfs.xfs, this is considered by the XFS_MIN_LOG_FACTOR - * define, which is set to 3. That means we can definitely fit - * maximally sized 2 transactions in the log. We'll use this same - * value here. - * - * 2) If the lsunit option is specified, a transaction requires 2 LSU - * for the reservation because there are two log writes that can - * require padding - the transaction data and the commit record which - * are written separately and both can require padding to the LSU. - * Consider that we can have an active CIL reservation holding 2*LSU, - * but the CIL is not over a push threshold, in this case, if we - * don't have enough log space for at one new transaction, which - * includes another 2*LSU in the reservation, we will run into dead - * loop situation in log space grant procedure. i.e. - * xlog_grant_head_wait(). - * - * Hence the log size needs to be able to contain two maximally sized - * and padded transactions, which is (2 * (2 * LSU + maxlres)). - * - * Also, the log size should be a multiple of the log stripe unit, round - * it up to lsunit boundary if lsunit is specified. - */ - if (lsunit) { - min_logblks = roundup_64(BTOBB(max_logres), lsunit) + - 2 * lsunit; - } else - min_logblks = BTOBB(max_logres) + 2 * BBSIZE; - min_logblks *= XFS_MIN_LOG_FACTOR; - - return XFS_BB_TO_FSB(mp, min_logblks); -} diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/xfs_rtbitmap.c deleted file mode 100644 index f4dd697cac08..000000000000 --- a/fs/xfs/xfs_rtbitmap.c +++ /dev/null @@ -1,973 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_error.h" -#include "xfs_trans.h" -#include "xfs_trans_space.h" -#include "xfs_trace.h" -#include "xfs_buf.h" -#include "xfs_icache.h" -#include "xfs_dinode.h" -#include "xfs_rtalloc.h" - - -/* - * Realtime allocator bitmap functions shared with userspace. - */ - -/* - * Get a buffer for the bitmap or summary file block specified. - * The buffer is returned read and locked. - */ -int -xfs_rtbuf_get( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t block, /* block number in bitmap or summary */ - int issum, /* is summary not bitmap */ - xfs_buf_t **bpp) /* output: buffer for the block */ -{ - xfs_buf_t *bp; /* block buffer, result */ - xfs_inode_t *ip; /* bitmap or summary inode */ - xfs_bmbt_irec_t map; - int nmap = 1; - int error; /* error value */ - - ip = issum ? mp->m_rsumip : mp->m_rbmip; - - error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); - if (error) - return error; - - ASSERT(map.br_startblock != NULLFSBLOCK); - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_FSB_TO_DADDR(mp, map.br_startblock), - mp->m_bsize, 0, &bp, NULL); - if (error) - return error; - *bpp = bp; - return 0; -} - -/* - * Searching backward from start to limit, find the first block whose - * allocated/free state is different from start's. - */ -int -xfs_rtfind_back( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to look at */ - xfs_rtblock_t limit, /* last block to look at */ - xfs_rtblock_t *rtblock) /* out: start block found */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t firstbit; /* first useful bit in the word */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t len; /* length of inspected area */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t want; /* mask for "good" values */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ - - /* - * Compute and read in starting bitmap block for starting block. - */ - block = XFS_BITTOBLOCK(mp, start); - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Get the first word's index & point to it. - */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - len = start - limit + 1; - /* - * Compute match value, based on the bit at start: if 1 (free) - * then all-ones, else all-zeroes. - */ - want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; - /* - * If the starting position is not word-aligned, deal with the - * partial word. - */ - if (bit < XFS_NBWORD - 1) { - /* - * Calculate first (leftmost) bit number to look at, - * and mask for all the relevant bits in this word. - */ - firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0); - mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) << - firstbit; - /* - * Calculate the difference between the value there - * and what we're looking for. - */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different. Mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i = bit - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; - return 0; - } - i = bit - firstbit + 1; - /* - * Go on to previous block if that's where the previous word is - * and we need the previous word. - */ - if (--word == -1 && i < len) { - /* - * If done with this block, get the previous one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - word = XFS_BLOCKWMASK(mp); - b = &bufp[word]; - } else { - /* - * Go on to the previous word in the buffer. - */ - b--; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the previous one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = *b ^ want)) { - /* - * Different, mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; - return 0; - } - i += XFS_NBWORD; - /* - * Go on to previous block if that's where the previous word is - * and we need the previous word. - */ - if (--word == -1 && i < len) { - /* - * If done with this block, get the previous one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - word = XFS_BLOCKWMASK(mp); - b = &bufp[word]; - } else { - /* - * Go on to the previous word in the buffer. - */ - b--; - } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if (len - i) { - /* - * Calculate first (leftmost) bit number to look at, - * and mask for all the relevant bits in this word. - */ - firstbit = XFS_NBWORD - (len - i); - mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit; - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different, mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; - return 0; - } else - i = len; - } - /* - * No match, return that we scanned the whole area. - */ - xfs_trans_brelse(tp, bp); - *rtblock = start - i + 1; - return 0; -} - -/* - * Searching forward from start to limit, find the first block whose - * allocated/free state is different from start's. - */ -int -xfs_rtfind_forw( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to look at */ - xfs_rtblock_t limit, /* last block to look at */ - xfs_rtblock_t *rtblock) /* out: start block found */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t lastbit; /* last useful bit in the word */ - xfs_rtblock_t len; /* length of inspected area */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t want; /* mask for "good" values */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ - - /* - * Compute and read in starting bitmap block for starting block. - */ - block = XFS_BITTOBLOCK(mp, start); - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Get the first word's index & point to it. - */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - len = limit - start + 1; - /* - * Compute match value, based on the bit at start: if 1 (free) - * then all-ones, else all-zeroes. - */ - want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; - /* - * If the starting position is not word-aligned, deal with the - * partial word. - */ - if (bit) { - /* - * Calculate last (rightmost) bit number to look at, - * and mask for all the relevant bits in this word. - */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); - mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; - /* - * Calculate the difference between the value there - * and what we're looking for. - */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different. Mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i = XFS_RTLOBIT(wdiff) - bit; - *rtblock = start + i - 1; - return 0; - } - i = lastbit - bit; - /* - * Go on to next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * If done with this block, get the previous one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the previous word in the buffer. - */ - b++; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the next one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = *b ^ want)) { - /* - * Different, mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *rtblock = start + i - 1; - return 0; - } - i += XFS_NBWORD; - /* - * Go on to next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * If done with this block, get the next one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; - } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if ((lastbit = len - i)) { - /* - * Calculate mask for all the relevant bits in this word. - */ - mask = ((xfs_rtword_t)1 << lastbit) - 1; - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different, mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *rtblock = start + i - 1; - return 0; - } else - i = len; - } - /* - * No match, return that we scanned the whole area. - */ - xfs_trans_brelse(tp, bp); - *rtblock = start + i - 1; - return 0; -} - -/* - * Read and modify the summary information for a given extent size, - * bitmap block combination. - * Keeps track of a current summary block, so we don't keep reading - * it from the buffer cache. - */ -int -xfs_rtmodify_summary( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - int log, /* log2 of extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - int delta, /* change to make to summary info */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ -{ - xfs_buf_t *bp; /* buffer for the summary block */ - int error; /* error value */ - xfs_fsblock_t sb; /* summary fsblock */ - int so; /* index into the summary file */ - xfs_suminfo_t *sp; /* pointer to returned data */ - - /* - * Compute entry number in the summary file. - */ - so = XFS_SUMOFFS(mp, log, bbno); - /* - * Compute the block number in the summary file. - */ - sb = XFS_SUMOFFSTOBLOCK(mp, so); - /* - * If we have an old buffer, and the block number matches, use that. - */ - if (rbpp && *rbpp && *rsb == sb) - bp = *rbpp; - /* - * Otherwise we have to get the buffer. - */ - else { - /* - * If there was an old one, get rid of it first. - */ - if (rbpp && *rbpp) - xfs_trans_brelse(tp, *rbpp); - error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); - if (error) { - return error; - } - /* - * Remember this buffer and block for the next call. - */ - if (rbpp) { - *rbpp = bp; - *rsb = sb; - } - } - /* - * Point to the summary information, modify and log it. - */ - sp = XFS_SUMPTR(mp, bp, so); - *sp += delta; - xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), - (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); - return 0; -} - -/* - * Set the given range of bitmap bits to the given value. - * Do whatever I/O and logging is required. - */ -int -xfs_rtmodify_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to modify */ - xfs_extlen_t len, /* length of extent to modify */ - int val) /* 1 for free, 0 for allocated */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtword_t *first; /* first used word in the buffer */ - int i; /* current bit number rel. to start */ - int lastbit; /* last useful bit in word */ - xfs_rtword_t mask; /* mask o frelevant bits for value */ - int word; /* word number in the buffer */ - - /* - * Compute starting bitmap block number. - */ - block = XFS_BITTOBLOCK(mp, start); - /* - * Read the bitmap block, and point to its data. - */ - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Compute the starting word's address, and starting bit. - */ - word = XFS_BITTOWORD(mp, start); - first = b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - /* - * 0 (allocated) => all zeroes; 1 (free) => all ones. - */ - val = -val; - /* - * If not starting on a word boundary, deal with the first - * (partial) word. - */ - if (bit) { - /* - * Compute first bit not changed and mask of relevant bits. - */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); - mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; - /* - * Set/clear the active bits. - */ - if (val) - *b |= mask; - else - *b &= ~mask; - i = lastbit - bit; - /* - * Go on to the next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * Log the changed part of this block. - * Get the next one. - */ - xfs_trans_log_buf(tp, bp, - (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp)); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - first = b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer - */ - b++; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the next one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Set the word value correctly. - */ - *b = val; - i += XFS_NBWORD; - /* - * Go on to the next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * Log the changed part of this block. - * Get the next one. - */ - xfs_trans_log_buf(tp, bp, - (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp)); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - first = b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer - */ - b++; - } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if ((lastbit = len - i)) { - /* - * Compute a mask of relevant bits. - */ - bit = 0; - mask = ((xfs_rtword_t)1 << lastbit) - 1; - /* - * Set/clear the active bits. - */ - if (val) - *b |= mask; - else - *b &= ~mask; - b++; - } - /* - * Log any remaining changed bytes. - */ - if (b > first) - xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp - 1)); - return 0; -} - -/* - * Mark an extent specified by start and len freed. - * Updates all the summary information as well as the bitmap. - */ -int -xfs_rtfree_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to free */ - xfs_extlen_t len, /* length to free */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ -{ - xfs_rtblock_t end; /* end of the freed extent */ - int error; /* error value */ - xfs_rtblock_t postblock; /* first block freed > end */ - xfs_rtblock_t preblock; /* first block freed < start */ - - end = start + len - 1; - /* - * Modify the bitmap to mark this extent freed. - */ - error = xfs_rtmodify_range(mp, tp, start, len, 1); - if (error) { - return error; - } - /* - * Assume we're freeing out of the middle of an allocated extent. - * We need to find the beginning and end of the extent so we can - * properly update the summary. - */ - error = xfs_rtfind_back(mp, tp, start, 0, &preblock); - if (error) { - return error; - } - /* - * Find the next allocated block (end of allocated extent). - */ - error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, - &postblock); - if (error) - return error; - /* - * If there are blocks not being freed at the front of the - * old extent, add summary data for them to be allocated. - */ - if (preblock < start) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(start - preblock), - XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); - if (error) { - return error; - } - } - /* - * If there are blocks not being freed at the end of the - * old extent, add summary data for them to be allocated. - */ - if (postblock > end) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock - end), - XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); - if (error) { - return error; - } - } - /* - * Increment the summary information corresponding to the entire - * (new) free extent. - */ - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock + 1 - preblock), - XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); - return error; -} - -/* - * Check that the given range is either all allocated (val = 0) or - * all free (val = 1). - */ -int -xfs_rtcheck_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block number of extent */ - xfs_extlen_t len, /* length of extent */ - int val, /* 1 for free, 0 for allocated */ - xfs_rtblock_t *new, /* out: first block not matching */ - int *stat) /* out: 1 for matches, 0 for not */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t lastbit; /* last useful bit in word */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ - - /* - * Compute starting bitmap block number - */ - block = XFS_BITTOBLOCK(mp, start); - /* - * Read the bitmap block. - */ - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Compute the starting word's address, and starting bit. - */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - /* - * 0 (allocated) => all zero's; 1 (free) => all one's. - */ - val = -val; - /* - * If not starting on a word boundary, deal with the first - * (partial) word. - */ - if (bit) { - /* - * Compute first bit not examined. - */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); - /* - * Mask of relevant bits. - */ - mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = (*b ^ val) & mask)) { - /* - * Different, compute first wrong bit and return. - */ - xfs_trans_brelse(tp, bp); - i = XFS_RTLOBIT(wdiff) - bit; - *new = start + i; - *stat = 0; - return 0; - } - i = lastbit - bit; - /* - * Go on to next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * If done with this block, get the next one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the next one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = *b ^ val)) { - /* - * Different, compute first wrong bit and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *new = start + i; - *stat = 0; - return 0; - } - i += XFS_NBWORD; - /* - * Go on to next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * If done with this block, get the next one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; - } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if ((lastbit = len - i)) { - /* - * Mask of relevant bits. - */ - mask = ((xfs_rtword_t)1 << lastbit) - 1; - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = (*b ^ val) & mask)) { - /* - * Different, compute first wrong bit and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *new = start + i; - *stat = 0; - return 0; - } else - i = len; - } - /* - * Successful, return. - */ - xfs_trans_brelse(tp, bp); - *new = start + i; - *stat = 1; - return 0; -} - -#ifdef DEBUG -/* - * Check that the given extent (block range) is allocated already. - */ -STATIC int /* error */ -xfs_rtcheck_alloc_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number of extent */ - xfs_extlen_t len) /* length of extent */ -{ - xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */ - int stat; - int error; - - error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat); - if (error) - return error; - ASSERT(stat); - return 0; -} -#else -#define xfs_rtcheck_alloc_range(m,t,b,l) (0) -#endif -/* - * Free an extent in the realtime subvolume. Length is expressed in - * realtime extents, as is the block number. - */ -int /* error */ -xfs_rtfree_extent( - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to free */ - xfs_extlen_t len) /* length of extent freed */ -{ - int error; /* error value */ - xfs_mount_t *mp; /* file system mount structure */ - xfs_fsblock_t sb; /* summary file block number */ - xfs_buf_t *sumbp = NULL; /* summary file block buffer */ - - mp = tp->t_mountp; - - ASSERT(mp->m_rbmip->i_itemp != NULL); - ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); - - error = xfs_rtcheck_alloc_range(mp, tp, bno, len); - if (error) - return error; - - /* - * Free the range of realtime blocks. - */ - error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); - if (error) { - return error; - } - /* - * Mark more blocks free in the superblock. - */ - xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len); - /* - * If we've now freed all the blocks, reset the file sequence - * number to 0. - */ - if (tp->t_frextents_delta + mp->m_sb.sb_frextents == - mp->m_sb.sb_rextents) { - if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) - mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; - *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; - xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); - } - return 0; -} - diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c deleted file mode 100644 index 23c2f2577c8d..000000000000 --- a/fs/xfs/xfs_symlink_remote.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * Copyright (c) 2012-2013 Red Hat, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_shared.h" -#include "xfs_trans_resv.h" -#include "xfs_ag.h" -#include "xfs_sb.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_symlink.h" -#include "xfs_cksum.h" -#include "xfs_trans.h" -#include "xfs_buf_item.h" - - -/* - * Each contiguous block has a header, so it is not just a simple pathlen - * to FSB conversion. - */ -int -xfs_symlink_blocks( - struct xfs_mount *mp, - int pathlen) -{ - int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize); - - return (pathlen + buflen - 1) / buflen; -} - -int -xfs_symlink_hdr_set( - struct xfs_mount *mp, - xfs_ino_t ino, - uint32_t offset, - uint32_t size, - struct xfs_buf *bp) -{ - struct xfs_dsymlink_hdr *dsl = bp->b_addr; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return 0; - - dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC); - dsl->sl_offset = cpu_to_be32(offset); - dsl->sl_bytes = cpu_to_be32(size); - uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid); - dsl->sl_owner = cpu_to_be64(ino); - dsl->sl_blkno = cpu_to_be64(bp->b_bn); - bp->b_ops = &xfs_symlink_buf_ops; - - return sizeof(struct xfs_dsymlink_hdr); -} - -/* - * Checking of the symlink header is split into two parts. the verifier does - * CRC, location and bounds checking, the unpacking function checks the path - * parameters and owner. - */ -bool -xfs_symlink_hdr_ok( - xfs_ino_t ino, - uint32_t offset, - uint32_t size, - struct xfs_buf *bp) -{ - struct xfs_dsymlink_hdr *dsl = bp->b_addr; - - if (offset != be32_to_cpu(dsl->sl_offset)) - return false; - if (size != be32_to_cpu(dsl->sl_bytes)) - return false; - if (ino != be64_to_cpu(dsl->sl_owner)) - return false; - - /* ok */ - return true; -} - -static bool -xfs_symlink_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_dsymlink_hdr *dsl = bp->b_addr; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC)) - return false; - if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid)) - return false; - if (bp->b_bn != be64_to_cpu(dsl->sl_blkno)) - return false; - if (be32_to_cpu(dsl->sl_offset) + - be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN) - return false; - if (dsl->sl_owner == 0) - return false; - - return true; -} - -static void -xfs_symlink_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - /* no verification of non-crc buffers */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); - else if (!xfs_symlink_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); -} - -static void -xfs_symlink_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - - /* no verification of non-crc buffers */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - if (!xfs_symlink_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } - - if (bip) { - struct xfs_dsymlink_hdr *dsl = bp->b_addr; - dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); - } - xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF); -} - -const struct xfs_buf_ops xfs_symlink_buf_ops = { - .verify_read = xfs_symlink_read_verify, - .verify_write = xfs_symlink_write_verify, -}; - -void -xfs_symlink_local_to_remote( - struct xfs_trans *tp, - struct xfs_buf *bp, - struct xfs_inode *ip, - struct xfs_ifork *ifp) -{ - struct xfs_mount *mp = ip->i_mount; - char *buf; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) { - bp->b_ops = NULL; - memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); - return; - } - - /* - * As this symlink fits in an inode literal area, it must also fit in - * the smallest buffer the filesystem supports. - */ - ASSERT(BBTOB(bp->b_length) >= - ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr)); - - bp->b_ops = &xfs_symlink_buf_ops; - - buf = bp->b_addr; - buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp); - memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes); -} diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c deleted file mode 100644 index f2bda7c76b8a..000000000000 --- a/fs/xfs/xfs_trans_resv.c +++ /dev/null @@ -1,894 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * Copyright (C) 2010 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_inode.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc.h" -#include "xfs_quota.h" -#include "xfs_trans.h" -#include "xfs_qm.h" -#include "xfs_trans_space.h" -#include "xfs_trace.h" - -/* - * A buffer has a format structure overhead in the log in addition - * to the data, so we need to take this into account when reserving - * space in a transaction for a buffer. Round the space required up - * to a multiple of 128 bytes so that we don't change the historical - * reservation that has been used for this overhead. - */ -STATIC uint -xfs_buf_log_overhead(void) -{ - return round_up(sizeof(struct xlog_op_header) + - sizeof(struct xfs_buf_log_format), 128); -} - -/* - * Calculate out transaction log reservation per item in bytes. - * - * The nbufs argument is used to indicate the number of items that - * will be changed in a transaction. size is used to tell how many - * bytes should be reserved per item. - */ -STATIC uint -xfs_calc_buf_res( - uint nbufs, - uint size) -{ - return nbufs * (size + xfs_buf_log_overhead()); -} - -/* - * Logging inodes is really tricksy. They are logged in memory format, - * which means that what we write into the log doesn't directly translate into - * the amount of space they use on disk. - * - * Case in point - btree format forks in memory format use more space than the - * on-disk format. In memory, the buffer contains a normal btree block header so - * the btree code can treat it as though it is just another generic buffer. - * However, when we write it to the inode fork, we don't write all of this - * header as it isn't needed. e.g. the root is only ever in the inode, so - * there's no need for sibling pointers which would waste 16 bytes of space. - * - * Hence when we have an inode with a maximally sized btree format fork, then - * amount of information we actually log is greater than the size of the inode - * on disk. Hence we need an inode reservation function that calculates all this - * correctly. So, we log: - * - * - 4 log op headers for object - * - for the ilf, the inode core and 2 forks - * - inode log format object - * - the inode core - * - two inode forks containing bmap btree root blocks. - * - the btree data contained by both forks will fit into the inode size, - * hence when combined with the inode core above, we have a total of the - * actual inode size. - * - the BMBT headers need to be accounted separately, as they are - * additional to the records and pointers that fit inside the inode - * forks. - */ -STATIC uint -xfs_calc_inode_res( - struct xfs_mount *mp, - uint ninodes) -{ - return ninodes * - (4 * sizeof(struct xlog_op_header) + - sizeof(struct xfs_inode_log_format) + - mp->m_sb.sb_inodesize + - 2 * XFS_BMBT_BLOCK_LEN(mp)); -} - -/* - * The free inode btree is a conditional feature and the log reservation - * requirements differ slightly from that of the traditional inode allocation - * btree. The finobt tracks records for inode chunks with at least one free - * inode. A record can be removed from the tree for an inode allocation - * or free and thus the finobt reservation is unconditional across: - * - * - inode allocation - * - inode free - * - inode chunk allocation - * - * The 'modify' param indicates to include the record modification scenario. The - * 'alloc' param indicates to include the reservation for free space btree - * modifications on behalf of finobt modifications. This is required only for - * transactions that do not already account for free space btree modifications. - * - * the free inode btree: max depth * block size - * the allocation btrees: 2 trees * (max depth - 1) * block size - * the free inode btree entry: block size - */ -STATIC uint -xfs_calc_finobt_res( - struct xfs_mount *mp, - int alloc, - int modify) -{ - uint res; - - if (!xfs_sb_version_hasfinobt(&mp->m_sb)) - return 0; - - res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); - if (alloc) - res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)); - if (modify) - res += (uint)XFS_FSB_TO_B(mp, 1); - - return res; -} - -/* - * Various log reservation values. - * - * These are based on the size of the file system block because that is what - * most transactions manipulate. Each adds in an additional 128 bytes per - * item logged to try to account for the overhead of the transaction mechanism. - * - * Note: Most of the reservations underestimate the number of allocation - * groups into which they could free extents in the xfs_bmap_finish() call. - * This is because the number in the worst case is quite high and quite - * unusual. In order to fix this we need to change xfs_bmap_finish() to free - * extents in only a single AG at a time. This will require changes to the - * EFI code as well, however, so that the EFI for the extents not freed is - * logged again in each transaction. See SGI PV #261917. - * - * Reservation functions here avoid a huge stack in xfs_trans_init due to - * register overflow from temporaries in the calculations. - */ - - -/* - * In a write transaction we can allocate a maximum of 2 - * extents. This gives: - * the inode getting the new extents: inode size - * the inode's bmap btree: max depth * block size - * the agfs of the ags from which the extents are allocated: 2 * sector - * the superblock free block counter: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - * And the bmap_finish transaction can free bmap blocks in a join: - * the agfs of the ags containing the blocks: 2 * sector size - * the agfls of the ags containing the blocks: 2 * sector size - * the super block free block counter: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - */ -STATIC uint -xfs_calc_write_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - MAX((xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), - XFS_FSB_TO_B(mp, 1)))); -} - -/* - * In truncating a file we free up to two extents at once. We can modify: - * the inode being truncated: inode size - * the inode's bmap btree: (max depth + 1) * block size - * And the bmap_finish transaction can free the blocks and bmap blocks: - * the agf for each of the ags: 4 * sector size - * the agfl for each of the ags: 4 * sector size - * the super block to reflect the freed blocks: sector size - * worst case split in allocation btrees per extent assuming 4 extents: - * 4 exts * 2 trees * (2 * max depth - 1) * block size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - */ -STATIC uint -xfs_calc_itruncate_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - MAX((xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(5, 0) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0))); -} - -/* - * In renaming a files we can modify: - * the four inodes involved: 4 * inode size - * the two directory btrees: 2 * (max depth + v2) * dir block size - * the two directory bmap btrees: 2 * max depth * block size - * And the bmap_finish transaction can free dir and bmap blocks (two sets - * of bmap blocks) giving: - * the agf for the ags in which the blocks live: 3 * sector size - * the agfl for the ags in which the blocks live: 3 * sector size - * the superblock for the free block count: sector size - * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size - */ -STATIC uint -xfs_calc_rename_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - MAX((xfs_calc_inode_res(mp, 4) + - xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3), - XFS_FSB_TO_B(mp, 1)))); -} - -/* - * For removing an inode from unlinked list at first, we can modify: - * the agi hash list and counters: sector size - * the on disk inode before ours in the agi hash list: inode cluster size - */ -STATIC uint -xfs_calc_iunlink_remove_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); -} - -/* - * For creating a link to an inode: - * the parent directory inode: inode size - * the linked inode: inode size - * the directory btree could split: (max depth + v2) * dir block size - * the directory bmap btree could join or split: (max depth + v2) * blocksize - * And the bmap_finish transaction can free some bmap blocks giving: - * the agf for the ag in which the blocks live: sector size - * the agfl for the ag in which the blocks live: sector size - * the superblock for the free block count: sector size - * the allocation btrees: 2 trees * (2 * max depth - 1) * block size - */ -STATIC uint -xfs_calc_link_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - xfs_calc_iunlink_remove_reservation(mp) + - MAX((xfs_calc_inode_res(mp, 2) + - xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)))); -} - -/* - * For adding an inode to unlinked list we can modify: - * the agi hash list: sector size - * the unlinked inode: inode size - */ -STATIC uint -xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) -{ - return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_inode_res(mp, 1); -} - -/* - * For removing a directory entry we can modify: - * the parent directory inode: inode size - * the removed inode: inode size - * the directory btree could join: (max depth + v2) * dir block size - * the directory bmap btree could join or split: (max depth + v2) * blocksize - * And the bmap_finish transaction can free the dir and bmap blocks giving: - * the agf for the ag in which the blocks live: 2 * sector size - * the agfl for the ag in which the blocks live: 2 * sector size - * the superblock for the free block count: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - */ -STATIC uint -xfs_calc_remove_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - xfs_calc_iunlink_add_reservation(mp) + - MAX((xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), - XFS_FSB_TO_B(mp, 1)))); -} - -/* - * For create, break it in to the two cases that the transaction - * covers. We start with the modify case - allocation done by modification - * of the state of existing inodes - and the allocation case. - */ - -/* - * For create we can modify: - * the parent directory inode: inode size - * the new inode: inode size - * the inode btree entry: block size - * the superblock for the nlink flag: sector size - * the directory btree: (max depth + v2) * dir block size - * the directory inode's bmap btree: (max depth + v2) * block size - * the finobt (record modification and allocation btrees) - */ -STATIC uint -xfs_calc_create_resv_modify( - struct xfs_mount *mp) -{ - return xfs_calc_inode_res(mp, 2) + - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - (uint)XFS_FSB_TO_B(mp, 1) + - xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 1, 1); -} - -/* - * For create we can allocate some inodes giving: - * the agi and agf of the ag getting the new inodes: 2 * sectorsize - * the superblock for the nlink flag: sector size - * the inode blocks allocated: mp->m_ialloc_blks * blocksize - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - */ -STATIC uint -xfs_calc_create_resv_alloc( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - mp->m_sb.sb_sectsize + - xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)); -} - -STATIC uint -__xfs_calc_create_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - MAX(xfs_calc_create_resv_alloc(mp), - xfs_calc_create_resv_modify(mp)); -} - -/* - * For icreate we can allocate some inodes giving: - * the agi and agf of the ag getting the new inodes: 2 * sectorsize - * the superblock for the nlink flag: sector size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - * the finobt (record insertion) - */ -STATIC uint -xfs_calc_icreate_resv_alloc( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - mp->m_sb.sb_sectsize + - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 0, 0); -} - -STATIC uint -xfs_calc_icreate_reservation(xfs_mount_t *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - MAX(xfs_calc_icreate_resv_alloc(mp), - xfs_calc_create_resv_modify(mp)); -} - -STATIC uint -xfs_calc_create_reservation( - struct xfs_mount *mp) -{ - if (xfs_sb_version_hascrc(&mp->m_sb)) - return xfs_calc_icreate_reservation(mp); - return __xfs_calc_create_reservation(mp); - -} - -STATIC uint -xfs_calc_create_tmpfile_reservation( - struct xfs_mount *mp) -{ - uint res = XFS_DQUOT_LOGRES(mp); - - if (xfs_sb_version_hascrc(&mp->m_sb)) - res += xfs_calc_icreate_resv_alloc(mp); - else - res += xfs_calc_create_resv_alloc(mp); - - return res + xfs_calc_iunlink_add_reservation(mp); -} - -/* - * Making a new directory is the same as creating a new file. - */ -STATIC uint -xfs_calc_mkdir_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_create_reservation(mp); -} - - -/* - * Making a new symplink is the same as creating a new file, but - * with the added blocks for remote symlink data which can be up to 1kB in - * length (MAXPATHLEN). - */ -STATIC uint -xfs_calc_symlink_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_create_reservation(mp) + - xfs_calc_buf_res(1, MAXPATHLEN); -} - -/* - * In freeing an inode we can modify: - * the inode being freed: inode size - * the super block free inode counter: sector size - * the agi hash list and counters: sector size - * the inode btree entry: block size - * the on disk inode before ours in the agi hash list: inode cluster size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - * the finobt (record insertion, removal or modification) - */ -STATIC uint -xfs_calc_ifree_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_iunlink_remove_reservation(mp) + - xfs_calc_buf_res(1, 0) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 0, 1); -} - -/* - * When only changing the inode we log the inode and possibly the superblock - * We also add a bit of slop for the transaction stuff. - */ -STATIC uint -xfs_calc_ichange_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); - -} - -/* - * Growing the data section of the filesystem. - * superblock - * agi and agf - * allocation btrees - */ -STATIC uint -xfs_calc_growdata_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)); -} - -/* - * Growing the rt section of the filesystem. - * In the first set of transactions (ALLOC) we allocate space to the - * bitmap or summary files. - * superblock: sector size - * agf of the ag from which the extent is allocated: sector size - * bmap btree for bitmap/summary inode: max depth * blocksize - * bitmap/summary inode: inode size - * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize - */ -STATIC uint -xfs_calc_growrtalloc_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)); -} - -/* - * Growing the rt section of the filesystem. - * In the second set of transactions (ZERO) we zero the new metadata blocks. - * one bitmap/summary block: blocksize - */ -STATIC uint -xfs_calc_growrtzero_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize); -} - -/* - * Growing the rt section of the filesystem. - * In the third set of transactions (FREE) we update metadata without - * allocating any new blocks. - * superblock: sector size - * bitmap inode: inode size - * summary inode: inode size - * one bitmap block: blocksize - * summary blocks: new summary size - */ -STATIC uint -xfs_calc_growrtfree_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_inode_res(mp, 2) + - xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) + - xfs_calc_buf_res(1, mp->m_rsumsize); -} - -/* - * Logging the inode modification timestamp on a synchronous write. - * inode - */ -STATIC uint -xfs_calc_swrite_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_inode_res(mp, 1); -} - -/* - * Logging the inode mode bits when writing a setuid/setgid file - * inode - */ -STATIC uint -xfs_calc_writeid_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_inode_res(mp, 1); -} - -/* - * Converting the inode from non-attributed to attributed. - * the inode being converted: inode size - * agf block and superblock (for block allocation) - * the new block (directory sized) - * bmap blocks for the new directory block - * allocation btrees - */ -STATIC uint -xfs_calc_addafork_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + - xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1, - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)); -} - -/* - * Removing the attribute fork of a file - * the inode being truncated: inode size - * the inode's bmap btree: max depth * block size - * And the bmap_finish transaction can free the blocks and bmap blocks: - * the agf for each of the ags: 4 * sector size - * the agfl for each of the ags: 4 * sector size - * the super block to reflect the freed blocks: sector size - * worst case split in allocation btrees per extent assuming 4 extents: - * 4 exts * 2 trees * (2 * max depth - 1) * block size - */ -STATIC uint -xfs_calc_attrinval_reservation( - struct xfs_mount *mp) -{ - return MAX((xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), - XFS_FSB_TO_B(mp, 1)))); -} - -/* - * Setting an attribute at mount time. - * the inode getting the attribute - * the superblock for allocations - * the agfs extents are allocated from - * the attribute btree * max depth - * the inode allocation btree - * Since attribute transaction space is dependent on the size of the attribute, - * the calculation is done partially at mount time and partially at runtime(see - * below). - */ -STATIC uint -xfs_calc_attrsetm_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); -} - -/* - * Setting an attribute at runtime, transaction space unit per block. - * the superblock for allocations: sector size - * the inode bmap btree could join or split: max depth * block size - * Since the runtime attribute transaction space is dependent on the total - * blocks needed for the 1st bmap, here we calculate out the space unit for - * one block so that the caller could figure out the total space according - * to the attibute extent length in blocks by: - * ext * M_RES(mp)->tr_attrsetrt.tr_logres - */ -STATIC uint -xfs_calc_attrsetrt_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), - XFS_FSB_TO_B(mp, 1)); -} - -/* - * Removing an attribute. - * the inode: inode size - * the attribute btree could join: max depth * block size - * the inode bmap btree could join or split: max depth * block size - * And the bmap_finish transaction can free the attr blocks freed giving: - * the agf for the ag in which the blocks live: 2 * sector size - * the agfl for the ag in which the blocks live: 2 * sector size - * the superblock for the free block count: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - */ -STATIC uint -xfs_calc_attrrm_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - MAX((xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, - XFS_FSB_TO_B(mp, 1)) + - (uint)XFS_FSB_TO_B(mp, - XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + - xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)), - (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), - XFS_FSB_TO_B(mp, 1)))); -} - -/* - * Clearing a bad agino number in an agi hash bucket. - */ -STATIC uint -xfs_calc_clear_agi_bucket_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); -} - -/* - * Clearing the quotaflags in the superblock. - * the super block for changing quota flags: sector size - */ -STATIC uint -xfs_calc_qm_sbchange_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); -} - -/* - * Adjusting quota limits. - * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) - */ -STATIC uint -xfs_calc_qm_setqlim_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot)); -} - -/* - * Allocating quota on disk if needed. - * the write transaction log space for quota file extent allocation - * the unit of quota allocation: one system block size - */ -STATIC uint -xfs_calc_qm_dqalloc_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_write_reservation(mp) + - xfs_calc_buf_res(1, - XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); -} - -/* - * Turning off quotas. - * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 - * the superblock for the quota flags: sector size - */ -STATIC uint -xfs_calc_qm_quotaoff_reservation( - struct xfs_mount *mp) -{ - return sizeof(struct xfs_qoff_logitem) * 2 + - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); -} - -/* - * End of turning off quotas. - * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 - */ -STATIC uint -xfs_calc_qm_quotaoff_end_reservation( - struct xfs_mount *mp) -{ - return sizeof(struct xfs_qoff_logitem) * 2; -} - -/* - * Syncing the incore super block changes to disk. - * the super block to reflect the changes: sector size - */ -STATIC uint -xfs_calc_sb_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); -} - -void -xfs_trans_resv_calc( - struct xfs_mount *mp, - struct xfs_trans_resv *resp) -{ - /* - * The following transactions are logged in physical format and - * require a permanent reservation on space. - */ - resp->tr_write.tr_logres = xfs_calc_write_reservation(mp); - resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT; - resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp); - resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT; - resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp); - resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT; - resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_link.tr_logres = xfs_calc_link_reservation(mp); - resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT; - resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp); - resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT; - resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp); - resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; - resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_create.tr_logres = xfs_calc_create_reservation(mp); - resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; - resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_create_tmpfile.tr_logres = - xfs_calc_create_tmpfile_reservation(mp); - resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT; - resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp); - resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT; - resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp); - resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT; - resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp); - resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT; - resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp); - resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT; - resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp); - resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT; - resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp); - resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT; - resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp); - resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT; - resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp); - resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; - resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - /* - * The following transactions are logged in logical format with - * a default log count. - */ - resp->tr_qm_sbchange.tr_logres = xfs_calc_qm_sbchange_reservation(mp); - resp->tr_qm_sbchange.tr_logcount = XFS_DEFAULT_LOG_COUNT; - - resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp); - resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT; - - resp->tr_qm_quotaoff.tr_logres = xfs_calc_qm_quotaoff_reservation(mp); - resp->tr_qm_quotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT; - - resp->tr_qm_equotaoff.tr_logres = - xfs_calc_qm_quotaoff_end_reservation(mp); - resp->tr_qm_equotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT; - - resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp); - resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT; - - /* The following transaction are logged in logical format */ - resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp); - resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp); - resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp); - resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp); - resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp); - resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp); - resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp); - resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp); -} -- cgit v1.2.3