summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_dir.c92
-rw-r--r--fs/9p/vfs_file.c4
-rw-r--r--fs/9p/vfs_inode.c3
-rw-r--r--fs/9p/vfs_inode_dotl.c11
-rw-r--r--fs/binfmt_elf.c8
-rw-r--r--fs/binfmt_elf_fdpic.c7
-rw-r--r--fs/btrfs/extent-tree.c22
-rw-r--r--fs/btrfs/extent_map.c3
-rw-r--r--fs/btrfs/file.c25
-rw-r--r--fs/btrfs/ioctl.c5
-rw-r--r--fs/btrfs/ordered-data.c13
-rw-r--r--fs/btrfs/scrub.c25
-rw-r--r--fs/btrfs/transaction.c27
-rw-r--r--fs/btrfs/volumes.c3
-rw-r--r--fs/dlm/user.c8
-rw-r--r--fs/fuse/cuse.c10
-rw-r--r--fs/fuse/dev.c128
-rw-r--r--fs/fuse/dir.c259
-rw-r--r--fs/fuse/file.c238
-rw-r--r--fs/fuse/fuse_i.h74
-rw-r--r--fs/fuse/inode.c16
-rw-r--r--fs/nfs/namespace.c20
-rw-r--r--fs/nfs/nfs4client.c62
-rw-r--r--fs/nfs/nfs4state.c22
-rw-r--r--fs/nfs/super.c22
-rw-r--r--fs/nilfs2/ioctl.c5
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/proc_net.c14
-rw-r--r--fs/pstore/ram.c10
-rw-r--r--fs/select.c1
-rw-r--r--fs/sysfs/group.c42
-rw-r--r--fs/sysfs/symlink.c45
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/xfs/xfs_alloc.c2
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_attr.c9
-rw-r--r--fs/xfs/xfs_bmap.c124
-rw-r--r--fs/xfs/xfs_buf.c22
-rw-r--r--fs/xfs/xfs_buf_item.c130
-rw-r--r--fs/xfs/xfs_buf_item.h14
-rw-r--r--fs/xfs/xfs_dfrag.c4
-rw-r--r--fs/xfs/xfs_dquot.c12
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/xfs_inode.c6
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_inode_item.c16
-rw-r--r--fs/xfs/xfs_inode_item.h4
-rw-r--r--fs/xfs/xfs_iomap.c86
-rw-r--r--fs/xfs/xfs_log.c10
-rw-r--r--fs/xfs/xfs_mount.c14
-rw-r--r--fs/xfs/xfs_mount.h9
-rw-r--r--fs/xfs/xfs_qm.c7
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c28
-rw-r--r--fs/xfs/xfs_super.c29
-rw-r--r--fs/xfs/xfs_trace.h1
-rw-r--r--fs/xfs/xfs_trans.c376
-rw-r--r--fs/xfs/xfs_trans.h18
-rw-r--r--fs/xfs/xfs_trans_ail.c14
-rw-r--r--fs/xfs/xfs_trans_dquot.c10
-rw-r--r--fs/xfs/xfs_trans_inode.c41
-rw-r--r--fs/xfs/xfs_types.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c12
64 files changed, 1392 insertions, 850 deletions
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index ff911e779651..be1e34adc3c6 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -52,10 +52,9 @@
*/
struct p9_rdir {
- struct mutex mutex;
int head;
int tail;
- uint8_t *buf;
+ uint8_t buf[];
};
/**
@@ -93,33 +92,12 @@ static void p9stat_init(struct p9_wstat *stbuf)
*
*/
-static int v9fs_alloc_rdir_buf(struct file *filp, int buflen)
+static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen)
{
- struct p9_rdir *rdir;
- struct p9_fid *fid;
- int err = 0;
-
- fid = filp->private_data;
- if (!fid->rdir) {
- rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
-
- if (rdir == NULL) {
- err = -ENOMEM;
- goto exit;
- }
- spin_lock(&filp->f_dentry->d_lock);
- if (!fid->rdir) {
- rdir->buf = (uint8_t *)rdir + sizeof(struct p9_rdir);
- mutex_init(&rdir->mutex);
- rdir->head = rdir->tail = 0;
- fid->rdir = (void *) rdir;
- rdir = NULL;
- }
- spin_unlock(&filp->f_dentry->d_lock);
- kfree(rdir);
- }
-exit:
- return err;
+ struct p9_fid *fid = filp->private_data;
+ if (!fid->rdir)
+ fid->rdir = kzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
+ return fid->rdir;
}
/**
@@ -145,20 +123,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
buflen = fid->clnt->msize - P9_IOHDRSZ;
- err = v9fs_alloc_rdir_buf(filp, buflen);
- if (err)
- goto exit;
- rdir = (struct p9_rdir *) fid->rdir;
+ rdir = v9fs_alloc_rdir_buf(filp, buflen);
+ if (!rdir)
+ return -ENOMEM;
- err = mutex_lock_interruptible(&rdir->mutex);
- if (err)
- return err;
- while (err == 0) {
+ while (1) {
if (rdir->tail == rdir->head) {
err = v9fs_file_readn(filp, rdir->buf, NULL,
buflen, filp->f_pos);
if (err <= 0)
- goto unlock_and_exit;
+ return err;
rdir->head = 0;
rdir->tail = err;
@@ -169,9 +143,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
rdir->tail - rdir->head, &st);
if (err) {
p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
- err = -EIO;
p9stat_free(&st);
- goto unlock_and_exit;
+ return -EIO;
}
reclen = st.size+2;
@@ -180,19 +153,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
p9stat_free(&st);
- if (over) {
- err = 0;
- goto unlock_and_exit;
- }
+ if (over)
+ return 0;
+
rdir->head += reclen;
filp->f_pos += reclen;
}
}
-
-unlock_and_exit:
- mutex_unlock(&rdir->mutex);
-exit:
- return err;
}
/**
@@ -218,21 +185,16 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
buflen = fid->clnt->msize - P9_READDIRHDRSZ;
- err = v9fs_alloc_rdir_buf(filp, buflen);
- if (err)
- goto exit;
- rdir = (struct p9_rdir *) fid->rdir;
+ rdir = v9fs_alloc_rdir_buf(filp, buflen);
+ if (!rdir)
+ return -ENOMEM;
- err = mutex_lock_interruptible(&rdir->mutex);
- if (err)
- return err;
-
- while (err == 0) {
+ while (1) {
if (rdir->tail == rdir->head) {
err = p9_client_readdir(fid, rdir->buf, buflen,
filp->f_pos);
if (err <= 0)
- goto unlock_and_exit;
+ return err;
rdir->head = 0;
rdir->tail = err;
@@ -245,8 +207,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
&curdirent);
if (err < 0) {
p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
- err = -EIO;
- goto unlock_and_exit;
+ return -EIO;
}
/* d_off in dirent structure tracks the offset into
@@ -261,20 +222,13 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
curdirent.d_type);
oldoffset = curdirent.d_off;
- if (over) {
- err = 0;
- goto unlock_and_exit;
- }
+ if (over)
+ return 0;
filp->f_pos = curdirent.d_off;
rdir->head += err;
}
}
-
-unlock_and_exit:
- mutex_unlock(&rdir->mutex);
-exit:
- return err;
}
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index c2483e97beee..7a3399767570 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -80,10 +80,6 @@ int v9fs_file_open(struct inode *inode, struct file *file)
p9_client_clunk(fid);
return err;
}
- if (file->f_flags & O_TRUNC) {
- i_size_write(inode, 0);
- inode->i_blocks = 0;
- }
if ((file->f_flags & O_APPEND) &&
(!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)))
generic_file_llseek(file, 0, SEEK_END);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 890bed538f9b..57d017ac68e4 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -192,9 +192,6 @@ int v9fs_uflags2omode(int uflags, int extended)
break;
}
- if (uflags & O_TRUNC)
- ret |= P9_OTRUNC;
-
if (extended) {
if (uflags & O_EXCL)
ret |= P9_OEXCL;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 40895546e103..8d24ad66dfb8 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -186,7 +186,6 @@ static int v9fs_mapped_dotl_flags(int flags)
{ O_CREAT, P9_DOTL_CREATE },
{ O_EXCL, P9_DOTL_EXCL },
{ O_NOCTTY, P9_DOTL_NOCTTY },
- { O_TRUNC, P9_DOTL_TRUNC },
{ O_APPEND, P9_DOTL_APPEND },
{ O_NONBLOCK, P9_DOTL_NONBLOCK },
{ O_DSYNC, P9_DOTL_DSYNC },
@@ -268,8 +267,14 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
}
/* Only creates */
- if (!(flags & O_CREAT) || dentry->d_inode)
- return finish_no_open(file, res);
+ if (!(flags & O_CREAT))
+ return finish_no_open(file, res);
+ else if (dentry->d_inode) {
+ if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+ return -EEXIST;
+ else
+ return finish_no_open(file, res);
+ }
v9ses = v9fs_inode2v9ses(dir);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c42cdbabecf..49d0b43458b7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -33,6 +33,7 @@
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
+#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
@@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
} else {
- cputime_to_timeval(p->utime, &prstatus->pr_utime);
- cputime_to_timeval(p->stime, &prstatus->pr_stime);
+ cputime_t utime, stime;
+
+ task_cputime(p, &utime, &stime);
+ cputime_to_timeval(utime, &prstatus->pr_utime);
+ cputime_to_timeval(stime, &prstatus->pr_stime);
}
cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index dc84732e554f..cb240dd3b402 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
} else {
- cputime_to_timeval(p->utime, &prstatus->pr_utime);
- cputime_to_timeval(p->stime, &prstatus->pr_stime);
+ cputime_t utime, stime;
+
+ task_cputime(p, &utime, &stime);
+ cputime_to_timeval(utime, &prstatus->pr_utime);
+ cputime_to_timeval(stime, &prstatus->pr_stime);
}
cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a8b8adc05070..5a3327b8f90d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4534,7 +4534,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
unsigned nr_extents = 0;
int extra_reserve = 0;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
- int ret;
+ int ret = 0;
bool delalloc_lock = true;
/* If we are a free space inode we need to not flush since we will be in
@@ -4579,20 +4579,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
csum_bytes = BTRFS_I(inode)->csum_bytes;
spin_unlock(&BTRFS_I(inode)->lock);
- if (root->fs_info->quota_enabled) {
+ if (root->fs_info->quota_enabled)
ret = btrfs_qgroup_reserve(root, num_bytes +
nr_extents * root->leafsize);
- if (ret) {
- spin_lock(&BTRFS_I(inode)->lock);
- calc_csum_metadata_size(inode, num_bytes, 0);
- spin_unlock(&BTRFS_I(inode)->lock);
- if (delalloc_lock)
- mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
- return ret;
- }
- }
- ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
+ /*
+ * ret != 0 here means the qgroup reservation failed, we go straight to
+ * the shared error handling then.
+ */
+ if (ret == 0)
+ ret = reserve_metadata_bytes(root, block_rsv,
+ to_reserve, flush);
+
if (ret) {
u64 to_free = 0;
unsigned dropped;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2e8cae63d247..fdb7a8db3b57 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -288,7 +288,8 @@ out:
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
{
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
- try_merge_map(tree, em);
+ if (em->in_tree)
+ try_merge_map(tree, em);
}
/**
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f76b1fd160d4..aeb84469d2c4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
struct btrfs_key key;
struct btrfs_ioctl_defrag_range_args range;
int num_defrag;
+ int index;
+ int ret;
/* get the inode */
key.objectid = defrag->root;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
key.offset = (u64)-1;
+
+ index = srcu_read_lock(&fs_info->subvol_srcu);
+
inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
if (IS_ERR(inode_root)) {
- kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
- return PTR_ERR(inode_root);
+ ret = PTR_ERR(inode_root);
+ goto cleanup;
+ }
+ if (btrfs_root_refs(&inode_root->root_item) == 0) {
+ ret = -ENOENT;
+ goto cleanup;
}
key.objectid = defrag->ino;
@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
if (IS_ERR(inode)) {
- kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
- return PTR_ERR(inode);
+ ret = PTR_ERR(inode);
+ goto cleanup;
}
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
/* do a chunk of defrag */
clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
iput(inode);
return 0;
+cleanup:
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+ return ret;
}
/*
@@ -1594,9 +1608,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
if (err < 0 && num_written > 0)
num_written = err;
}
-out:
+
if (sync)
atomic_dec(&BTRFS_I(inode)->sync_writers);
+out:
sb_end_write(inode->i_sb);
current->backing_dev_info = NULL;
return num_written ? num_written : err;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5b22d45d3c6a..338f2597bf7f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -515,7 +515,6 @@ static noinline int create_subvol(struct btrfs_root *root,
BUG_ON(ret);
- d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
if (async_transid) {
*async_transid = trans->transid;
@@ -525,6 +524,10 @@ fail:
}
if (err && !ret)
ret = err;
+
+ if (!ret)
+ d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
+
return ret;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index f10731297040..e5ed56729607 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
* if the disk i_size is already at the inode->i_size, or
* this ordered extent is inside the disk i_size, we're done
*/
- if (disk_i_size == i_size || offset <= disk_i_size) {
+ if (disk_i_size == i_size)
+ goto out;
+
+ /*
+ * We still need to update disk_i_size if outstanding_isize is greater
+ * than disk_i_size.
+ */
+ if (offset <= disk_i_size &&
+ (!ordered || ordered->outstanding_isize <= disk_i_size))
goto out;
- }
/*
* walk backward from this ordered extent to disk_i_size.
@@ -870,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
break;
if (test->file_offset >= i_size)
break;
- if (test->file_offset >= disk_i_size) {
+ if (entry_end(test) > disk_i_size) {
/*
* we don't update disk_i_size now, so record this
* undealt i_size. Or we will not know the real
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bdbb94f245c9..67783e03d121 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -580,20 +580,29 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
int corrected = 0;
struct btrfs_key key;
struct inode *inode = NULL;
+ struct btrfs_fs_info *fs_info;
u64 end = offset + PAGE_SIZE - 1;
struct btrfs_root *local_root;
+ int srcu_index;
key.objectid = root;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
- local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key);
- if (IS_ERR(local_root))
+
+ fs_info = fixup->root->fs_info;
+ srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
+
+ local_root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(local_root)) {
+ srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
return PTR_ERR(local_root);
+ }
key.type = BTRFS_INODE_ITEM_KEY;
key.objectid = inum;
key.offset = 0;
- inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL);
+ inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
+ srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -606,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
}
if (PageUptodate(page)) {
- struct btrfs_fs_info *fs_info;
if (PageDirty(page)) {
/*
* we need to write the data to the defect sector. the
@@ -3180,18 +3188,25 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
u64 physical_for_dev_replace;
u64 len;
struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
+ int srcu_index;
key.objectid = root;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
+
+ srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
+
local_root = btrfs_read_fs_root_no_name(fs_info, &key);
- if (IS_ERR(local_root))
+ if (IS_ERR(local_root)) {
+ srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
return PTR_ERR(local_root);
+ }
key.type = BTRFS_INODE_ITEM_KEY;
key.objectid = inum;
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
+ srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
if (IS_ERR(inode))
return PTR_ERR(inode);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f15494699f3b..fc03aa60b684 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -333,12 +333,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
&root->fs_info->trans_block_rsv,
num_bytes, flush);
if (ret)
- return ERR_PTR(ret);
+ goto reserve_fail;
}
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
- if (!h)
- return ERR_PTR(-ENOMEM);
+ if (!h) {
+ ret = -ENOMEM;
+ goto alloc_fail;
+ }
/*
* If we are JOIN_NOLOCK we're already committing a transaction and
@@ -365,11 +367,7 @@ again:
if (ret < 0) {
/* We must get the transaction if we are JOIN_NOLOCK. */
BUG_ON(type == TRANS_JOIN_NOLOCK);
-
- if (type < TRANS_JOIN_NOLOCK)
- sb_end_intwrite(root->fs_info->sb);
- kmem_cache_free(btrfs_trans_handle_cachep, h);
- return ERR_PTR(ret);
+ goto join_fail;
}
cur_trans = root->fs_info->running_transaction;
@@ -410,6 +408,19 @@ got_it:
if (!current->journal_info && type != TRANS_USERSPACE)
current->journal_info = h;
return h;
+
+join_fail:
+ if (type < TRANS_JOIN_NOLOCK)
+ sb_end_intwrite(root->fs_info->sb);
+ kmem_cache_free(btrfs_trans_handle_cachep, h);
+alloc_fail:
+ if (num_bytes)
+ btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
+ num_bytes);
+reserve_fail:
+ if (qgroup_reserved)
+ btrfs_qgroup_free(root, qgroup_reserved);
+ return ERR_PTR(ret);
}
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 15f6efdf6463..5cbb7f4b1672 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1556,7 +1556,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
ret = 0;
/* Notify udev that device has changed */
- btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
+ if (bdev)
+ btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
error_brelse:
brelse(bh);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 7ff49852b0cb..911649a47dd5 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -503,11 +503,11 @@ static ssize_t device_write(struct file *file, const char __user *buf,
#endif
return -EINVAL;
-#ifdef CONFIG_COMPAT
- if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN)
-#else
+ /*
+ * can't compare against COMPAT/dlm_write_request32 because
+ * we don't yet know if is64bit is zero
+ */
if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
-#endif
return -EINVAL;
kbuf = kzalloc(count + 1, GFP_NOFS);
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index e397b675b029..6f96a8def147 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -91,19 +91,22 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
loff_t pos = 0;
+ struct iovec iov = { .iov_base = buf, .iov_len = count };
- return fuse_direct_io(file, buf, count, &pos, 0);
+ return fuse_direct_io(file, &iov, 1, count, &pos, 0);
}
static ssize_t cuse_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
loff_t pos = 0;
+ struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
+
/*
* No locking or generic_write_checks(), the server is
* responsible for locking and sanity checks.
*/
- return fuse_direct_io(file, buf, count, &pos, 1);
+ return fuse_direct_io(file, &iov, 1, count, &pos, 1);
}
static int cuse_open(struct inode *inode, struct file *file)
@@ -419,7 +422,7 @@ static int cuse_send_init(struct cuse_conn *cc)
BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
- req = fuse_get_req(fc);
+ req = fuse_get_req(fc, 1);
if (IS_ERR(req)) {
rc = PTR_ERR(req);
goto err;
@@ -449,6 +452,7 @@ static int cuse_send_init(struct cuse_conn *cc)
req->out.argvar = 1;
req->out.argpages = 1;
req->pages[0] = page;
+ req->page_descs[0].length = req->out.args[1].size;
req->num_pages = 1;
req->end = cuse_process_init_reply;
fuse_request_send_background(fc, req);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e83351aa5bad..e9bdec0b16d9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -34,34 +34,67 @@ static struct fuse_conn *fuse_get_conn(struct file *file)
return file->private_data;
}
-static void fuse_request_init(struct fuse_req *req)
+static void fuse_request_init(struct fuse_req *req, struct page **pages,
+ struct fuse_page_desc *page_descs,
+ unsigned npages)
{
memset(req, 0, sizeof(*req));
+ memset(pages, 0, sizeof(*pages) * npages);
+ memset(page_descs, 0, sizeof(*page_descs) * npages);
INIT_LIST_HEAD(&req->list);
INIT_LIST_HEAD(&req->intr_entry);
init_waitqueue_head(&req->waitq);
atomic_set(&req->count, 1);
+ req->pages = pages;
+ req->page_descs = page_descs;
+ req->max_pages = npages;
}
-struct fuse_req *fuse_request_alloc(void)
+static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
{
- struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
- if (req)
- fuse_request_init(req);
+ struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
+ if (req) {
+ struct page **pages;
+ struct fuse_page_desc *page_descs;
+
+ if (npages <= FUSE_REQ_INLINE_PAGES) {
+ pages = req->inline_pages;
+ page_descs = req->inline_page_descs;
+ } else {
+ pages = kmalloc(sizeof(struct page *) * npages, flags);
+ page_descs = kmalloc(sizeof(struct fuse_page_desc) *
+ npages, flags);
+ }
+
+ if (!pages || !page_descs) {
+ kfree(pages);
+ kfree(page_descs);
+ kmem_cache_free(fuse_req_cachep, req);
+ return NULL;
+ }
+
+ fuse_request_init(req, pages, page_descs, npages);
+ }
return req;
}
+
+struct fuse_req *fuse_request_alloc(unsigned npages)
+{
+ return __fuse_request_alloc(npages, GFP_KERNEL);
+}
EXPORT_SYMBOL_GPL(fuse_request_alloc);
-struct fuse_req *fuse_request_alloc_nofs(void)
+struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
{
- struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
- if (req)
- fuse_request_init(req);
- return req;
+ return __fuse_request_alloc(npages, GFP_NOFS);
}
void fuse_request_free(struct fuse_req *req)
{
+ if (req->pages != req->inline_pages) {
+ kfree(req->pages);
+ kfree(req->page_descs);
+ }
kmem_cache_free(fuse_req_cachep, req);
}
@@ -97,7 +130,7 @@ static void fuse_req_init_context(struct fuse_req *req)
req->in.h.pid = current->pid;
}
-struct fuse_req *fuse_get_req(struct fuse_conn *fc)
+struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
{
struct fuse_req *req;
sigset_t oldset;
@@ -116,7 +149,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
if (!fc->connected)
goto out;
- req = fuse_request_alloc();
+ req = fuse_request_alloc(npages);
err = -ENOMEM;
if (!req)
goto out;
@@ -165,7 +198,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
struct fuse_file *ff = file->private_data;
spin_lock(&fc->lock);
- fuse_request_init(req);
+ fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
BUG_ON(ff->reserved_req);
ff->reserved_req = req;
wake_up_all(&fc->reserved_req_waitq);
@@ -186,13 +219,14 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
* filesystem should not have it's own file open. If deadlock is
* intentional, it can still be broken by "aborting" the filesystem.
*/
-struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
+struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
+ struct file *file)
{
struct fuse_req *req;
atomic_inc(&fc->num_waiting);
wait_event(fc->blocked_waitq, !fc->blocked);
- req = fuse_request_alloc();
+ req = fuse_request_alloc(0);
if (!req)
req = get_reserved_req(fc, file);
@@ -406,9 +440,8 @@ __acquires(fc->lock)
}
}
-void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
+static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
- req->isreply = 1;
spin_lock(&fc->lock);
if (!fc->connected)
req->out.h.error = -ENOTCONN;
@@ -425,6 +458,12 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
}
spin_unlock(&fc->lock);
}
+
+void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->isreply = 1;
+ __fuse_request_send(fc, req);
+}
EXPORT_SYMBOL_GPL(fuse_request_send);
static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
@@ -491,6 +530,27 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
fuse_request_send_nowait_locked(fc, req);
}
+void fuse_force_forget(struct file *file, u64 nodeid)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_forget_in inarg;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.nlookup = 1;
+ req = fuse_get_req_nofail_nopages(fc, file);
+ req->in.h.opcode = FUSE_FORGET;
+ req->in.h.nodeid = nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->isreply = 0;
+ __fuse_request_send(fc, req);
+ /* ignore errors */
+ fuse_put_request(fc, req);
+}
+
/*
* Lock the request. Up to the next unlock_request() there mustn't be
* anything that could cause a page-fault. If the request was already
@@ -850,11 +910,11 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
{
unsigned i;
struct fuse_req *req = cs->req;
- unsigned offset = req->page_offset;
- unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
int err;
+ unsigned offset = req->page_descs[i].offset;
+ unsigned count = min(nbytes, req->page_descs[i].length);
err = fuse_copy_page(cs, &req->pages[i], offset, count,
zeroing);
@@ -862,8 +922,6 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
return err;
nbytes -= count;
- count = min(nbytes, (unsigned) PAGE_SIZE);
- offset = 0;
}
return 0;
}
@@ -1536,29 +1594,34 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
unsigned int num;
unsigned int offset;
size_t total_len = 0;
+ int num_pages;
+
+ offset = outarg->offset & ~PAGE_CACHE_MASK;
+ file_size = i_size_read(inode);
+
+ num = outarg->size;
+ if (outarg->offset > file_size)
+ num = 0;
+ else if (outarg->offset + num > file_size)
+ num = file_size - outarg->offset;
+
+ num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
- req = fuse_get_req(fc);
+ req = fuse_get_req(fc, num_pages);
if (IS_ERR(req))
return PTR_ERR(req);
- offset = outarg->offset & ~PAGE_CACHE_MASK;
-
req->in.h.opcode = FUSE_NOTIFY_REPLY;
req->in.h.nodeid = outarg->nodeid;
req->in.numargs = 2;
req->in.argpages = 1;
- req->page_offset = offset;
+ req->page_descs[0].offset = offset;
req->end = fuse_retrieve_end;
index = outarg->offset >> PAGE_CACHE_SHIFT;
- file_size = i_size_read(inode);
- num = outarg->size;
- if (outarg->offset > file_size)
- num = 0;
- else if (outarg->offset + num > file_size)
- num = file_size - outarg->offset;
- while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
+ while (num && req->num_pages < num_pages) {
struct page *page;
unsigned int this_num;
@@ -1568,6 +1631,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
req->pages[req->num_pages] = page;
+ req->page_descs[req->num_pages].length = this_num;
req->num_pages++;
offset = 0;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b7c09f9eb40c..85065221a58a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -14,6 +14,29 @@
#include <linux/namei.h>
#include <linux/slab.h>
+static bool fuse_use_readdirplus(struct inode *dir, struct file *filp)
+{
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_inode *fi = get_fuse_inode(dir);
+
+ if (!fc->do_readdirplus)
+ return false;
+ if (!fc->readdirplus_auto)
+ return true;
+ if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
+ return true;
+ if (filp->f_pos == 0)
+ return true;
+ return false;
+}
+
+static void fuse_advise_use_readdirplus(struct inode *dir)
+{
+ struct fuse_inode *fi = get_fuse_inode(dir);
+
+ set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
+}
+
#if BITS_PER_LONG >= 64
static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
{
@@ -178,7 +201,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
return -ECHILD;
fc = get_fuse_conn(inode);
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return 0;
@@ -219,6 +242,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
attr_version);
fuse_change_entry_timeout(entry, &outarg);
}
+ fuse_advise_use_readdirplus(inode);
return 1;
}
@@ -271,7 +295,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
if (name->len > FUSE_NAME_MAX)
goto out;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
err = PTR_ERR(req);
if (IS_ERR(req))
goto out;
@@ -355,6 +379,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
else
fuse_invalidate_entry_cache(entry);
+ fuse_advise_use_readdirplus(dir);
return newent;
out_iput:
@@ -391,7 +416,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (!forget)
goto out_err;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
err = PTR_ERR(req);
if (IS_ERR(req))
goto out_put_forget_req;
@@ -592,7 +617,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
{
struct fuse_mknod_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -623,7 +648,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
{
struct fuse_mkdir_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -647,7 +672,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
{
struct fuse_conn *fc = get_fuse_conn(dir);
unsigned len = strlen(link) + 1;
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -664,7 +689,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -682,7 +707,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
spin_lock(&fc->lock);
fi->attr_version = ++fc->attr_version;
- drop_nlink(inode);
+ /*
+ * If i_nlink == 0 then unlink doesn't make sense, yet this can
+ * happen if userspace filesystem is careless. It would be
+ * difficult to enforce correct nlink usage so just ignore this
+ * condition here
+ */
+ if (inode->i_nlink > 0)
+ drop_nlink(inode);
spin_unlock(&fc->lock);
fuse_invalidate_attr(inode);
fuse_invalidate_attr(dir);
@@ -696,7 +728,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -723,7 +755,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
int err;
struct fuse_rename_in inarg;
struct fuse_conn *fc = get_fuse_conn(olddir);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -776,7 +808,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
struct fuse_link_in inarg;
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -848,7 +880,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
struct fuse_req *req;
u64 attr_version;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -985,7 +1017,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
/*
* Calling into a user-controlled filesystem gives the filesystem
- * daemon ptrace-like capabilities over the requester process. This
+ * daemon ptrace-like capabilities over the current process. This
* means, that the filesystem daemon is able to record the exact
* filesystem operations performed, and can also control the behavior
* of the requester process in otherwise impossible ways. For example
@@ -996,27 +1028,23 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
* for which the owner of the mount has ptrace privilege. This
* excludes processes started by other users, suid or sgid processes.
*/
-int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
+int fuse_allow_current_process(struct fuse_conn *fc)
{
const struct cred *cred;
- int ret;
if (fc->flags & FUSE_ALLOW_OTHER)
return 1;
- rcu_read_lock();
- ret = 0;
- cred = __task_cred(task);
+ cred = current_cred();
if (uid_eq(cred->euid, fc->user_id) &&
uid_eq(cred->suid, fc->user_id) &&
uid_eq(cred->uid, fc->user_id) &&
gid_eq(cred->egid, fc->group_id) &&
gid_eq(cred->sgid, fc->group_id) &&
gid_eq(cred->gid, fc->group_id))
- ret = 1;
- rcu_read_unlock();
+ return 1;
- return ret;
+ return 0;
}
static int fuse_access(struct inode *inode, int mask)
@@ -1029,7 +1057,7 @@ static int fuse_access(struct inode *inode, int mask)
if (fc->no_access)
return 0;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1077,7 +1105,7 @@ static int fuse_permission(struct inode *inode, int mask)
bool refreshed = false;
int err = 0;
- if (!fuse_allow_task(fc, current))
+ if (!fuse_allow_current_process(fc))
return -EACCES;
/*
@@ -1155,19 +1183,157 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
return 0;
}
-static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
+static int fuse_direntplus_link(struct file *file,
+ struct fuse_direntplus *direntplus,
+ u64 attr_version)
{
int err;
+ struct fuse_entry_out *o = &direntplus->entry_out;
+ struct fuse_dirent *dirent = &direntplus->dirent;
+ struct dentry *parent = file->f_path.dentry;
+ struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
+ struct dentry *dentry;
+ struct dentry *alias;
+ struct inode *dir = parent->d_inode;
+ struct fuse_conn *fc;
+ struct inode *inode;
+
+ if (!o->nodeid) {
+ /*
+ * Unlike in the case of fuse_lookup, zero nodeid does not mean
+ * ENOENT. Instead, it only means the userspace filesystem did
+ * not want to return attributes/handle for this entry.
+ *
+ * So do nothing.
+ */
+ return 0;
+ }
+
+ if (name.name[0] == '.') {
+ /*
+ * We could potentially refresh the attributes of the directory
+ * and its parent?
+ */
+ if (name.len == 1)
+ return 0;
+ if (name.name[1] == '.' && name.len == 2)
+ return 0;
+ }
+ fc = get_fuse_conn(dir);
+
+ name.hash = full_name_hash(name.name, name.len);
+ dentry = d_lookup(parent, &name);
+ if (dentry && dentry->d_inode) {
+ inode = dentry->d_inode;
+ if (get_node_id(inode) == o->nodeid) {
+ struct fuse_inode *fi;
+ fi = get_fuse_inode(inode);
+ spin_lock(&fc->lock);
+ fi->nlookup++;
+ spin_unlock(&fc->lock);
+
+ /*
+ * The other branch to 'found' comes via fuse_iget()
+ * which bumps nlookup inside
+ */
+ goto found;
+ }
+ err = d_invalidate(dentry);
+ if (err)
+ goto out;
+ dput(dentry);
+ dentry = NULL;
+ }
+
+ dentry = d_alloc(parent, &name);
+ err = -ENOMEM;
+ if (!dentry)
+ goto out;
+
+ inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
+ &o->attr, entry_attr_timeout(o), attr_version);
+ if (!inode)
+ goto out;
+
+ alias = d_materialise_unique(dentry, inode);
+ err = PTR_ERR(alias);
+ if (IS_ERR(alias))
+ goto out;
+ if (alias) {
+ dput(dentry);
+ dentry = alias;
+ }
+
+found:
+ fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
+ attr_version);
+
+ fuse_change_entry_timeout(dentry, o);
+
+ err = 0;
+out:
+ if (dentry)
+ dput(dentry);
+ return err;
+}
+
+static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
+ void *dstbuf, filldir_t filldir, u64 attr_version)
+{
+ struct fuse_direntplus *direntplus;
+ struct fuse_dirent *dirent;
+ size_t reclen;
+ int over = 0;
+ int ret;
+
+ while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
+ direntplus = (struct fuse_direntplus *) buf;
+ dirent = &direntplus->dirent;
+ reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
+
+ if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
+ return -EIO;
+ if (reclen > nbytes)
+ break;
+
+ if (!over) {
+ /* We fill entries into dstbuf only as much as
+ it can hold. But we still continue iterating
+ over remaining entries to link them. If not,
+ we need to send a FORGET for each of those
+ which we did not link.
+ */
+ over = filldir(dstbuf, dirent->name, dirent->namelen,
+ file->f_pos, dirent->ino,
+ dirent->type);
+ file->f_pos = dirent->off;
+ }
+
+ buf += reclen;
+ nbytes -= reclen;
+
+ ret = fuse_direntplus_link(file, direntplus, attr_version);
+ if (ret)
+ fuse_force_forget(file, direntplus->entry_out.nodeid);
+ }
+
+ return 0;
+}
+
+static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
+{
+ int plus, err;
size_t nbytes;
struct page *page;
struct inode *inode = file->f_path.dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
+ u64 attr_version = 0;
if (is_bad_inode(inode))
return -EIO;
- req = fuse_get_req(fc);
+ req = fuse_get_req(fc, 1);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1176,17 +1342,34 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
fuse_put_request(fc, req);
return -ENOMEM;
}
+
+ plus = fuse_use_readdirplus(inode, file);
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
- fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
+ req->page_descs[0].length = PAGE_SIZE;
+ if (plus) {
+ attr_version = fuse_get_attr_version(fc);
+ fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
+ FUSE_READDIRPLUS);
+ } else {
+ fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
+ FUSE_READDIR);
+ }
fuse_request_send(fc, req);
nbytes = req->out.args[0].size;
err = req->out.h.error;
fuse_put_request(fc, req);
- if (!err)
- err = parse_dirfile(page_address(page), nbytes, file, dstbuf,
- filldir);
+ if (!err) {
+ if (plus) {
+ err = parse_dirplusfile(page_address(page), nbytes,
+ file, dstbuf, filldir,
+ attr_version);
+ } else {
+ err = parse_dirfile(page_address(page), nbytes, file,
+ dstbuf, filldir);
+ }
+ }
__free_page(page);
fuse_invalidate_attr(inode); /* atime changed */
@@ -1197,7 +1380,7 @@ static char *read_link(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
char *link;
if (IS_ERR(req))
@@ -1391,7 +1574,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
loff_t oldsize;
int err;
- if (!fuse_allow_task(fc, current))
+ if (!fuse_allow_current_process(fc))
return -EACCES;
if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
@@ -1410,7 +1593,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
if (attr->ia_valid & ATTR_SIZE)
is_truncate = true;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1500,7 +1683,7 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- if (!fuse_allow_task(fc, current))
+ if (!fuse_allow_current_process(fc))
return -EACCES;
return fuse_update_attributes(inode, stat, NULL, NULL);
@@ -1518,7 +1701,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
if (fc->no_setxattr)
return -EOPNOTSUPP;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1557,7 +1740,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
if (fc->no_getxattr)
return -EOPNOTSUPP;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1603,13 +1786,13 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
struct fuse_getxattr_out outarg;
ssize_t ret;
- if (!fuse_allow_task(fc, current))
+ if (!fuse_allow_current_process(fc))
return -EACCES;
if (fc->no_listxattr)
return -EOPNOTSUPP;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1654,7 +1837,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
if (fc->no_removexattr)
return -EOPNOTSUPP;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f3ab824fa302..c8071768b950 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -25,7 +25,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
struct fuse_req *req;
int err;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
return NULL;
ff->fc = fc;
- ff->reserved_req = fuse_request_alloc();
+ ff->reserved_req = fuse_request_alloc(0);
if (unlikely(!ff->reserved_req)) {
kfree(ff);
return NULL;
@@ -368,7 +368,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (fc->no_flush)
return 0;
- req = fuse_get_req_nofail(fc, file);
+ req = fuse_get_req_nofail_nopages(fc, file);
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
inarg.lock_owner = fuse_lock_owner_id(fc, id);
@@ -436,7 +436,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
fuse_sync_writes(inode);
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto out;
@@ -544,7 +544,7 @@ static int fuse_readpage(struct file *file, struct page *page)
*/
fuse_wait_on_page_writeback(inode, page->index);
- req = fuse_get_req(fc);
+ req = fuse_get_req(fc, 1);
err = PTR_ERR(req);
if (IS_ERR(req))
goto out;
@@ -555,6 +555,7 @@ static int fuse_readpage(struct file *file, struct page *page)
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
+ req->page_descs[0].length = count;
num_read = fuse_send_read(req, file, pos, count, NULL);
err = req->out.h.error;
fuse_put_request(fc, req);
@@ -641,6 +642,7 @@ struct fuse_fill_data {
struct fuse_req *req;
struct file *file;
struct inode *inode;
+ unsigned nr_pages;
};
static int fuse_readpages_fill(void *_data, struct page *page)
@@ -656,16 +658,26 @@ static int fuse_readpages_fill(void *_data, struct page *page)
(req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
(req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
req->pages[req->num_pages - 1]->index + 1 != page->index)) {
+ int nr_alloc = min_t(unsigned, data->nr_pages,
+ FUSE_MAX_PAGES_PER_REQ);
fuse_send_readpages(req, data->file);
- data->req = req = fuse_get_req(fc);
+ data->req = req = fuse_get_req(fc, nr_alloc);
if (IS_ERR(req)) {
unlock_page(page);
return PTR_ERR(req);
}
}
+
+ if (WARN_ON(req->num_pages >= req->max_pages)) {
+ fuse_put_request(fc, req);
+ return -EIO;
+ }
+
page_cache_get(page);
req->pages[req->num_pages] = page;
+ req->page_descs[req->num_pages].length = PAGE_SIZE;
req->num_pages++;
+ data->nr_pages--;
return 0;
}
@@ -676,6 +688,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_data data;
int err;
+ int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
err = -EIO;
if (is_bad_inode(inode))
@@ -683,7 +696,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
data.file = file;
data.inode = inode;
- data.req = fuse_get_req(fc);
+ data.req = fuse_get_req(fc, nr_alloc);
+ data.nr_pages = nr_pages;
err = PTR_ERR(data.req);
if (IS_ERR(data.req))
goto out;
@@ -786,7 +800,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
res = fuse_send_write(req, file, pos, count, NULL);
- offset = req->page_offset;
+ offset = req->page_descs[0].offset;
count = res;
for (i = 0; i < req->num_pages; i++) {
struct page *page = req->pages[i];
@@ -817,7 +831,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
int err;
req->in.argpages = 1;
- req->page_offset = offset;
+ req->page_descs[0].offset = offset;
do {
size_t tmp;
@@ -857,6 +871,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
err = 0;
req->pages[req->num_pages] = page;
+ req->page_descs[req->num_pages].length = tmp;
req->num_pages++;
iov_iter_advance(ii, tmp);
@@ -869,11 +884,19 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
if (!fc->big_writes)
break;
} while (iov_iter_count(ii) && count < fc->max_write &&
- req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
+ req->num_pages < req->max_pages && offset == 0);
return count > 0 ? count : err;
}
+static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
+{
+ return min_t(unsigned,
+ ((pos + len - 1) >> PAGE_CACHE_SHIFT) -
+ (pos >> PAGE_CACHE_SHIFT) + 1,
+ FUSE_MAX_PAGES_PER_REQ);
+}
+
static ssize_t fuse_perform_write(struct file *file,
struct address_space *mapping,
struct iov_iter *ii, loff_t pos)
@@ -889,8 +912,9 @@ static ssize_t fuse_perform_write(struct file *file,
do {
struct fuse_req *req;
ssize_t count;
+ unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
- req = fuse_get_req(fc);
+ req = fuse_get_req(fc, nr_pages);
if (IS_ERR(req)) {
err = PTR_ERR(req);
break;
@@ -1023,47 +1047,110 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
}
}
-static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
+static inline void fuse_page_descs_length_init(struct fuse_req *req,
+ unsigned index, unsigned nr_pages)
+{
+ int i;
+
+ for (i = index; i < index + nr_pages; i++)
+ req->page_descs[i].length = PAGE_SIZE -
+ req->page_descs[i].offset;
+}
+
+static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
+{
+ return (unsigned long)ii->iov->iov_base + ii->iov_offset;
+}
+
+static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
+ size_t max_size)
+{
+ return min(iov_iter_single_seg_count(ii), max_size);
+}
+
+static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
size_t *nbytesp, int write)
{
- size_t nbytes = *nbytesp;
- unsigned long user_addr = (unsigned long) buf;
- unsigned offset = user_addr & ~PAGE_MASK;
- int npages;
+ size_t nbytes = 0; /* # bytes already packed in req */
/* Special case for kernel I/O: can copy directly into the buffer */
if (segment_eq(get_fs(), KERNEL_DS)) {
+ unsigned long user_addr = fuse_get_user_addr(ii);
+ size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
+
if (write)
req->in.args[1].value = (void *) user_addr;
else
req->out.args[0].value = (void *) user_addr;
+ iov_iter_advance(ii, frag_size);
+ *nbytesp = frag_size;
return 0;
}
- nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
- npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
- npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
- npages = get_user_pages_fast(user_addr, npages, !write, req->pages);
- if (npages < 0)
- return npages;
+ while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
+ unsigned npages;
+ unsigned long user_addr = fuse_get_user_addr(ii);
+ unsigned offset = user_addr & ~PAGE_MASK;
+ size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes);
+ int ret;
+
+ unsigned n = req->max_pages - req->num_pages;
+ frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT);
+
+ npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ npages = clamp(npages, 1U, n);
+
+ ret = get_user_pages_fast(user_addr, npages, !write,
+ &req->pages[req->num_pages]);
+ if (ret < 0)
+ return ret;
- req->num_pages = npages;
- req->page_offset = offset;
+ npages = ret;
+ frag_size = min_t(size_t, frag_size,
+ (npages << PAGE_SHIFT) - offset);
+ iov_iter_advance(ii, frag_size);
+
+ req->page_descs[req->num_pages].offset = offset;
+ fuse_page_descs_length_init(req, req->num_pages, npages);
+
+ req->num_pages += npages;
+ req->page_descs[req->num_pages - 1].length -=
+ (npages << PAGE_SHIFT) - offset - frag_size;
+
+ nbytes += frag_size;
+ }
if (write)
req->in.argpages = 1;
else
req->out.argpages = 1;
- nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
- *nbytesp = min(*nbytesp, nbytes);
+ *nbytesp = nbytes;
return 0;
}
-ssize_t fuse_direct_io(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos, int write)
+static inline int fuse_iter_npages(const struct iov_iter *ii_p)
+{
+ struct iov_iter ii = *ii_p;
+ int npages = 0;
+
+ while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
+ unsigned long user_addr = fuse_get_user_addr(&ii);
+ unsigned offset = user_addr & ~PAGE_MASK;
+ size_t frag_size = iov_iter_single_seg_count(&ii);
+
+ npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ iov_iter_advance(&ii, frag_size);
+ }
+
+ return min(npages, FUSE_MAX_PAGES_PER_REQ);
+}
+
+ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, size_t count, loff_t *ppos,
+ int write)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
@@ -1071,8 +1158,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
loff_t pos = *ppos;
ssize_t res = 0;
struct fuse_req *req;
+ struct iov_iter ii;
+
+ iov_iter_init(&ii, iov, nr_segs, count, 0);
- req = fuse_get_req(fc);
+ req = fuse_get_req(fc, fuse_iter_npages(&ii));
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1080,7 +1170,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
size_t nres;
fl_owner_t owner = current->files;
size_t nbytes = min(count, nmax);
- int err = fuse_get_user_pages(req, buf, &nbytes, write);
+ int err = fuse_get_user_pages(req, &ii, &nbytes, write);
if (err) {
res = err;
break;
@@ -1103,12 +1193,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
count -= nres;
res += nres;
pos += nres;
- buf += nres;
if (nres != nbytes)
break;
if (count) {
fuse_put_request(fc, req);
- req = fuse_get_req(fc);
+ req = fuse_get_req(fc, fuse_iter_npages(&ii));
if (IS_ERR(req))
break;
}
@@ -1122,8 +1211,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
}
EXPORT_SYMBOL_GPL(fuse_direct_io);
-static ssize_t fuse_direct_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos)
{
ssize_t res;
struct inode *inode = file->f_path.dentry->d_inode;
@@ -1131,22 +1220,31 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
if (is_bad_inode(inode))
return -EIO;
- res = fuse_direct_io(file, buf, count, ppos, 0);
+ res = fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs),
+ ppos, 0);
fuse_invalidate_attr(inode);
return res;
}
-static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t fuse_direct_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct iovec iov = { .iov_base = buf, .iov_len = count };
+ return __fuse_direct_read(file, &iov, 1, ppos);
+}
+
+static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos)
{
struct inode *inode = file->f_path.dentry->d_inode;
+ size_t count = iov_length(iov, nr_segs);
ssize_t res;
res = generic_write_checks(file, ppos, &count, 0);
if (!res) {
- res = fuse_direct_io(file, buf, count, ppos, 1);
+ res = fuse_direct_io(file, iov, nr_segs, count, ppos, 1);
if (res > 0)
fuse_write_update_size(inode, *ppos);
}
@@ -1159,6 +1257,7 @@ static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
+ struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
struct inode *inode = file->f_path.dentry->d_inode;
ssize_t res;
@@ -1167,7 +1266,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
/* Don't allow parallel writes to the same file */
mutex_lock(&inode->i_mutex);
- res = __fuse_direct_write(file, buf, count, ppos);
+ res = __fuse_direct_write(file, &iov, 1, ppos);
mutex_unlock(&inode->i_mutex);
return res;
@@ -1272,7 +1371,7 @@ static int fuse_writepage_locked(struct page *page)
set_page_writeback(page);
- req = fuse_request_alloc_nofs();
+ req = fuse_request_alloc_nofs(1);
if (!req)
goto err;
@@ -1293,7 +1392,8 @@ static int fuse_writepage_locked(struct page *page)
req->in.argpages = 1;
req->num_pages = 1;
req->pages[0] = tmp_page;
- req->page_offset = 0;
+ req->page_descs[0].offset = 0;
+ req->page_descs[0].length = PAGE_SIZE;
req->end = fuse_writepage_end;
req->inode = inode;
@@ -1471,7 +1571,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
struct fuse_lk_out outarg;
int err;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1506,7 +1606,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
if (fl->fl_flags & FL_CLOSE)
return 0;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1575,7 +1675,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
if (!inode->i_sb->s_bdev || fc->no_bmap)
return 0;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return 0;
@@ -1873,7 +1973,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
num_pages++;
}
- req = fuse_get_req(fc);
+ req = fuse_get_req(fc, num_pages);
if (IS_ERR(req)) {
err = PTR_ERR(req);
req = NULL;
@@ -1881,6 +1981,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
}
memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
req->num_pages = num_pages;
+ fuse_page_descs_length_init(req, 0, req->num_pages);
/* okay, let's send it to the client */
req->in.h.opcode = FUSE_IOCTL;
@@ -1981,7 +2082,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
struct inode *inode = file->f_dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- if (!fuse_allow_task(fc, current))
+ if (!fuse_allow_current_process(fc))
return -EACCES;
if (is_bad_inode(inode))
@@ -2066,6 +2167,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
return DEFAULT_POLLMASK;
poll_wait(file, &ff->poll_wait, wait);
+ inarg.events = (__u32)poll_requested_events(wait);
/*
* Ask for notification iff there's someone waiting for it.
@@ -2076,7 +2178,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
fuse_register_polled_file(fc, ff);
}
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return POLLERR;
@@ -2126,41 +2228,6 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
return 0;
}
-static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos, int rw)
-{
- const struct iovec *vector = iov;
- ssize_t ret = 0;
-
- while (nr_segs > 0) {
- void __user *base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- if (rw == WRITE)
- nr = __fuse_direct_write(filp, base, len, ppos);
- else
- nr = fuse_direct_read(filp, base, len, ppos);
-
- if (nr < 0) {
- if (!ret)
- ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
-
- return ret;
-}
-
-
static ssize_t
fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
loff_t offset, unsigned long nr_segs)
@@ -2172,7 +2239,10 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
file = iocb->ki_filp;
pos = offset;
- ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw);
+ if (rw == WRITE)
+ ret = __fuse_direct_write(file, iov, nr_segs, &pos);
+ else
+ ret = __fuse_direct_read(file, iov, nr_segs, &pos);
return ret;
}
@@ -2194,7 +2264,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (fc->no_fallocate)
return -EOPNOTSUPP;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e105a53fc72d..6aeba864f070 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -44,6 +44,9 @@
doing the mount will be allowed to access the filesystem */
#define FUSE_ALLOW_OTHER (1 << 1)
+/** Number of page pointers embedded in fuse_req */
+#define FUSE_REQ_INLINE_PAGES 1
+
/** List of active connections */
extern struct list_head fuse_conn_list;
@@ -103,6 +106,15 @@ struct fuse_inode {
/** List of writepage requestst (pending or sent) */
struct list_head writepages;
+
+ /** Miscellaneous bits describing inode state */
+ unsigned long state;
+};
+
+/** FUSE inode state bits */
+enum {
+ /** Advise readdirplus */
+ FUSE_I_ADVISE_RDPLUS,
};
struct fuse_conn;
@@ -200,6 +212,12 @@ struct fuse_out {
struct fuse_arg args[3];
};
+/** FUSE page descriptor */
+struct fuse_page_desc {
+ unsigned int length;
+ unsigned int offset;
+};
+
/** The request state */
enum fuse_req_state {
FUSE_REQ_INIT = 0,
@@ -291,14 +309,23 @@ struct fuse_req {
} misc;
/** page vector */
- struct page *pages[FUSE_MAX_PAGES_PER_REQ];
+ struct page **pages;
+
+ /** page-descriptor vector */
+ struct fuse_page_desc *page_descs;
+
+ /** size of the 'pages' array */
+ unsigned max_pages;
+
+ /** inline page vector */
+ struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
+
+ /** inline page-descriptor vector */
+ struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
/** number of pages in vector */
unsigned num_pages;
- /** offset of data on first page */
- unsigned page_offset;
-
/** File used in the request (or NULL) */
struct fuse_file *ff;
@@ -487,6 +514,12 @@ struct fuse_conn {
/** Use enhanced/automatic page cache invalidation. */
unsigned auto_inval_data:1;
+ /** Does the filesystem support readdirplus? */
+ unsigned do_readdirplus:1;
+
+ /** Does the filesystem want adaptive readdirplus? */
+ unsigned readdirplus_auto:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
@@ -578,6 +611,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
struct fuse_forget_link *fuse_alloc_forget(void);
+/* Used by READDIRPLUS */
+void fuse_force_forget(struct file *file, u64 nodeid);
+
/**
* Initialize READ or READDIR request
*/
@@ -658,9 +694,9 @@ void fuse_ctl_cleanup(void);
/**
* Allocate a request
*/
-struct fuse_req *fuse_request_alloc(void);
+struct fuse_req *fuse_request_alloc(unsigned npages);
-struct fuse_req *fuse_request_alloc_nofs(void);
+struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
/**
* Free a request
@@ -668,14 +704,25 @@ struct fuse_req *fuse_request_alloc_nofs(void);
void fuse_request_free(struct fuse_req *req);
/**
- * Get a request, may fail with -ENOMEM
+ * Get a request, may fail with -ENOMEM,
+ * caller should specify # elements in req->pages[] explicitly
*/
-struct fuse_req *fuse_get_req(struct fuse_conn *fc);
+struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages);
+
+/**
+ * Get a request, may fail with -ENOMEM,
+ * useful for callers who doesn't use req->pages[]
+ */
+static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc)
+{
+ return fuse_get_req(fc, 0);
+}
/**
* Gets a requests for a file operation, always succeeds
*/
-struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file);
+struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
+ struct file *file);
/**
* Decrement reference count of a request. If count goes to zero free
@@ -739,9 +786,9 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc);
int fuse_valid_type(int m);
/**
- * Is task allowed to perform filesystem operation?
+ * Is current process allowed to perform filesystem operation?
*/
-int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task);
+int fuse_allow_current_process(struct fuse_conn *fc);
u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
@@ -776,8 +823,9 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
bool isdir);
-ssize_t fuse_direct_io(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos, int write);
+ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, size_t count, loff_t *ppos,
+ int write);
long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
unsigned int flags);
long fuse_ioctl_common(struct file *file, unsigned int cmd,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 73ca6b72beaf..01353ed75750 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -92,6 +92,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->attr_version = 0;
fi->writectr = 0;
fi->orig_ino = 0;
+ fi->state = 0;
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
INIT_LIST_HEAD(&fi->writepages);
@@ -408,12 +409,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
struct fuse_statfs_out outarg;
int err;
- if (!fuse_allow_task(fc, current)) {
+ if (!fuse_allow_current_process(fc)) {
buf->f_type = FUSE_SUPER_MAGIC;
return 0;
}
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -863,6 +864,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->dont_mask = 1;
if (arg->flags & FUSE_AUTO_INVAL_DATA)
fc->auto_inval_data = 1;
+ if (arg->flags & FUSE_DO_READDIRPLUS)
+ fc->do_readdirplus = 1;
+ if (arg->flags & FUSE_READDIRPLUS_AUTO)
+ fc->readdirplus_auto = 1;
} else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
fc->no_lock = 1;
@@ -889,7 +894,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
- FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA;
+ FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
+ FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -1034,12 +1040,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
/* only now - we want root dentry with NULL ->d_op */
sb->s_d_op = &fuse_dentry_operations;
- init_req = fuse_request_alloc();
+ init_req = fuse_request_alloc(0);
if (!init_req)
goto err_put_root;
if (is_bdev) {
- fc->destroy_req = fuse_request_alloc();
+ fc->destroy_req = fuse_request_alloc(0);
if (!fc->destroy_req)
goto err_free_init_req;
}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index dd057bc6b65b..fc8dc20fdeb9 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -177,11 +177,31 @@ out_nofree:
return mnt;
}
+static int
+nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+ if (NFS_FH(dentry->d_inode)->size != 0)
+ return nfs_getattr(mnt, dentry, stat);
+ generic_fillattr(dentry->d_inode, stat);
+ return 0;
+}
+
+static int
+nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ if (NFS_FH(dentry->d_inode)->size != 0)
+ return nfs_setattr(dentry, attr);
+ return -EACCES;
+}
+
const struct inode_operations nfs_mountpoint_inode_operations = {
.getattr = nfs_getattr,
+ .setattr = nfs_setattr,
};
const struct inode_operations nfs_referral_inode_operations = {
+ .getattr = nfs_namespace_getattr,
+ .setattr = nfs_namespace_setattr,
};
static void nfs_expire_automounts(struct work_struct *work)
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index acc347268124..2e9779b58b7a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -236,11 +236,10 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
error = nfs4_discover_server_trunking(clp, &old);
if (error < 0)
goto error;
+ nfs_put_client(clp);
if (clp != old) {
clp->cl_preserve_clid = true;
- nfs_put_client(clp);
clp = old;
- atomic_inc(&clp->cl_count);
}
return clp;
@@ -306,7 +305,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
.clientid = new->cl_clientid,
.confirm = new->cl_confirm,
};
- int status;
+ int status = -NFS4ERR_STALE_CLIENTID;
spin_lock(&nn->nfs_client_lock);
list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -332,40 +331,33 @@ int nfs40_walk_client_list(struct nfs_client *new,
if (prev)
nfs_put_client(prev);
+ prev = pos;
status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
- if (status == 0) {
+ switch (status) {
+ case -NFS4ERR_STALE_CLIENTID:
+ break;
+ case 0:
nfs4_swap_callback_idents(pos, new);
- nfs_put_client(pos);
+ prev = NULL;
*result = pos;
dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
__func__, pos, atomic_read(&pos->cl_count));
- return 0;
- }
- if (status != -NFS4ERR_STALE_CLIENTID) {
- nfs_put_client(pos);
- dprintk("NFS: <-- %s status = %d, no result\n",
- __func__, status);
- return status;
+ default:
+ goto out;
}
spin_lock(&nn->nfs_client_lock);
- prev = pos;
}
+ spin_unlock(&nn->nfs_client_lock);
- /*
- * No matching nfs_client found. This should be impossible,
- * because the new nfs_client has already been added to
- * nfs_client_list by nfs_get_client().
- *
- * Don't BUG(), since the caller is holding a mutex.
- */
+ /* No match found. The server lost our clientid */
+out:
if (prev)
nfs_put_client(prev);
- spin_unlock(&nn->nfs_client_lock);
- pr_err("NFS: %s Error: no matching nfs_client found\n", __func__);
- return -NFS4ERR_STALE_CLIENTID;
+ dprintk("NFS: <-- %s status = %d\n", __func__, status);
+ return status;
}
#ifdef CONFIG_NFS_V4_1
@@ -432,7 +424,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
{
struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
struct nfs_client *pos, *n, *prev = NULL;
- int error;
+ int status = -NFS4ERR_STALE_CLIENTID;
spin_lock(&nn->nfs_client_lock);
list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -448,14 +440,17 @@ int nfs41_walk_client_list(struct nfs_client *new,
nfs_put_client(prev);
prev = pos;
- error = nfs_wait_client_init_complete(pos);
- if (error < 0) {
+ nfs4_schedule_lease_recovery(pos);
+ status = nfs_wait_client_init_complete(pos);
+ if (status < 0) {
nfs_put_client(pos);
spin_lock(&nn->nfs_client_lock);
continue;
}
-
+ status = pos->cl_cons_state;
spin_lock(&nn->nfs_client_lock);
+ if (status < 0)
+ continue;
}
if (pos->rpc_ops != new->rpc_ops)
@@ -473,6 +468,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
if (!nfs4_match_serverowners(pos, new))
continue;
+ atomic_inc(&pos->cl_count);
spin_unlock(&nn->nfs_client_lock);
dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
__func__, pos, atomic_read(&pos->cl_count));
@@ -481,16 +477,10 @@ int nfs41_walk_client_list(struct nfs_client *new,
return 0;
}
- /*
- * No matching nfs_client found. This should be impossible,
- * because the new nfs_client has already been added to
- * nfs_client_list by nfs_get_client().
- *
- * Don't BUG(), since the caller is holding a mutex.
- */
+ /* No matching nfs_client found. */
spin_unlock(&nn->nfs_client_lock);
- pr_err("NFS: %s Error: no matching nfs_client found\n", __func__);
- return -NFS4ERR_STALE_CLIENTID;
+ dprintk("NFS: <-- %s status = %d\n", __func__, status);
+ return status;
}
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9448c579d41a..e61f68d5ef21 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -136,16 +136,11 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
clp->cl_confirm = clid.confirm;
status = nfs40_walk_client_list(clp, result, cred);
- switch (status) {
- case -NFS4ERR_STALE_CLIENTID:
- set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
- case 0:
+ if (status == 0) {
/* Sustain the lease, even if it's empty. If the clientid4
* goes stale it's of no use for trunking discovery. */
nfs4_schedule_state_renewal(*result);
- break;
}
-
out:
return status;
}
@@ -1863,6 +1858,7 @@ again:
case -ETIMEDOUT:
case -EAGAIN:
ssleep(1);
+ case -NFS4ERR_STALE_CLIENTID:
dprintk("NFS: %s after status %d, retrying\n",
__func__, status);
goto again;
@@ -2022,8 +2018,18 @@ static int nfs4_reset_session(struct nfs_client *clp)
nfs4_begin_drain_session(clp);
cred = nfs4_get_exchange_id_cred(clp);
status = nfs4_proc_destroy_session(clp->cl_session, cred);
- if (status && status != -NFS4ERR_BADSESSION &&
- status != -NFS4ERR_DEADSESSION) {
+ switch (status) {
+ case 0:
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ break;
+ case -NFS4ERR_BACK_CHAN_BUSY:
+ case -NFS4ERR_DELAY:
+ set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+ status = 0;
+ ssleep(1);
+ goto out;
+ default:
status = nfs4_recovery_handle_error(clp, status);
goto out;
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2e7e8c878e5d..b056b1628722 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2589,27 +2589,23 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
struct nfs_server *server;
struct dentry *mntroot = ERR_PTR(-ENOMEM);
struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
- int error;
- dprintk("--> nfs_xdev_mount_common()\n");
+ dprintk("--> nfs_xdev_mount()\n");
mount_info.mntfh = mount_info.cloned->fh;
/* create a new volume representation */
server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
- if (IS_ERR(server)) {
- error = PTR_ERR(server);
- goto out_err;
- }
- mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod);
- dprintk("<-- nfs_xdev_mount_common() = 0\n");
-out:
- return mntroot;
+ if (IS_ERR(server))
+ mntroot = ERR_CAST(server);
+ else
+ mntroot = nfs_fs_mount_common(server, flags,
+ dev_name, &mount_info, nfs_mod);
-out_err:
- dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error);
- goto out;
+ dprintk("<-- nfs_xdev_mount() = %ld\n",
+ IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L);
+ return mntroot;
}
#if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index fdb180769485..f3859354e41a 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -664,8 +664,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
if (ret < 0)
printk(KERN_ERR "NILFS: GC failed during preparation: "
"cannot read source blocks: err=%d\n", ret);
- else
+ else {
+ if (nilfs_sb_need_update(nilfs))
+ set_nilfs_discontinued(nilfs);
ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
+ }
nilfs_remove_all_gcinodes(nilfs);
clear_nilfs_gc_running(nilfs);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6a91e6ffbcbd..f7ed9ee46eb9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
do {
min_flt += t->min_flt;
maj_flt += t->maj_flt;
- gtime += t->gtime;
+ gtime += task_gtime(t);
t = next_thread(t);
} while (t != task);
@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
min_flt = task->min_flt;
maj_flt = task->maj_flt;
task_cputime_adjusted(task, &utime, &stime);
- gtime = task->gtime;
+ gtime = task_gtime(task);
}
/* scale priority and nice values from timeslices to -20..20 */
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index fe72cd073dea..3131a03d7d37 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -177,20 +177,6 @@ const struct file_operations proc_net_operations = {
.readdir = proc_tgid_net_readdir,
};
-
-struct proc_dir_entry *proc_net_fops_create(struct net *net,
- const char *name, umode_t mode, const struct file_operations *fops)
-{
- return proc_create(name, mode, net->proc_net, fops);
-}
-EXPORT_SYMBOL_GPL(proc_net_fops_create);
-
-void proc_net_remove(struct net *net, const char *name)
-{
- remove_proc_entry(name, net->proc_net);
-}
-EXPORT_SYMBOL_GPL(proc_net_remove);
-
static __net_init int proc_net_ns_init(struct net *net)
{
struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 7003e5266f25..288f068740f6 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -167,12 +167,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
{
char *hdr;
- struct timeval timestamp;
+ struct timespec timestamp;
size_t len;
- do_gettimeofday(&timestamp);
+ /* Report zeroed timestamp if called before timekeeping has resumed. */
+ if (__getnstimeofday(&timestamp)) {
+ timestamp.tv_sec = 0;
+ timestamp.tv_nsec = 0;
+ }
hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
- (long)timestamp.tv_sec, (long)timestamp.tv_usec);
+ (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000));
WARN_ON_ONCE(!hdr);
len = hdr ? strlen(hdr) : 0;
persistent_ram_write(prz, hdr, len);
diff --git a/fs/select.c b/fs/select.c
index 2ef72d965036..8c1c96c27062 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/hrtimer.h>
+#include <linux/sched/rt.h>
#include <asm/uaccess.h>
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2df555c66d57..aec3d5c98c94 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -205,6 +205,48 @@ void sysfs_unmerge_group(struct kobject *kobj,
}
EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
+/**
+ * sysfs_add_link_to_group - add a symlink to an attribute group.
+ * @kobj: The kobject containing the group.
+ * @group_name: The name of the group.
+ * @target: The target kobject of the symlink to create.
+ * @link_name: The name of the symlink to create.
+ */
+int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
+ struct kobject *target, const char *link_name)
+{
+ struct sysfs_dirent *dir_sd;
+ int error = 0;
+
+ dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
+ if (!dir_sd)
+ return -ENOENT;
+
+ error = sysfs_create_link_sd(dir_sd, target, link_name);
+ sysfs_put(dir_sd);
+
+ return error;
+}
+EXPORT_SYMBOL_GPL(sysfs_add_link_to_group);
+
+/**
+ * sysfs_remove_link_from_group - remove a symlink from an attribute group.
+ * @kobj: The kobject containing the group.
+ * @group_name: The name of the group.
+ * @link_name: The name of the symlink to remove.
+ */
+void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
+ const char *link_name)
+{
+ struct sysfs_dirent *dir_sd;
+
+ dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
+ if (dir_sd) {
+ sysfs_hash_and_remove(dir_sd, NULL, link_name);
+ sysfs_put(dir_sd);
+ }
+}
+EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
EXPORT_SYMBOL_GPL(sysfs_create_group);
EXPORT_SYMBOL_GPL(sysfs_update_group);
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3c9eb5624f5e..8c940df97a52 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -21,26 +21,17 @@
#include "sysfs.h"
-static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
- const char *name, int warn)
+static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
+ struct kobject *target,
+ const char *name, int warn)
{
- struct sysfs_dirent *parent_sd = NULL;
struct sysfs_dirent *target_sd = NULL;
struct sysfs_dirent *sd = NULL;
struct sysfs_addrm_cxt acxt;
enum kobj_ns_type ns_type;
int error;
- BUG_ON(!name);
-
- if (!kobj)
- parent_sd = &sysfs_root;
- else
- parent_sd = kobj->sd;
-
- error = -EFAULT;
- if (!parent_sd)
- goto out_put;
+ BUG_ON(!name || !parent_sd);
/* target->sd can go away beneath us but is protected with
* sysfs_assoc_lock. Fetch target_sd from it.
@@ -96,6 +87,34 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
}
/**
+ * sysfs_create_link_sd - create symlink to a given object.
+ * @sd: directory we're creating the link in.
+ * @target: object we're pointing to.
+ * @name: name of the symlink.
+ */
+int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
+ const char *name)
+{
+ return sysfs_do_create_link_sd(sd, target, name, 1);
+}
+
+static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
+ const char *name, int warn)
+{
+ struct sysfs_dirent *parent_sd = NULL;
+
+ if (!kobj)
+ parent_sd = &sysfs_root;
+ else
+ parent_sd = kobj->sd;
+
+ if (!parent_sd)
+ return -EFAULT;
+
+ return sysfs_do_create_link_sd(parent_sd, target, name, warn);
+}
+
+/**
* sysfs_create_link - create symlink between two objects.
* @kobj: object whose directory we're creating the link in.
* @target: object we're pointing to.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d73c0932bbd6..d1e4043eb0c3 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -240,3 +240,5 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd);
* symlink.c
*/
extern const struct inode_operations sysfs_symlink_inode_operations;
+int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
+ const char *name);
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 393055fe3aef..0ad23253e8b1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1925,8 +1925,6 @@ xfs_alloc_fix_freelist(
targs.mp = mp;
targs.agbp = agbp;
targs.agno = args->agno;
- targs.mod = targs.minleft = targs.wasdel = targs.userdata =
- targs.minalignslop = 0;
targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
targs.type = XFS_ALLOCTYPE_THIS_AG;
targs.pag = pag;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4111a40ebe1a..5f707e537171 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -86,11 +86,11 @@ xfs_destroy_ioend(
}
if (ioend->io_iocb) {
+ inode_dio_done(ioend->io_inode);
if (ioend->io_isasync) {
aio_complete(ioend->io_iocb, ioend->io_error ?
ioend->io_error : ioend->io_result, 0);
}
- inode_dio_done(ioend->io_inode);
}
mempool_free(ioend, xfs_ioend_pool);
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index aaf472532b3c..888683844d98 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -300,9 +300,12 @@ xfs_attr_set_int(
if (rsvd)
args.trans->t_flags |= XFS_TRANS_RESERVE;
- if ((error = xfs_trans_reserve(args.trans, args.total,
- XFS_ATTRSET_LOG_RES(mp, args.total), 0,
- XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) {
+ error = xfs_trans_reserve(args.trans, args.total,
+ XFS_ATTRSETM_LOG_RES(mp) +
+ XFS_ATTRSETRT_LOG_RES(mp) * args.total,
+ 0, XFS_TRANS_PERM_LOG_RES,
+ XFS_ATTRSET_LOG_COUNT);
+ if (error) {
xfs_trans_cancel(args.trans, 0);
return(error);
}
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 0e92d12765d2..b44af9211bd9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -147,7 +147,10 @@ xfs_bmap_local_to_extents(
xfs_fsblock_t *firstblock, /* first block allocated in xaction */
xfs_extlen_t total, /* total blocks needed by transaction */
int *logflagsp, /* inode logging flags */
- int whichfork); /* data or attr fork */
+ int whichfork, /* data or attr fork */
+ void (*init_fn)(struct xfs_buf *bp,
+ struct xfs_inode *ip,
+ struct xfs_ifork *ifp));
/*
* Search the extents list for the inode, for the extent containing bno.
@@ -357,7 +360,42 @@ xfs_bmap_add_attrfork_extents(
}
/*
- * Called from xfs_bmap_add_attrfork to handle local format files.
+ * Block initialisation functions for local to extent format conversion.
+ * As these get more complex, they will be moved to the relevant files,
+ * but for now they are too simple to worry about.
+ */
+STATIC void
+xfs_bmap_local_to_extents_init_fn(
+ struct xfs_buf *bp,
+ struct xfs_inode *ip,
+ struct xfs_ifork *ifp)
+{
+ bp->b_ops = &xfs_bmbt_buf_ops;
+ memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
+}
+
+STATIC void
+xfs_symlink_local_to_remote(
+ struct xfs_buf *bp,
+ struct xfs_inode *ip,
+ struct xfs_ifork *ifp)
+{
+ /* remote symlink blocks are not verifiable until CRCs come along */
+ bp->b_ops = NULL;
+ memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
+}
+
+/*
+ * Called from xfs_bmap_add_attrfork to handle local format files. Each
+ * different data fork content type needs a different callout to do the
+ * conversion. Some are basic and only require special block initialisation
+ * callouts for the data formating, others (directories) are so specialised they
+ * handle everything themselves.
+ *
+ * XXX (dgc): investigate whether directory conversion can use the generic
+ * formatting callout. It should be possible - it's just a very complex
+ * formatter. it would also require passing the transaction through to the init
+ * function.
*/
STATIC int /* error */
xfs_bmap_add_attrfork_local(
@@ -368,25 +406,29 @@ xfs_bmap_add_attrfork_local(
int *flags) /* inode logging flags */
{
xfs_da_args_t dargs; /* args for dir/attr code */
- int error; /* error return value */
- xfs_mount_t *mp; /* mount structure pointer */
if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
return 0;
+
if (S_ISDIR(ip->i_d.di_mode)) {
- mp = ip->i_mount;
memset(&dargs, 0, sizeof(dargs));
dargs.dp = ip;
dargs.firstblock = firstblock;
dargs.flist = flist;
- dargs.total = mp->m_dirblkfsbs;
+ dargs.total = ip->i_mount->m_dirblkfsbs;
dargs.whichfork = XFS_DATA_FORK;
dargs.trans = tp;
- error = xfs_dir2_sf_to_block(&dargs);
- } else
- error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags,
- XFS_DATA_FORK);
- return error;
+ return xfs_dir2_sf_to_block(&dargs);
+ }
+
+ if (S_ISLNK(ip->i_d.di_mode))
+ return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
+ flags, XFS_DATA_FORK,
+ xfs_symlink_local_to_remote);
+
+ return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags,
+ XFS_DATA_FORK,
+ xfs_bmap_local_to_extents_init_fn);
}
/*
@@ -3099,8 +3141,6 @@ xfs_bmap_extents_to_btree(
args.fsbno = *firstblock;
}
args.minlen = args.maxlen = args.prod = 1;
- args.total = args.minleft = args.alignment = args.mod = args.isfl =
- args.minalignslop = 0;
args.wasdel = wasdel;
*logflagsp = 0;
if ((error = xfs_alloc_vextent(&args))) {
@@ -3221,7 +3261,10 @@ xfs_bmap_local_to_extents(
xfs_fsblock_t *firstblock, /* first block allocated in xaction */
xfs_extlen_t total, /* total blocks needed by transaction */
int *logflagsp, /* inode logging flags */
- int whichfork) /* data or attr fork */
+ int whichfork,
+ void (*init_fn)(struct xfs_buf *bp,
+ struct xfs_inode *ip,
+ struct xfs_ifork *ifp))
{
int error; /* error return value */
int flags; /* logging flags returned */
@@ -3241,12 +3284,12 @@ xfs_bmap_local_to_extents(
xfs_buf_t *bp; /* buffer for extent block */
xfs_bmbt_rec_host_t *ep;/* extent record pointer */
+ ASSERT((ifp->if_flags &
+ (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = ip->i_mount;
args.firstblock = *firstblock;
- ASSERT((ifp->if_flags &
- (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
/*
* Allocate a block. We know we need only one, since the
* file currently fits in an inode.
@@ -3259,20 +3302,21 @@ xfs_bmap_local_to_extents(
args.type = XFS_ALLOCTYPE_NEAR_BNO;
}
args.total = total;
- args.mod = args.minleft = args.alignment = args.wasdel =
- args.isfl = args.minalignslop = 0;
args.minlen = args.maxlen = args.prod = 1;
- if ((error = xfs_alloc_vextent(&args)))
+ error = xfs_alloc_vextent(&args);
+ if (error)
goto done;
- /*
- * Can't fail, the space was reserved.
- */
+
+ /* Can't fail, the space was reserved. */
ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(args.len == 1);
*firstblock = args.fsbno;
bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
- bp->b_ops = &xfs_bmbt_buf_ops;
- memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
+
+ /* initialise the block and copy the data */
+ init_fn(bp, ip, ifp);
+
+ /* account for the change in fork size and log everything */
xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -4680,9 +4724,6 @@ __xfs_bmapi_allocate(
return error;
}
- if (bma->flags & XFS_BMAPI_STACK_SWITCH)
- bma->stack_switch = 1;
-
error = xfs_bmap_alloc(bma);
if (error)
return error;
@@ -4922,8 +4963,32 @@ xfs_bmapi_write(
XFS_STATS_INC(xs_blk_mapw);
if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+ /*
+ * XXX (dgc): This assumes we are only called for inodes that
+ * contain content neutral data in local format. Anything that
+ * contains caller-specific data in local format that needs
+ * transformation to move to a block format needs to do the
+ * conversion to extent format itself.
+ *
+ * Directory data forks and attribute forks handle this
+ * themselves, but with the addition of metadata verifiers every
+ * data fork in local format now contains caller specific data
+ * and as such conversion through this function is likely to be
+ * broken.
+ *
+ * The only likely user of this branch is for remote symlinks,
+ * but we cannot overwrite the data fork contents of the symlink
+ * (EEXIST occurs higher up the stack) and so it will never go
+ * from local format to extent format here. Hence I don't think
+ * this branch is ever executed intentionally and we should
+ * consider removing it and asserting that xfs_bmapi_write()
+ * cannot be called directly on local format forks. i.e. callers
+ * are completely responsible for local to extent format
+ * conversion, not xfs_bmapi_write().
+ */
error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
- &bma.logflags, whichfork);
+ &bma.logflags, whichfork,
+ xfs_bmap_local_to_extents_init_fn);
if (error)
goto error0;
}
@@ -4956,6 +5021,9 @@ xfs_bmapi_write(
bma.flist = flist;
bma.firstblock = firstblock;
+ if (flags & XFS_BMAPI_STACK_SWITCH)
+ bma.stack_switch = 1;
+
while (bno < end && n < *nmap) {
inhole = eof || bma.got.br_startoff > bno;
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 56d1614760cf..4e8f0df82d02 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -487,6 +487,7 @@ _xfs_buf_find(
struct rb_node *parent;
xfs_buf_t *bp;
xfs_daddr_t blkno = map[0].bm_bn;
+ xfs_daddr_t eofs;
int numblks = 0;
int i;
@@ -498,6 +499,23 @@ _xfs_buf_find(
ASSERT(!(numbytes < (1 << btp->bt_sshift)));
ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
+ /*
+ * Corrupted block numbers can get through to here, unfortunately, so we
+ * have to check that the buffer falls within the filesystem bounds.
+ */
+ eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
+ if (blkno >= eofs) {
+ /*
+ * XXX (dgc): we should really be returning EFSCORRUPTED here,
+ * but none of the higher level infrastructure supports
+ * returning a specific error on buffer lookup failures.
+ */
+ xfs_alert(btp->bt_mount,
+ "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
+ __func__, blkno, eofs);
+ return NULL;
+ }
+
/* get tree root */
pag = xfs_perag_get(btp->bt_mount,
xfs_daddr_to_agno(btp->bt_mount, blkno));
@@ -933,8 +951,6 @@ xfs_buf_trylock(
locked = down_trylock(&bp->b_sema) == 0;
if (locked)
XB_SET_OWNER(bp);
- else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
- xfs_log_force(bp->b_target->bt_mount, 0);
trace_xfs_buf_trylock(bp, _RET_IP_);
return locked;
@@ -1487,6 +1503,8 @@ restart:
while (!list_empty(&btp->bt_lru)) {
bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
if (atomic_read(&bp->b_hold) > 1) {
+ trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
+ list_move_tail(&bp->b_lru, &btp->bt_lru);
spin_unlock(&btp->bt_lru_lock);
delay(100);
goto restart;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 77b09750e92c..cf263476d6b4 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -37,109 +37,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
return container_of(lip, struct xfs_buf_log_item, bli_item);
}
-
-#ifdef XFS_TRANS_DEBUG
-/*
- * This function uses an alternate strategy for tracking the bytes
- * that the user requests to be logged. This can then be used
- * in conjunction with the bli_orig array in the buf log item to
- * catch bugs in our callers' code.
- *
- * We also double check the bits set in xfs_buf_item_log using a
- * simple algorithm to check that every byte is accounted for.
- */
-STATIC void
-xfs_buf_item_log_debug(
- xfs_buf_log_item_t *bip,
- uint first,
- uint last)
-{
- uint x;
- uint byte;
- uint nbytes;
- uint chunk_num;
- uint word_num;
- uint bit_num;
- uint bit_set;
- uint *wordp;
-
- ASSERT(bip->bli_logged != NULL);
- byte = first;
- nbytes = last - first + 1;
- bfset(bip->bli_logged, first, nbytes);
- for (x = 0; x < nbytes; x++) {
- chunk_num = byte >> XFS_BLF_SHIFT;
- word_num = chunk_num >> BIT_TO_WORD_SHIFT;
- bit_num = chunk_num & (NBWORD - 1);
- wordp = &(bip->__bli_format.blf_data_map[word_num]);
- bit_set = *wordp & (1 << bit_num);
- ASSERT(bit_set);
- byte++;
- }
-}
-
-/*
- * This function is called when we flush something into a buffer without
- * logging it. This happens for things like inodes which are logged
- * separately from the buffer.
- */
-void
-xfs_buf_item_flush_log_debug(
- xfs_buf_t *bp,
- uint first,
- uint last)
-{
- xfs_buf_log_item_t *bip = bp->b_fspriv;
- uint nbytes;
-
- if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF))
- return;
-
- ASSERT(bip->bli_logged != NULL);
- nbytes = last - first + 1;
- bfset(bip->bli_logged, first, nbytes);
-}
-
-/*
- * This function is called to verify that our callers have logged
- * all the bytes that they changed.
- *
- * It does this by comparing the original copy of the buffer stored in
- * the buf log item's bli_orig array to the current copy of the buffer
- * and ensuring that all bytes which mismatch are set in the bli_logged
- * array of the buf log item.
- */
-STATIC void
-xfs_buf_item_log_check(
- xfs_buf_log_item_t *bip)
-{
- char *orig;
- char *buffer;
- int x;
- xfs_buf_t *bp;
-
- ASSERT(bip->bli_orig != NULL);
- ASSERT(bip->bli_logged != NULL);
-
- bp = bip->bli_buf;
- ASSERT(bp->b_length > 0);
- ASSERT(bp->b_addr != NULL);
- orig = bip->bli_orig;
- buffer = bp->b_addr;
- for (x = 0; x < BBTOB(bp->b_length); x++) {
- if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
- xfs_emerg(bp->b_mount,
- "%s: bip %x buffer %x orig %x index %d",
- __func__, bip, bp, orig, x);
- ASSERT(0);
- }
- }
-}
-#else
-#define xfs_buf_item_log_debug(x,y,z)
-#define xfs_buf_item_log_check(x)
-#endif
-
STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
/*
@@ -429,7 +326,6 @@ xfs_buf_item_format(
* Check to make sure everything is consistent.
*/
trace_xfs_buf_item_format(bip);
- xfs_buf_item_log_check(bip);
}
/*
@@ -573,8 +469,18 @@ xfs_buf_item_push(
if (xfs_buf_ispinned(bp))
return XFS_ITEM_PINNED;
- if (!xfs_buf_trylock(bp))
+ if (!xfs_buf_trylock(bp)) {
+ /*
+ * If we have just raced with a buffer being pinned and it has
+ * been marked stale, we could end up stalling until someone else
+ * issues a log force to unpin the stale buffer. Check for the
+ * race condition here so xfsaild recognizes the buffer is pinned
+ * and queues a log force to move it along.
+ */
+ if (xfs_buf_ispinned(bp))
+ return XFS_ITEM_PINNED;
return XFS_ITEM_LOCKED;
+ }
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
@@ -652,7 +558,10 @@ xfs_buf_item_unlock(
/*
* If the buf item isn't tracking any data, free it, otherwise drop the
- * reference we hold to it.
+ * reference we hold to it. If we are aborting the transaction, this may
+ * be the only reference to the buf item, so we free it anyway
+ * regardless of whether it is dirty or not. A dirty abort implies a
+ * shutdown, anyway.
*/
clean = 1;
for (i = 0; i < bip->bli_format_count; i++) {
@@ -664,7 +573,12 @@ xfs_buf_item_unlock(
}
if (clean)
xfs_buf_item_relse(bp);
- else
+ else if (aborted) {
+ if (atomic_dec_and_test(&bip->bli_refcount)) {
+ ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
+ xfs_buf_item_relse(bp);
+ }
+ } else
atomic_dec(&bip->bli_refcount);
if (!hold)
@@ -915,8 +829,6 @@ xfs_buf_item_log_segment(
mask = (1 << end_bit) - 1;
*wordp |= mask;
}
-
- xfs_buf_item_log_debug(bip, first, last);
}
/*
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 16def435944a..ee36c88ecfde 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -98,10 +98,6 @@ typedef struct xfs_buf_log_item {
unsigned int bli_flags; /* misc flags */
unsigned int bli_recur; /* lock recursion count */
atomic_t bli_refcount; /* cnt of tp refs */
-#ifdef XFS_TRANS_DEBUG
- char *bli_orig; /* original buffer copy */
- char *bli_logged; /* bytes logged (bitmap) */
-#endif
int bli_format_count; /* count of headers */
struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
struct xfs_buf_log_format __bli_format; /* embedded in-log header */
@@ -117,16 +113,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
void xfs_buf_iodone_callbacks(struct xfs_buf *);
void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
-#ifdef XFS_TRANS_DEBUG
-void
-xfs_buf_item_flush_log_debug(
- struct xfs_buf *bp,
- uint first,
- uint last);
-#else
-#define xfs_buf_item_flush_log_debug(bp, first, last)
-#endif
-
#endif /* __KERNEL__ */
#endif /* __XFS_BUF_ITEM_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d0e9c74d3d96..a8bd26b82ecb 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -246,10 +246,10 @@ xfs_swap_extents(
goto out_unlock;
}
- error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
if (error)
goto out_unlock;
- truncate_pagecache_range(VFS_I(ip), 0, -1);
+ truncate_pagecache_range(VFS_I(tip), 0, -1);
/* Verify O_DIRECT for ftmp */
if (VN_CACHED(VFS_I(tip)) != 0) {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9e1bf5294c91..8025eb23ad72 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -612,15 +612,9 @@ xfs_qm_dqread(
if (flags & XFS_QMOPT_DQALLOC) {
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
- XFS_WRITE_LOG_RES(mp) +
- /*
- * Round the chunklen up to the next multiple
- * of 128 (buf log item chunk size)).
- */
- BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
- 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_WRITE_LOG_COUNT);
+ XFS_QM_DQALLOC_LOG_RES(mp), 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_WRITE_LOG_COUNT);
if (error)
goto error1;
cancelflags = XFS_TRANS_RELEASE_LOG_RES;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 94eaeedc5498..2866b8c78b7a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -709,8 +709,8 @@ xfs_fs_log_dummy(
int error;
tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
- error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
- XFS_DEFAULT_LOG_COUNT);
+ error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
+ XFS_DEFAULT_LOG_COUNT);
if (error) {
xfs_trans_cancel(tp, 0);
return error;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a815412eab80..515bf71ce01c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -279,8 +279,6 @@ xfs_ialloc_ag_alloc(
(args.agbno < be32_to_cpu(agi->agi_length)))) {
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
args.type = XFS_ALLOCTYPE_THIS_BNO;
- args.mod = args.total = args.wasdel = args.isfl =
- args.userdata = args.minalignslop = 0;
args.prod = 1;
/*
@@ -333,8 +331,6 @@ xfs_ialloc_ag_alloc(
* Allocate a fixed-size extent of inodes.
*/
args.type = XFS_ALLOCTYPE_NEAR_BNO;
- args.mod = args.total = args.wasdel = args.isfl =
- args.userdata = args.minalignslop = 0;
args.prod = 1;
/*
* Allow space for the inode btree to split.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 66282dcb821b..4f201656d2d9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2379,9 +2379,6 @@ xfs_iflush_fork(
char *cp;
xfs_ifork_t *ifp;
xfs_mount_t *mp;
-#ifdef XFS_TRANS_DEBUG
- int first;
-#endif
static const short brootflag[2] =
{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
static const short dataflag[2] =
@@ -2724,9 +2721,6 @@ xfs_iflush_int(
xfs_inode_log_item_t *iip;
xfs_dinode_t *dip;
xfs_mount_t *mp;
-#ifdef XFS_TRANS_DEBUG
- int first;
-#endif
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(xfs_isiflocked(ip));
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 22baf6ea4fac..237e7f6f2ab3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -419,6 +419,7 @@ static inline void xfs_iflock(struct xfs_inode *ip)
static inline void xfs_ifunlock(struct xfs_inode *ip)
{
xfs_iflags_clear(ip, XFS_IFLOCK);
+ smp_mb();
wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
}
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d041d47d9d86..f034bd1652f0 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -269,17 +269,6 @@ xfs_inode_item_format(
} else {
ASSERT(!(iip->ili_fields &
XFS_ILOG_DBROOT));
-#ifdef XFS_TRANS_DEBUG
- if (iip->ili_root_size > 0) {
- ASSERT(iip->ili_root_size ==
- ip->i_df.if_broot_bytes);
- ASSERT(memcmp(iip->ili_orig_root,
- ip->i_df.if_broot,
- iip->ili_root_size) == 0);
- } else {
- ASSERT(ip->i_df.if_broot_bytes == 0);
- }
-#endif
iip->ili_fields &= ~XFS_ILOG_DBROOT;
}
break;
@@ -678,11 +667,6 @@ void
xfs_inode_item_destroy(
xfs_inode_t *ip)
{
-#ifdef XFS_TRANS_DEBUG
- if (ip->i_itemp->ili_root_size != 0) {
- kmem_free(ip->i_itemp->ili_orig_root);
- }
-#endif
kmem_zone_free(xfs_ili_zone, ip->i_itemp);
}
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 376d4d0b2635..779812fb3d80 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -148,10 +148,6 @@ typedef struct xfs_inode_log_item {
data exts */
struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
attr exts */
-#ifdef XFS_TRANS_DEBUG
- int ili_root_size;
- char *ili_orig_root;
-#endif
xfs_inode_log_format_t ili_format; /* logged structure */
} xfs_inode_log_item_t;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index add06b4e9a63..912d83d8860a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -311,6 +311,62 @@ xfs_iomap_eof_want_preallocate(
}
/*
+ * Determine the initial size of the preallocation. We are beyond the current
+ * EOF here, but we need to take into account whether this is a sparse write or
+ * an extending write when determining the preallocation size. Hence we need to
+ * look up the extent that ends at the current write offset and use the result
+ * to determine the preallocation size.
+ *
+ * If the extent is a hole, then preallocation is essentially disabled.
+ * Otherwise we take the size of the preceeding data extent as the basis for the
+ * preallocation size. If the size of the extent is greater than half the
+ * maximum extent length, then use the current offset as the basis. This ensures
+ * that for large files the preallocation size always extends to MAXEXTLEN
+ * rather than falling short due to things like stripe unit/width alignment of
+ * real extents.
+ */
+STATIC int
+xfs_iomap_eof_prealloc_initial_size(
+ struct xfs_mount *mp,
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_bmbt_irec_t *imap,
+ int nimaps)
+{
+ xfs_fileoff_t start_fsb;
+ int imaps = 1;
+ int error;
+
+ ASSERT(nimaps >= imaps);
+
+ /* if we are using a specific prealloc size, return now */
+ if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+ return 0;
+
+ /*
+ * As we write multiple pages, the offset will always align to the
+ * start of a page and hence point to a hole at EOF. i.e. if the size is
+ * 4096 bytes, we only have one block at FSB 0, but XFS_B_TO_FSB(4096)
+ * will return FSB 1. Hence if there are blocks in the file, we want to
+ * point to the block prior to the EOF block and not the hole that maps
+ * directly at @offset.
+ */
+ start_fsb = XFS_B_TO_FSB(mp, offset);
+ if (start_fsb)
+ start_fsb--;
+ error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE);
+ if (error)
+ return 0;
+
+ ASSERT(imaps == 1);
+ if (imap[0].br_startblock == HOLESTARTBLOCK)
+ return 0;
+ if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
+ return imap[0].br_blockcount;
+ return XFS_B_TO_FSB(mp, offset);
+}
+
+/*
* If we don't have a user specified preallocation size, dynamically increase
* the preallocation size as the size of the file grows. Cap the maximum size
* at a single extent or less if the filesystem is near full. The closer the
@@ -319,20 +375,19 @@ xfs_iomap_eof_want_preallocate(
STATIC xfs_fsblock_t
xfs_iomap_prealloc_size(
struct xfs_mount *mp,
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ struct xfs_bmbt_irec *imap,
+ int nimaps)
{
xfs_fsblock_t alloc_blocks = 0;
- if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
+ alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
+ imap, nimaps);
+ if (alloc_blocks > 0) {
int shift = 0;
int64_t freesp;
- /*
- * rounddown_pow_of_two() returns an undefined result
- * if we pass in alloc_blocks = 0. Hence the "+ 1" to
- * ensure we always pass in a non-zero value.
- */
- alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
rounddown_pow_of_two(alloc_blocks));
@@ -351,6 +406,15 @@ xfs_iomap_prealloc_size(
}
if (shift)
alloc_blocks >>= shift;
+
+ /*
+ * If we are still trying to allocate more space than is
+ * available, squash the prealloc hard. This can happen if we
+ * have a large file on a small filesystem and the above
+ * lowspace thresholds are smaller than MAXEXTLEN.
+ */
+ while (alloc_blocks >= freesp)
+ alloc_blocks >>= 4;
}
if (alloc_blocks < mp->m_writeio_blocks)
@@ -390,7 +454,6 @@ xfs_iomap_write_delay(
extsz = xfs_get_extsz_hint(ip);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
-
error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
imap, XFS_WRITE_IMAPS, &prealloc);
if (error)
@@ -398,7 +461,10 @@ xfs_iomap_write_delay(
retry:
if (prealloc) {
- xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
+ xfs_fsblock_t alloc_blocks;
+
+ alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap,
+ XFS_WRITE_IMAPS);
aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 46bd9d52ab51..eec226f78a40 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -120,7 +120,7 @@ xlog_verify_iclog(
struct xlog *log,
struct xlog_in_core *iclog,
int count,
- boolean_t syncing);
+ bool syncing);
STATIC void
xlog_verify_tail_lsn(
struct xlog *log,
@@ -1737,7 +1737,7 @@ xlog_sync(
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
- xlog_verify_iclog(log, iclog, count, B_TRUE);
+ xlog_verify_iclog(log, iclog, count, true);
/* account for log which doesn't start at block #0 */
XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
@@ -3611,7 +3611,7 @@ xlog_verify_iclog(
struct xlog *log,
struct xlog_in_core *iclog,
int count,
- boolean_t syncing)
+ bool syncing)
{
xlog_op_header_t *ophead;
xlog_in_core_t *icptr;
@@ -3659,7 +3659,7 @@ xlog_verify_iclog(
/* clientid is only 1 byte */
field_offset = (__psint_t)
((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr);
- if (syncing == B_FALSE || (field_offset & 0x1ff)) {
+ if (!syncing || (field_offset & 0x1ff)) {
clientid = ophead->oh_clientid;
} else {
idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap);
@@ -3682,7 +3682,7 @@ xlog_verify_iclog(
/* check length */
field_offset = (__psint_t)
((xfs_caddr_t)&(ophead->oh_len) - base_ptr);
- if (syncing == B_FALSE || (field_offset & 0x1ff)) {
+ if (!syncing || (field_offset & 0x1ff)) {
op_len = be32_to_cpu(ophead->oh_len);
} else {
idx = BTOBBT((__psint_t)&ophead->oh_len -
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da508463ff10..3806088a8f77 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -658,7 +658,7 @@ xfs_sb_quiet_read_verify(
return;
}
/* quietly fail */
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, EWRONGFS);
}
static void
@@ -1109,8 +1109,8 @@ xfs_mount_reset_sbqflags(
return 0;
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
- error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
- XFS_DEFAULT_LOG_COUNT);
+ error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
+ 0, 0, XFS_DEFAULT_LOG_COUNT);
if (error) {
xfs_trans_cancel(tp, 0);
xfs_alert(mp, "%s: Superblock update failed!", __func__);
@@ -1583,8 +1583,8 @@ xfs_log_sbcount(xfs_mount_t *mp)
return 0;
tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
- error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
- XFS_DEFAULT_LOG_COUNT);
+ error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
+ XFS_DEFAULT_LOG_COUNT);
if (error) {
xfs_trans_cancel(tp, 0);
return error;
@@ -1945,8 +1945,8 @@ xfs_mount_log_sb(
XFS_SB_VERSIONNUM));
tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
- error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
- XFS_DEFAULT_LOG_COUNT);
+ error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
+ XFS_DEFAULT_LOG_COUNT);
if (error) {
xfs_trans_cancel(tp, 0);
return error;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bab8314507e4..bc907061d392 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -34,12 +34,19 @@ typedef struct xfs_trans_reservations {
uint tr_addafork; /* cvt inode to attributed trans */
uint tr_writeid; /* write setuid/setgid file */
uint tr_attrinval; /* attr fork buffer invalidation */
- uint tr_attrset; /* set/create an attribute */
+ uint tr_attrsetm; /* set/create an attribute at mount time */
+ uint tr_attrsetrt; /* set/create an attribute at runtime */
uint tr_attrrm; /* remove an attribute */
uint tr_clearagi; /* clear bad agi unlinked ino bucket */
uint tr_growrtalloc; /* grow realtime allocations */
uint tr_growrtzero; /* grow realtime zeroing */
uint tr_growrtfree; /* grow realtime freeing */
+ uint tr_qm_sbchange; /* change quota flags */
+ uint tr_qm_setqlim; /* adjust quota limits */
+ uint tr_qm_dqalloc; /* allocate quota on disk */
+ uint tr_qm_quotaoff; /* turn quota off */
+ uint tr_qm_equotaoff;/* end of turn quota off */
+ uint tr_sb; /* modify superblock */
} xfs_trans_reservations_t;
#ifndef __KERNEL__
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 60eff4763156..e5b5cf973781 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1584,10 +1584,9 @@ xfs_qm_write_sb_changes(
int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
- if ((error = xfs_trans_reserve(tp, 0,
- mp->m_sb.sb_sectsize + 128, 0,
- 0,
- XFS_DEFAULT_LOG_COUNT))) {
+ error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
+ 0, 0, XFS_DEFAULT_LOG_COUNT);
+ if (error) {
xfs_trans_cancel(tp, 0);
return error;
}
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 6b39115bf145..2d02eac1c9a8 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -146,7 +146,7 @@ xfs_qm_newmount(
* inode goes inactive and wants to free blocks,
* or via xfs_log_mount_finish.
*/
- *needquotamount = B_TRUE;
+ *needquotamount = true;
*quotaflags = mp->m_qflags;
mp->m_qflags = 0;
}
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 8a59f8546552..cf9a34051e07 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -408,10 +408,10 @@ xfs_qm_scall_getqstat(
{
struct xfs_quotainfo *q = mp->m_quotainfo;
struct xfs_inode *uip, *gip;
- boolean_t tempuqip, tempgqip;
+ bool tempuqip, tempgqip;
uip = gip = NULL;
- tempuqip = tempgqip = B_FALSE;
+ tempuqip = tempgqip = false;
memset(out, 0, sizeof(fs_quota_stat_t));
out->qs_version = FS_QSTAT_VERSION;
@@ -434,12 +434,12 @@ xfs_qm_scall_getqstat(
if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
0, 0, &uip) == 0)
- tempuqip = B_TRUE;
+ tempuqip = true;
}
if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
0, 0, &gip) == 0)
- tempgqip = B_TRUE;
+ tempgqip = true;
}
if (uip) {
out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
@@ -490,8 +490,9 @@ xfs_qm_scall_setqlim(
return 0;
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
- if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
- 0, 0, XFS_DEFAULT_LOG_COUNT))) {
+ error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
+ 0, 0, XFS_DEFAULT_LOG_COUNT);
+ if (error) {
xfs_trans_cancel(tp, 0);
return (error);
}
@@ -638,8 +639,9 @@ xfs_qm_log_quotaoff_end(
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
- if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
- 0, 0, XFS_DEFAULT_LOG_COUNT))) {
+ error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_END_LOG_RES(mp),
+ 0, 0, XFS_DEFAULT_LOG_COUNT);
+ if (error) {
xfs_trans_cancel(tp, 0);
return (error);
}
@@ -671,14 +673,10 @@ xfs_qm_log_quotaoff(
uint oldsbqflag=0;
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
- if ((error = xfs_trans_reserve(tp, 0,
- sizeof(xfs_qoff_logitem_t) * 2 +
- mp->m_sb.sb_sectsize + 128,
- 0,
- 0,
- XFS_DEFAULT_LOG_COUNT))) {
+ error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_LOG_RES(mp),
+ 0, 0, XFS_DEFAULT_LOG_COUNT);
+ if (error)
goto error0;
- }
qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
xfs_trans_log_quotaoff_item(tp, qoffi);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ab8839b26272..c407121873b4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -139,9 +139,9 @@ static const match_table_t tokens = {
STATIC unsigned long
-suffix_strtoul(char *s, char **endp, unsigned int base)
+suffix_kstrtoint(char *s, unsigned int base, int *res)
{
- int last, shift_left_factor = 0;
+ int last, shift_left_factor = 0, _res;
char *value = s;
last = strlen(value) - 1;
@@ -158,7 +158,10 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
value[last] = '\0';
}
- return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
+ if (kstrtoint(s, base, &_res))
+ return -EINVAL;
+ *res = _res << shift_left_factor;
+ return 0;
}
/*
@@ -174,7 +177,7 @@ xfs_parseargs(
char *options)
{
struct super_block *sb = mp->m_super;
- char *this_char, *value, *eov;
+ char *this_char, *value;
int dsunit = 0;
int dswidth = 0;
int iosize = 0;
@@ -230,14 +233,16 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- mp->m_logbufs = simple_strtoul(value, &eov, 10);
+ if (kstrtoint(value, 10, &mp->m_logbufs))
+ return EINVAL;
} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
return EINVAL;
}
- mp->m_logbsize = suffix_strtoul(value, &eov, 10);
+ if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
+ return EINVAL;
} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
@@ -266,7 +271,8 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- iosize = simple_strtoul(value, &eov, 10);
+ if (kstrtoint(value, 10, &iosize))
+ return EINVAL;
iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
if (!value || !*value) {
@@ -274,7 +280,8 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- iosize = suffix_strtoul(value, &eov, 10);
+ if (suffix_kstrtoint(value, 10, &iosize))
+ return EINVAL;
iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_GRPID) ||
!strcmp(this_char, MNTOPT_BSDGROUPS)) {
@@ -296,14 +303,16 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- dsunit = simple_strtoul(value, &eov, 10);
+ if (kstrtoint(value, 10, &dsunit))
+ return EINVAL;
} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
return EINVAL;
}
- dswidth = simple_strtoul(value, &eov, 10);
+ if (kstrtoint(value, 10, &dswidth))
+ return EINVAL;
} else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2e137d4a85ae..16a812977eab 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_item_iodone);
DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
DEFINE_BUF_EVENT(xfs_buf_error_relse);
+DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 06ed520a767f..2fd7c1ff1d21 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -37,14 +37,45 @@
#include "xfs_extent_busy.h"
#include "xfs_bmap.h"
#include "xfs_quota.h"
+#include "xfs_qm.h"
#include "xfs_trans_priv.h"
#include "xfs_trans_space.h"
#include "xfs_inode_item.h"
+#include "xfs_log_priv.h"
+#include "xfs_buf_item.h"
#include "xfs_trace.h"
kmem_zone_t *xfs_trans_zone;
kmem_zone_t *xfs_log_item_desc_zone;
+/*
+ * A buffer has a format structure overhead in the log in addition
+ * to the data, so we need to take this into account when reserving
+ * space in a transaction for a buffer. Round the space required up
+ * to a multiple of 128 bytes so that we don't change the historical
+ * reservation that has been used for this overhead.
+ */
+STATIC uint
+xfs_buf_log_overhead(void)
+{
+ return round_up(sizeof(struct xlog_op_header) +
+ sizeof(struct xfs_buf_log_format), 128);
+}
+
+/*
+ * Calculate out transaction log reservation per item in bytes.
+ *
+ * The nbufs argument is used to indicate the number of items that
+ * will be changed in a transaction. size is used to tell how many
+ * bytes should be reserved per item.
+ */
+STATIC uint
+xfs_calc_buf_res(
+ uint nbufs,
+ uint size)
+{
+ return nbufs * (size + xfs_buf_log_overhead());
+}
/*
* Various log reservation values.
@@ -85,18 +116,15 @@ xfs_calc_write_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- MAX((mp->m_sb.sb_inodesize +
- XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
- 2 * mp->m_sb.sb_sectsize +
- mp->m_sb.sb_sectsize +
- XFS_ALLOCFREE_LOG_RES(mp, 2) +
- 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
- XFS_ALLOCFREE_LOG_COUNT(mp, 2))),
- (2 * mp->m_sb.sb_sectsize +
- 2 * mp->m_sb.sb_sectsize +
- mp->m_sb.sb_sectsize +
- XFS_ALLOCFREE_LOG_RES(mp, 2) +
- 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
+ MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+ XFS_FSB_TO_B(mp, 1))),
+ (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+ XFS_FSB_TO_B(mp, 1))));
}
/*
@@ -117,18 +145,17 @@ xfs_calc_itruncate_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- MAX((mp->m_sb.sb_inodesize +
- XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) +
- 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
- (4 * mp->m_sb.sb_sectsize +
- 4 * mp->m_sb.sb_sectsize +
- mp->m_sb.sb_sectsize +
- XFS_ALLOCFREE_LOG_RES(mp, 4) +
- 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) +
- 128 * 5 +
- XFS_ALLOCFREE_LOG_RES(mp, 1) +
- 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
+ MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
+ XFS_FSB_TO_B(mp, 1))),
+ (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(5, 0) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
+ mp->m_in_maxlevels, 0)));
}
/*
@@ -148,14 +175,12 @@ xfs_calc_rename_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- MAX((4 * mp->m_sb.sb_inodesize +
- 2 * XFS_DIROP_LOG_RES(mp) +
- 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))),
- (3 * mp->m_sb.sb_sectsize +
- 3 * mp->m_sb.sb_sectsize +
- mp->m_sb.sb_sectsize +
- XFS_ALLOCFREE_LOG_RES(mp, 3) +
- 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
+ MAX((xfs_calc_buf_res(4, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
+ XFS_FSB_TO_B(mp, 1))),
+ (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
+ XFS_FSB_TO_B(mp, 1))));
}
/*
@@ -175,15 +200,12 @@ xfs_calc_link_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- MAX((mp->m_sb.sb_inodesize +
- mp->m_sb.sb_inodesize +
- XFS_DIROP_LOG_RES(mp) +
- 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
- (mp->m_sb.sb_sectsize +
- mp->m_sb.sb_sectsize +
- mp->m_sb.sb_sectsize +
- XFS_ALLOCFREE_LOG_RES(mp, 1) +
- 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
+ MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
+ XFS_FSB_TO_B(mp, 1))),
+ (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ XFS_FSB_TO_B(mp, 1))));
}
/*
@@ -203,15 +225,12 @@ xfs_calc_remove_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- MAX((mp->m_sb.sb_inodesize +
- mp->m_sb.sb_inodesize +
- XFS_DIROP_LOG_RES(mp) +
- 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
- (2 * mp->m_sb.sb_sectsize +
- 2 * mp->m_sb.sb_sectsize +
- mp->m_sb.sb_sectsize +
- XFS_ALLOCFREE_LOG_RES(mp, 2) +
- 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
+ MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
+ XFS_FSB_TO_B(mp, 1))),
+ (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+ XFS_FSB_TO_B(mp, 1))));
}
/*
@@ -233,18 +252,18 @@ xfs_calc_symlink_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- MAX((mp->m_sb.sb_inodesize +
- mp->m_sb.sb_inodesize +
- XFS_FSB_TO_B(mp, 1) +
- XFS_DIROP_LOG_RES(mp) +
- 1024 +
- 128 * (4 + XFS_DIROP_LOG_COUNT(mp))),
- (2 * mp->m_sb.sb_sectsize +
- XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
- XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
- XFS_ALLOCFREE_LOG_RES(mp, 1) +
- 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
+ MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(1, 1024)),
+ (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(mp->m_in_maxlevels,
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ XFS_FSB_TO_B(mp, 1))));
}
/*
@@ -267,18 +286,19 @@ xfs_calc_create_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- MAX((mp->m_sb.sb_inodesize +
- mp->m_sb.sb_inodesize +
+ MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+ (uint)XFS_FSB_TO_B(mp, 1) +
+ xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
+ XFS_FSB_TO_B(mp, 1))),
+ (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
mp->m_sb.sb_sectsize +
- XFS_FSB_TO_B(mp, 1) +
- XFS_DIROP_LOG_RES(mp) +
- 128 * (3 + XFS_DIROP_LOG_COUNT(mp))),
- (3 * mp->m_sb.sb_sectsize +
- XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
- XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
- XFS_ALLOCFREE_LOG_RES(mp, 1) +
- 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
+ xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(mp->m_in_maxlevels,
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ XFS_FSB_TO_B(mp, 1))));
}
/*
@@ -306,16 +326,16 @@ xfs_calc_ifree_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- mp->m_sb.sb_inodesize +
- mp->m_sb.sb_sectsize +
- mp->m_sb.sb_sectsize +
- XFS_FSB_TO_B(mp, 1) +
+ xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
XFS_INODE_CLUSTER_SIZE(mp)) +
- 128 * 5 +
- XFS_ALLOCFREE_LOG_RES(mp, 1) +
- 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
- XFS_ALLOCFREE_LOG_COUNT(mp, 1));
+ xfs_calc_buf_res(1, 0) +
+ xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
+ mp->m_in_maxlevels, 0) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
}
/*
@@ -343,9 +363,9 @@ STATIC uint
xfs_calc_growdata_reservation(
struct xfs_mount *mp)
{
- return mp->m_sb.sb_sectsize * 3 +
- XFS_ALLOCFREE_LOG_RES(mp, 1) +
- 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1));
+ return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
}
/*
@@ -362,12 +382,12 @@ STATIC uint
xfs_calc_growrtalloc_reservation(
struct xfs_mount *mp)
{
- return 2 * mp->m_sb.sb_sectsize +
- XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
- mp->m_sb.sb_inodesize +
- XFS_ALLOCFREE_LOG_RES(mp, 1) +
- 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
- XFS_ALLOCFREE_LOG_COUNT(mp, 1));
+ return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
}
/*
@@ -379,7 +399,7 @@ STATIC uint
xfs_calc_growrtzero_reservation(
struct xfs_mount *mp)
{
- return mp->m_sb.sb_blocksize + 128;
+ return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
}
/*
@@ -396,11 +416,10 @@ STATIC uint
xfs_calc_growrtfree_reservation(
struct xfs_mount *mp)
{
- return mp->m_sb.sb_sectsize +
- 2 * mp->m_sb.sb_inodesize +
- mp->m_sb.sb_blocksize +
- mp->m_rsumsize +
- 128 * 5;
+ return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
+ xfs_calc_buf_res(1, mp->m_rsumsize);
}
/*
@@ -411,7 +430,7 @@ STATIC uint
xfs_calc_swrite_reservation(
struct xfs_mount *mp)
{
- return mp->m_sb.sb_inodesize + 128;
+ return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
}
/*
@@ -421,7 +440,7 @@ xfs_calc_swrite_reservation(
STATIC uint
xfs_calc_writeid_reservation(xfs_mount_t *mp)
{
- return mp->m_sb.sb_inodesize + 128;
+ return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
}
/*
@@ -437,13 +456,13 @@ xfs_calc_addafork_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- mp->m_sb.sb_inodesize +
- mp->m_sb.sb_sectsize * 2 +
- mp->m_dirblksize +
- XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) +
- XFS_ALLOCFREE_LOG_RES(mp, 1) +
- 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 +
- XFS_ALLOCFREE_LOG_COUNT(mp, 1));
+ xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(1, mp->m_dirblksize) +
+ xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
}
/*
@@ -461,35 +480,51 @@ STATIC uint
xfs_calc_attrinval_reservation(
struct xfs_mount *mp)
{
- return MAX((mp->m_sb.sb_inodesize +
- XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
- 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))),
- (4 * mp->m_sb.sb_sectsize +
- 4 * mp->m_sb.sb_sectsize +
- mp->m_sb.sb_sectsize +
- XFS_ALLOCFREE_LOG_RES(mp, 4) +
- 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
+ return MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
+ XFS_FSB_TO_B(mp, 1))),
+ (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+ XFS_FSB_TO_B(mp, 1))));
}
/*
- * Setting an attribute.
+ * Setting an attribute at mount time.
* the inode getting the attribute
* the superblock for allocations
* the agfs extents are allocated from
* the attribute btree * max depth
* the inode allocation btree
* Since attribute transaction space is dependent on the size of the attribute,
- * the calculation is done partially at mount time and partially at runtime.
+ * the calculation is done partially at mount time and partially at runtime(see
+ * below).
*/
STATIC uint
-xfs_calc_attrset_reservation(
+xfs_calc_attrsetm_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- mp->m_sb.sb_inodesize +
- mp->m_sb.sb_sectsize +
- XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
- 128 * (2 + XFS_DA_NODE_MAXDEPTH);
+ xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * Setting an attribute at runtime, transaction space unit per block.
+ * the superblock for allocations: sector size
+ * the inode bmap btree could join or split: max depth * block size
+ * Since the runtime attribute transaction space is dependent on the total
+ * blocks needed for the 1st bmap, here we calculate out the space unit for
+ * one block so that the caller could figure out the total space according
+ * to the attibute extent length in blocks by: ext * XFS_ATTRSETRT_LOG_RES(mp).
+ */
+STATIC uint
+xfs_calc_attrsetrt_reservation(
+ struct xfs_mount *mp)
+{
+ return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
+ XFS_FSB_TO_B(mp, 1));
}
/*
@@ -508,16 +543,15 @@ xfs_calc_attrrm_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- MAX((mp->m_sb.sb_inodesize +
- XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
- XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
- 128 * (1 + XFS_DA_NODE_MAXDEPTH +
- XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
- (2 * mp->m_sb.sb_sectsize +
- 2 * mp->m_sb.sb_sectsize +
- mp->m_sb.sb_sectsize +
- XFS_ALLOCFREE_LOG_RES(mp, 2) +
- 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
+ MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
+ xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
+ XFS_FSB_TO_B(mp, 1)) +
+ (uint)XFS_FSB_TO_B(mp,
+ XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
+ (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+ XFS_FSB_TO_B(mp, 1))));
}
/*
@@ -527,7 +561,78 @@ STATIC uint
xfs_calc_clear_agi_bucket_reservation(
struct xfs_mount *mp)
{
- return mp->m_sb.sb_sectsize + 128;
+ return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+/*
+ * Clearing the quotaflags in the superblock.
+ * the super block for changing quota flags: sector size
+ */
+STATIC uint
+xfs_calc_qm_sbchange_reservation(
+ struct xfs_mount *mp)
+{
+ return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+/*
+ * Adjusting quota limits.
+ * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
+ */
+STATIC uint
+xfs_calc_qm_setqlim_reservation(
+ struct xfs_mount *mp)
+{
+ return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
+}
+
+/*
+ * Allocating quota on disk if needed.
+ * the write transaction log space: XFS_WRITE_LOG_RES(mp)
+ * the unit of quota allocation: one system block size
+ */
+STATIC uint
+xfs_calc_qm_dqalloc_reservation(
+ struct xfs_mount *mp)
+{
+ return XFS_WRITE_LOG_RES(mp) +
+ xfs_calc_buf_res(1,
+ XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
+}
+
+/*
+ * Turning off quotas.
+ * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
+ * the superblock for the quota flags: sector size
+ */
+STATIC uint
+xfs_calc_qm_quotaoff_reservation(
+ struct xfs_mount *mp)
+{
+ return sizeof(struct xfs_qoff_logitem) * 2 +
+ xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+/*
+ * End of turning off quotas.
+ * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
+ */
+STATIC uint
+xfs_calc_qm_quotaoff_end_reservation(
+ struct xfs_mount *mp)
+{
+ return sizeof(struct xfs_qoff_logitem) * 2;
+}
+
+/*
+ * Syncing the incore super block changes to disk.
+ * the super block to reflect the changes: sector size
+ */
+STATIC uint
+xfs_calc_sb_reservation(
+ struct xfs_mount *mp)
+{
+ return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
/*
@@ -555,12 +660,19 @@ xfs_trans_init(
resp->tr_writeid = xfs_calc_writeid_reservation(mp);
resp->tr_addafork = xfs_calc_addafork_reservation(mp);
resp->tr_attrinval = xfs_calc_attrinval_reservation(mp);
- resp->tr_attrset = xfs_calc_attrset_reservation(mp);
+ resp->tr_attrsetm = xfs_calc_attrsetm_reservation(mp);
+ resp->tr_attrsetrt = xfs_calc_attrsetrt_reservation(mp);
resp->tr_attrrm = xfs_calc_attrrm_reservation(mp);
resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp);
resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp);
resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp);
resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp);
+ resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp);
+ resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp);
+ resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp);
+ resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp);
+ resp->tr_qm_equotaoff = xfs_calc_qm_quotaoff_end_reservation(mp);
+ resp->tr_sb = xfs_calc_sb_reservation(mp);
}
/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c6c0601abd7a..cd29f6171021 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -252,17 +252,19 @@ struct xfs_log_item_desc {
* as long as SWRITE logs the entire inode core
*/
#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
+#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
-#define XFS_ATTRSET_LOG_RES(mp, ext) \
- ((mp)->m_reservations.tr_attrset + \
- (ext * (mp)->m_sb.sb_sectsize) + \
- (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
- (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
-#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
+#define XFS_ATTRSETM_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetm)
+#define XFS_ATTRSETRT_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetrt)
+#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
-
+#define XFS_QM_SBCHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_qm_sbchange)
+#define XFS_QM_SETQLIM_LOG_RES(mp) ((mp)->m_reservations.tr_qm_setqlim)
+#define XFS_QM_DQALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_qm_dqalloc)
+#define XFS_QM_QUOTAOFF_LOG_RES(mp) ((mp)->m_reservations.tr_qm_quotaoff)
+#define XFS_QM_QUOTAOFF_END_LOG_RES(mp) ((mp)->m_reservations.tr_qm_equotaoff)
+#define XFS_SB_LOG_RES(mp) ((mp)->m_reservations.tr_sb)
/*
* Various log count values.
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 6011ee661339..0eda7254305f 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -55,20 +55,6 @@ xfs_ail_check(
ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
-#ifdef XFS_TRANS_DEBUG
- /*
- * Walk the list checking lsn ordering, and that every entry has the
- * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
- * when specifically debugging the transaction subsystem.
- */
- prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
- list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
- if (&prev_lip->li_ail != &ailp->xa_ail)
- ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
- ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
- prev_lip = lip;
- }
-#endif /* XFS_TRANS_DEBUG */
}
#else /* !DEBUG */
#define xfs_ail_check(a,l)
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 0c7fa54f309e..642c2d6e1db1 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -516,7 +516,7 @@ xfs_trans_unreserve_and_mod_dquots(
int i, j;
xfs_dquot_t *dqp;
xfs_dqtrx_t *qtrx, *qa;
- boolean_t locked;
+ bool locked;
if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
return;
@@ -537,17 +537,17 @@ xfs_trans_unreserve_and_mod_dquots(
* about the number of blocks used field, or deltas.
* Also we don't bother to zero the fields.
*/
- locked = B_FALSE;
+ locked = false;
if (qtrx->qt_blk_res) {
xfs_dqlock(dqp);
- locked = B_TRUE;
+ locked = true;
dqp->q_res_bcount -=
(xfs_qcnt_t)qtrx->qt_blk_res;
}
if (qtrx->qt_ino_res) {
if (!locked) {
xfs_dqlock(dqp);
- locked = B_TRUE;
+ locked = true;
}
dqp->q_res_icount -=
(xfs_qcnt_t)qtrx->qt_ino_res;
@@ -556,7 +556,7 @@ xfs_trans_unreserve_and_mod_dquots(
if (qtrx->qt_rtblk_res) {
if (!locked) {
xfs_dqlock(dqp);
- locked = B_TRUE;
+ locked = true;
}
dqp->q_res_rtbcount -=
(xfs_qcnt_t)qtrx->qt_rtblk_res;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index d2eee20d5f5b..ac6d567704db 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -33,14 +33,6 @@
#include "xfs_inode_item.h"
#include "xfs_trace.h"
-#ifdef XFS_TRANS_DEBUG
-STATIC void
-xfs_trans_inode_broot_debug(
- xfs_inode_t *ip);
-#else
-#define xfs_trans_inode_broot_debug(ip)
-#endif
-
/*
* Add a locked inode to the transaction.
*
@@ -67,8 +59,6 @@ xfs_trans_ijoin(
* Get a log_item_desc to point at the new item.
*/
xfs_trans_add_item(tp, &iip->ili_item);
-
- xfs_trans_inode_broot_debug(ip);
}
/*
@@ -135,34 +125,3 @@ xfs_trans_log_inode(
flags |= ip->i_itemp->ili_last_fields;
ip->i_itemp->ili_fields |= flags;
}
-
-#ifdef XFS_TRANS_DEBUG
-/*
- * Keep track of the state of the inode btree root to make sure we
- * log it properly.
- */
-STATIC void
-xfs_trans_inode_broot_debug(
- xfs_inode_t *ip)
-{
- xfs_inode_log_item_t *iip;
-
- ASSERT(ip->i_itemp != NULL);
- iip = ip->i_itemp;
- if (iip->ili_root_size != 0) {
- ASSERT(iip->ili_orig_root != NULL);
- kmem_free(iip->ili_orig_root);
- iip->ili_root_size = 0;
- iip->ili_orig_root = NULL;
- }
- if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
- ASSERT((ip->i_df.if_broot != NULL) &&
- (ip->i_df.if_broot_bytes > 0));
- iip->ili_root_size = ip->i_df.if_broot_bytes;
- iip->ili_orig_root =
- (char*)kmem_alloc(iip->ili_root_size, KM_SLEEP);
- memcpy(iip->ili_orig_root, (char*)(ip->i_df.if_broot),
- iip->ili_root_size);
- }
-}
-#endif
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 7a41874f4c20..61ba1cfa974c 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -32,7 +32,6 @@ typedef unsigned int __uint32_t;
typedef signed long long int __int64_t;
typedef unsigned long long int __uint64_t;
-typedef enum { B_FALSE,B_TRUE } boolean_t;
typedef __uint32_t prid_t; /* project ID */
typedef __uint32_t inst_t; /* an instruction */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d95f565a390e..77ad74834baa 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -725,7 +725,7 @@ xfs_create(
int error;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- boolean_t unlock_dp_on_error = B_FALSE;
+ bool unlock_dp_on_error = false;
uint cancel_flags;
int committed;
prid_t prid;
@@ -794,7 +794,7 @@ xfs_create(
}
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
- unlock_dp_on_error = B_TRUE;
+ unlock_dp_on_error = true;
xfs_bmap_init(&free_list, &first_block);
@@ -830,7 +830,7 @@ xfs_create(
* error path.
*/
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
- unlock_dp_on_error = B_FALSE;
+ unlock_dp_on_error = false;
error = xfs_dir_createname(tp, dp, name, ip->i_ino,
&first_block, &free_list, resblks ?
@@ -1367,7 +1367,7 @@ xfs_symlink(
int pathlen;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- boolean_t unlock_dp_on_error = B_FALSE;
+ bool unlock_dp_on_error = false;
uint cancel_flags;
int committed;
xfs_fileoff_t first_fsb;
@@ -1438,7 +1438,7 @@ xfs_symlink(
}
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
- unlock_dp_on_error = B_TRUE;
+ unlock_dp_on_error = true;
/*
* Check whether the directory allows new symlinks or not.
@@ -1484,7 +1484,7 @@ xfs_symlink(
* error path.
*/
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
- unlock_dp_on_error = B_FALSE;
+ unlock_dp_on_error = false;
/*
* Also attach the dquot(s) to it, if applicable.