summaryrefslogtreecommitdiffstats
path: root/fs/nfsd/vfs.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfsd/vfs.c')
-rw-r--r--fs/nfsd/vfs.c164
1 files changed, 92 insertions, 72 deletions
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b660435978d2..23341c1063bc 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -55,6 +55,7 @@
#include <linux/security.h>
#endif /* CONFIG_NFSD_V4 */
#include <linux/jhash.h>
+#include <linux/ima.h>
#include <asm/uaccess.h>
@@ -100,36 +101,35 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
{
struct svc_export *exp = *expp, *exp2 = NULL;
struct dentry *dentry = *dpp;
- struct vfsmount *mnt = mntget(exp->ex_path.mnt);
- struct dentry *mounts = dget(dentry);
+ struct path path = {.mnt = mntget(exp->ex_path.mnt),
+ .dentry = dget(dentry)};
int err = 0;
- while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
+ while (d_mountpoint(path.dentry) && follow_down(&path))
+ ;
- exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
+ exp2 = rqst_exp_get_by_name(rqstp, &path);
if (IS_ERR(exp2)) {
if (PTR_ERR(exp2) != -ENOENT)
err = PTR_ERR(exp2);
- dput(mounts);
- mntput(mnt);
+ path_put(&path);
goto out;
}
if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
/* successfully crossed mount point */
/*
- * This is subtle: dentry is *not* under mnt at this point.
- * The only reason we are safe is that original mnt is pinned
- * down by exp, so we should dput before putting exp.
+ * This is subtle: path.dentry is *not* on path.mnt
+ * at this point. The only reason we are safe is that
+ * original mnt is pinned down by exp, so we should
+ * put path *before* putting exp
*/
- dput(dentry);
- *dpp = mounts;
- exp_put(exp);
+ *dpp = path.dentry;
+ path.dentry = dentry;
*expp = exp2;
- } else {
- exp_put(exp2);
- dput(mounts);
+ exp2 = exp;
}
- mntput(mnt);
+ path_put(&path);
+ exp_put(exp2);
out:
return err;
}
@@ -168,28 +168,29 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
/* checking mountpoint crossing is very different when stepping up */
struct svc_export *exp2 = NULL;
struct dentry *dp;
- struct vfsmount *mnt = mntget(exp->ex_path.mnt);
- dentry = dget(dparent);
- while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
+ struct path path = {.mnt = mntget(exp->ex_path.mnt),
+ .dentry = dget(dparent)};
+
+ while (path.dentry == path.mnt->mnt_root &&
+ follow_up(&path))
;
- dp = dget_parent(dentry);
- dput(dentry);
- dentry = dp;
+ dp = dget_parent(path.dentry);
+ dput(path.dentry);
+ path.dentry = dp;
- exp2 = rqst_exp_parent(rqstp, mnt, dentry);
+ exp2 = rqst_exp_parent(rqstp, &path);
if (PTR_ERR(exp2) == -ENOENT) {
- dput(dentry);
dentry = dget(dparent);
} else if (IS_ERR(exp2)) {
host_err = PTR_ERR(exp2);
- dput(dentry);
- mntput(mnt);
+ path_put(&path);
goto out_nfserr;
} else {
+ dentry = dget(path.dentry);
exp_put(exp);
exp = exp2;
}
- mntput(mnt);
+ path_put(&path);
}
} else {
fh_lock(fhp);
@@ -677,7 +678,6 @@ __be32
nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
int access, struct file **filp)
{
- const struct cred *cred = current_cred();
struct dentry *dentry;
struct inode *inode;
int flags = O_RDONLY|O_LARGEFILE;
@@ -732,9 +732,11 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
vfs_dq_init(inode);
}
*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
- flags, cred);
+ flags, current_cred());
if (IS_ERR(*filp))
host_err = PTR_ERR(*filp);
+ else
+ ima_counts_get(*filp);
out_nfserr:
err = nfserrno(host_err);
out:
@@ -963,6 +965,43 @@ static void kill_suid(struct dentry *dentry)
mutex_unlock(&dentry->d_inode->i_mutex);
}
+/*
+ * Gathered writes: If another process is currently writing to the file,
+ * there's a high chance this is another nfsd (triggered by a bulk write
+ * from a client's biod). Rather than syncing the file with each write
+ * request, we sleep for 10 msec.
+ *
+ * I don't know if this roughly approximates C. Juszak's idea of
+ * gathered writes, but it's a nice and simple solution (IMHO), and it
+ * seems to work:-)
+ *
+ * Note: we do this only in the NFSv2 case, since v3 and higher have a
+ * better tool (separate unstable writes and commits) for solving this
+ * problem.
+ */
+static int wait_for_concurrent_writes(struct file *file)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ static ino_t last_ino;
+ static dev_t last_dev;
+ int err = 0;
+
+ if (atomic_read(&inode->i_writecount) > 1
+ || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
+ dprintk("nfsd: write defer %d\n", task_pid_nr(current));
+ msleep(10);
+ dprintk("nfsd: write resume %d\n", task_pid_nr(current));
+ }
+
+ if (inode->i_state & I_DIRTY) {
+ dprintk("nfsd: write sync %d\n", task_pid_nr(current));
+ err = nfsd_sync(file);
+ }
+ last_ino = inode->i_ino;
+ last_dev = inode->i_sb->s_dev;
+ return err;
+}
+
static __be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
@@ -975,6 +1014,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
__be32 err = 0;
int host_err;
int stable = *stablep;
+ int use_wgather;
#ifdef MSNFS
err = nfserr_perm;
@@ -993,9 +1033,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
* - the sync export option has been set, or
* - the client requested O_SYNC behavior (NFSv3 feature).
* - The file system doesn't support fsync().
- * When gathered writes have been configured for this volume,
+ * When NFSv2 gathered writes have been configured for this volume,
* flushing the data to disk is handled separately below.
*/
+ use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
if (!file->f_op->fsync) {/* COMMIT3 cannot work */
stable = 2;
@@ -1004,7 +1045,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (!EX_ISSYNC(exp))
stable = 0;
- if (stable && !EX_WGATHER(exp)) {
+ if (stable && !use_wgather) {
spin_lock(&file->f_lock);
file->f_flags |= O_SYNC;
spin_unlock(&file->f_lock);
@@ -1014,52 +1055,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
oldfs = get_fs(); set_fs(KERNEL_DS);
host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
set_fs(oldfs);
- if (host_err >= 0) {
- *cnt = host_err;
- nfsdstats.io_write += host_err;
- fsnotify_modify(file->f_path.dentry);
- }
+ if (host_err < 0)
+ goto out_nfserr;
+ *cnt = host_err;
+ nfsdstats.io_write += host_err;
+ fsnotify_modify(file->f_path.dentry);
/* clear setuid/setgid flag after write */
- if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
+ if (inode->i_mode & (S_ISUID | S_ISGID))
kill_suid(dentry);
- if (host_err >= 0 && stable) {
- static ino_t last_ino;
- static dev_t last_dev;
-
- /*
- * Gathered writes: If another process is currently
- * writing to the file, there's a high chance
- * this is another nfsd (triggered by a bulk write
- * from a client's biod). Rather than syncing the
- * file with each write request, we sleep for 10 msec.
- *
- * I don't know if this roughly approximates
- * C. Juszak's idea of gathered writes, but it's a
- * nice and simple solution (IMHO), and it seems to
- * work:-)
- */
- if (EX_WGATHER(exp)) {
- if (atomic_read(&inode->i_writecount) > 1
- || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
- dprintk("nfsd: write defer %d\n", task_pid_nr(current));
- msleep(10);
- dprintk("nfsd: write resume %d\n", task_pid_nr(current));
- }
-
- if (inode->i_state & I_DIRTY) {
- dprintk("nfsd: write sync %d\n", task_pid_nr(current));
- host_err=nfsd_sync(file);
- }
-#if 0
- wake_up(&inode->i_wait);
-#endif
- }
- last_ino = inode->i_ino;
- last_dev = inode->i_sb->s_dev;
- }
+ if (stable && use_wgather)
+ host_err = wait_for_concurrent_writes(file);
+out_nfserr:
dprintk("nfsd: write complete host_err=%d\n", host_err);
if (host_err >= 0)
err = 0;
@@ -2024,6 +2033,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
struct dentry *dentry, int acc)
{
struct inode *inode = dentry->d_inode;
+ struct path path;
int err;
if (acc == NFSD_MAY_NOP)
@@ -2096,7 +2106,17 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
if (err == -EACCES && S_ISREG(inode->i_mode) &&
acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
err = inode_permission(inode, MAY_EXEC);
+ if (err)
+ goto nfsd_out;
+ /* Do integrity (permission) checking now, but defer incrementing
+ * IMA counts to the actual file open.
+ */
+ path.mnt = exp->ex_path.mnt;
+ path.dentry = dentry;
+ err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
+ IMA_COUNT_LEAVE);
+nfsd_out:
return err? nfserrno(err) : 0;
}