diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-16 10:58:12 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-16 10:58:12 -0800 |
commit | ff0f962ca3c38239b299a70e7eea27abfbb979c3 (patch) | |
tree | ab37876cf2e8af4900f11f44ac4b96cb0f15e548 | |
parent | 087a76d390cbb8c0d21ea0cb3672ab4a7bb76362 (diff) | |
parent | c3c8699664800a68600f1988302173067eaeaffa (diff) | |
download | linux-ff0f962ca3c38239b299a70e7eea27abfbb979c3.tar.bz2 |
Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs updates from Miklos Szeredi:
"This update contains:
- try to clone on copy-up
- allow renaming a directory
- split source into manageable chunks
- misc cleanups and fixes
It does not contain the read-only fd data inconsistency fix, which Al
didn't like. I'll leave that to the next year..."
* 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: (36 commits)
ovl: fix reStructuredText syntax errors in documentation
ovl: fix return value of ovl_fill_super
ovl: clean up kstat usage
ovl: fold ovl_copy_up_truncate() into ovl_copy_up()
ovl: create directories inside merged parent opaque
ovl: opaque cleanup
ovl: show redirect_dir mount option
ovl: allow setting max size of redirect
ovl: allow redirect_dir to default to "on"
ovl: check for emptiness of redirect dir
ovl: redirect on rename-dir
ovl: lookup redirects
ovl: consolidate lookup for underlying layers
ovl: fix nested overlayfs mount
ovl: check namelen
ovl: split super.c
ovl: use d_is_dir()
ovl: simplify lookup
ovl: check lower existence of rename target
ovl: rename: simplify handling of lower/merged directory
...
-rw-r--r-- | Documentation/filesystems/overlayfs.txt | 26 | ||||
-rw-r--r-- | fs/ioctl.c | 6 | ||||
-rw-r--r-- | fs/namei.c | 6 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 3 | ||||
-rw-r--r-- | fs/overlayfs/Kconfig | 14 | ||||
-rw-r--r-- | fs/overlayfs/Makefile | 2 | ||||
-rw-r--r-- | fs/overlayfs/copy_up.c | 61 | ||||
-rw-r--r-- | fs/overlayfs/dir.c | 375 | ||||
-rw-r--r-- | fs/overlayfs/inode.c | 78 | ||||
-rw-r--r-- | fs/overlayfs/namei.c | 401 | ||||
-rw-r--r-- | fs/overlayfs/overlayfs.h | 62 | ||||
-rw-r--r-- | fs/overlayfs/ovl_entry.h | 53 | ||||
-rw-r--r-- | fs/overlayfs/super.c | 557 | ||||
-rw-r--r-- | fs/overlayfs/util.c | 265 | ||||
-rw-r--r-- | fs/read_write.c | 23 | ||||
-rw-r--r-- | include/linux/fs.h | 13 | ||||
-rw-r--r-- | net/unix/af_unix.c | 6 |
17 files changed, 1139 insertions, 812 deletions
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index bcbf9710e4af..634d03e20c2d 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -66,7 +66,7 @@ At mount time, the two directories given as mount options "lowerdir" and "upperdir" are combined into a merged directory: mount -t overlay overlay -olowerdir=/lower,upperdir=/upper,\ -workdir=/work /merged + workdir=/work /merged The "workdir" needs to be an empty directory on the same filesystem as upperdir. @@ -118,6 +118,7 @@ programs. seek offsets are assigned sequentially when the directories are read. Thus if + - read part of a directory - remember an offset, and close the directory - re-open the directory some time later @@ -130,6 +131,23 @@ directory. Readdir on directories that are not merged is simply handled by the underlying directory (upper or lower). +renaming directories +-------------------- + +When renaming a directory that is on the lower layer or merged (i.e. the +directory was not created on the upper layer to start with) overlayfs can +handle it in two different ways: + +1. return EXDEV error: this error is returned by rename(2) when trying to + move a file or directory across filesystem boundaries. Hence + applications are usually prepared to handle this error (mv(1) for example + recursively copies the directory tree). This is the default behavior. + +2. If the "redirect_dir" feature is enabled, then the directory will be + copied up (but not the contents). Then the "trusted.overlay.redirect" + extended attribute is set to the path of the original location from the + root of the overlay. Finally the directory is moved to the new + location. Non-directories --------------- @@ -185,13 +203,13 @@ filesystem, so both st_dev and st_ino of the file may change. Any open files referring to this inode will access the old data. 
-Any file locks (and leases) obtained before copy_up will not apply -to the copied up file. - If a file with multiple hard links is copied up, then this will "break" the link. Changes will not be propagated to other names referring to the same inode. +Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged +directory will fail with EXDEV. + Changes to underlying filesystems --------------------------------- diff --git a/fs/ioctl.c b/fs/ioctl.c index c415668c86d4..cb9b02940805 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -223,7 +223,11 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, if (!src_file.file) return -EBADF; - ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); + ret = -EXDEV; + if (src_file.file->f_path.mnt != dst_file->f_path.mnt) + goto fdput; + ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen); +fdput: fdput(src_file); return ret; } diff --git a/fs/namei.c b/fs/namei.c index 092ac5667ec7..2b55ea142273 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4306,11 +4306,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; - /* - * Check source == target. - * On overlayfs need to look at underlying inodes. 
- */ - if (d_real_inode(old_dentry) == d_real_inode(new_dentry)) + if (source == target) return 0; error = may_delete(old_dir, old_dentry, is_dir); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8ca642fe9b21..357e844aee84 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -509,8 +509,7 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { - return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, - count)); + return nfserrno(do_clone_file_range(src, src_pos, dst, dst_pos, count)); } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 34355818a2e0..0daac5112f7a 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -8,3 +8,17 @@ config OVERLAY_FS merged with the 'upper' object. For more information see Documentation/filesystems/overlayfs.txt + +config OVERLAY_FS_REDIRECT_DIR + bool "Overlayfs: turn on redirect dir feature by default" + depends on OVERLAY_FS + help + If this config option is enabled then overlay filesystems will use + redirects when renaming directories by default. In this case it is + still possible to turn off redirects globally with the + "redirect_dir=off" module option or on a filesystem instance basis + with the "redirect_dir=off" mount option. + + Note, that redirects are not backward compatible. That is, mounting + an overlay which has redirects on a kernel that doesn't support this + feature will have unexpected results. 
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 900daed3e91d..99373bbc1478 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_OVERLAY_FS) += overlay.o -overlay-objs := super.o inode.o dir.o readdir.o copy_up.o +overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 2838bddb1f91..f57043dace62 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -153,6 +153,13 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) goto out_fput; } + /* Try to use clone_file_range to clone up within the same fs */ + error = vfs_clone_file_range(old_file, 0, new_file, 0, len); + if (!error) + goto out; + /* Couldn't clone, so now we try to copy the data */ + error = 0; + /* FIXME: copy up sparse files efficiently */ while (len) { size_t this_len = OVL_COPY_UP_CHUNK_SIZE; @@ -177,7 +184,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) len -= bytes; } - +out: if (!error) error = vfs_fsync(new_file, 0); fput(new_file); @@ -231,10 +238,15 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct inode *udir = upperdir->d_inode; struct dentry *newdentry = NULL; struct dentry *upper = NULL; - umode_t mode = stat->mode; int err; const struct cred *old_creds = NULL; struct cred *new_creds = NULL; + struct cattr cattr = { + /* Can't properly set mode on creation because of the umask */ + .mode = stat->mode & S_IFMT, + .rdev = stat->rdev, + .link = link + }; newdentry = ovl_lookup_temp(workdir, dentry); err = PTR_ERR(newdentry); @@ -254,10 +266,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (new_creds) old_creds = override_creds(new_creds); - /* Can't properly set mode on creation because of the umask */ - stat->mode &= S_IFMT; - err = ovl_create_real(wdir, newdentry, stat, link, NULL, true); - stat->mode = mode; + 
err = ovl_create_real(wdir, newdentry, &cattr, NULL, true); if (new_creds) { revert_creds(old_creds); @@ -296,12 +305,6 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, ovl_dentry_update(dentry, newdentry); ovl_inode_update(d_inode(dentry), d_inode(newdentry)); newdentry = NULL; - - /* - * Non-directores become opaque when copied up. - */ - if (!S_ISDIR(stat->mode)) - ovl_dentry_set_opaque(dentry, true); out2: dput(upper); out1: @@ -317,20 +320,14 @@ out_cleanup: /* * Copy up a single dentry * - * Directory renames only allowed on "pure upper" (already created on - * upper filesystem, never copied up). Directories which are on lower or - * are merged may not be renamed. For these -EXDEV is returned and - * userspace has to deal with it. This means, when copying up a - * directory we can rely on it and ancestors being stable. - * - * Non-directory renames start with copy up of source if necessary. The - * actual rename will only proceed once the copy up was successful. Copy - * up uses upper parent i_mutex for exclusion. Since rename can change - * d_parent it is possible that the copy up will lock the old parent. At - * that point the file will have already been copied up anyway. + * All renames start with copy up of source if necessary. The actual + * rename will only proceed once the copy up was successful. Copy up uses + * upper parent i_mutex for exclusion. Since rename can change d_parent it + * is possible that the copy up will lock the old parent. At that point + * the file will have already been copied up anyway. 
*/ -int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, - struct path *lowerpath, struct kstat *stat) +static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, + struct path *lowerpath, struct kstat *stat) { DEFINE_DELAYED_CALL(done); struct dentry *workdir = ovl_workdir(dentry); @@ -339,7 +336,6 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct path parentpath; struct dentry *lowerdentry = lowerpath->dentry; struct dentry *upperdir; - struct dentry *upperdentry; const char *link = NULL; if (WARN_ON(!workdir)) @@ -365,8 +361,7 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, pr_err("overlayfs: failed to lock workdir+upperdir\n"); goto out_unlock; } - upperdentry = ovl_dentry_upper(dentry); - if (upperdentry) { + if (ovl_dentry_upper(dentry)) { /* Raced with another copy-up? Nothing to do, then... */ err = 0; goto out_unlock; @@ -385,7 +380,7 @@ out_unlock: return err; } -int ovl_copy_up(struct dentry *dentry) +int ovl_copy_up_flags(struct dentry *dentry, int flags) { int err = 0; const struct cred *old_cred = ovl_override_creds(dentry->d_sb); @@ -415,6 +410,9 @@ int ovl_copy_up(struct dentry *dentry) ovl_path_lower(next, &lowerpath); err = vfs_getattr(&lowerpath, &stat); + /* maybe truncate regular file. 
this has no effect on dirs */ + if (flags & O_TRUNC) + stat.size = 0; if (!err) err = ovl_copy_up_one(parent, next, &lowerpath, &stat); @@ -425,3 +423,8 @@ int ovl_copy_up(struct dentry *dentry) return err; } + +int ovl_copy_up(struct dentry *dentry) +{ + return ovl_copy_up_flags(dentry, 0); +} diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 306b6c161840..16e06dd89457 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -12,11 +12,18 @@ #include <linux/xattr.h> #include <linux/security.h> #include <linux/cred.h> +#include <linux/module.h> #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include <linux/atomic.h> +#include <linux/ratelimit.h> #include "overlayfs.h" +static unsigned short ovl_redirect_max = 256; +module_param_named(redirect_max, ovl_redirect_max, ushort, 0644); +MODULE_PARM_DESC(ovl_redirect_max, + "Maximum length of absolute redirect xattr value"); + void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) { int err; @@ -75,8 +82,7 @@ static struct dentry *ovl_whiteout(struct dentry *workdir, } int ovl_create_real(struct inode *dir, struct dentry *newdentry, - struct kstat *stat, const char *link, - struct dentry *hardlink, bool debug) + struct cattr *attr, struct dentry *hardlink, bool debug) { int err; @@ -86,13 +92,13 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry, if (hardlink) { err = ovl_do_link(hardlink, dir, newdentry, debug); } else { - switch (stat->mode & S_IFMT) { + switch (attr->mode & S_IFMT) { case S_IFREG: - err = ovl_do_create(dir, newdentry, stat->mode, debug); + err = ovl_do_create(dir, newdentry, attr->mode, debug); break; case S_IFDIR: - err = ovl_do_mkdir(dir, newdentry, stat->mode, debug); + err = ovl_do_mkdir(dir, newdentry, attr->mode, debug); break; case S_IFCHR: @@ -100,11 +106,11 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry, case S_IFIFO: case S_IFSOCK: err = ovl_do_mknod(dir, newdentry, - stat->mode, stat->rdev, debug); + attr->mode, 
attr->rdev, debug); break; case S_IFLNK: - err = ovl_do_symlink(dir, newdentry, link, debug); + err = ovl_do_symlink(dir, newdentry, attr->link, debug); break; default: @@ -121,20 +127,15 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry, return err; } -static int ovl_set_opaque(struct dentry *upperdentry) -{ - return ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0); -} - -static void ovl_remove_opaque(struct dentry *upperdentry) +static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) { int err; - err = ovl_do_removexattr(upperdentry, OVL_XATTR_OPAQUE); - if (err) { - pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n", - upperdentry->d_name.name, err); - } + err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0); + if (!err) + ovl_dentry_set_opaque(dentry); + + return err; } static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, @@ -182,9 +183,13 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, d_instantiate(dentry, inode); } +static bool ovl_type_merge(struct dentry *dentry) +{ + return OVL_TYPE_MERGE(ovl_path_type(dentry)); +} + static int ovl_create_upper(struct dentry *dentry, struct inode *inode, - struct kstat *stat, const char *link, - struct dentry *hardlink) + struct cattr *attr, struct dentry *hardlink) { struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct inode *udir = upperdir->d_inode; @@ -192,7 +197,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, int err; if (!hardlink && !IS_POSIXACL(udir)) - stat->mode &= ~current_umask(); + attr->mode &= ~current_umask(); inode_lock_nested(udir, I_MUTEX_PARENT); newdentry = lookup_one_len(dentry->d_name.name, upperdir, @@ -200,10 +205,15 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, err = PTR_ERR(newdentry); if (IS_ERR(newdentry)) goto out_unlock; - err = ovl_create_real(udir, newdentry, stat, link, hardlink, false); + err = 
ovl_create_real(udir, newdentry, attr, hardlink, false); if (err) goto out_dput; + if (ovl_type_merge(dentry->d_parent)) { + /* Setting opaque here is just an optimization, allow to fail */ + ovl_set_opaque(dentry, newdentry); + } + ovl_instantiate(dentry, inode, newdentry, !!hardlink); newdentry = NULL; out_dput: @@ -270,7 +280,8 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (IS_ERR(opaquedir)) goto out_unlock; - err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true); + err = ovl_create_real(wdir, opaquedir, + &(struct cattr){.mode = stat.mode}, NULL, true); if (err) goto out_dput; @@ -278,7 +289,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (err) goto out_cleanup; - err = ovl_set_opaque(opaquedir); + err = ovl_set_opaque(dentry, opaquedir); if (err) goto out_cleanup; @@ -370,7 +381,7 @@ out_free: } static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, - struct kstat *stat, const char *link, + struct cattr *cattr, struct dentry *hardlink) { struct dentry *workdir = ovl_workdir(dentry); @@ -387,7 +398,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (!hardlink) { err = posix_acl_create(dentry->d_parent->d_inode, - &stat->mode, &default_acl, &acl); + &cattr->mode, &default_acl, &acl); if (err) return err; } @@ -407,7 +418,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (IS_ERR(upper)) goto out_dput; - err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true); + err = ovl_create_real(wdir, newdentry, cattr, hardlink, true); if (err) goto out_dput2; @@ -415,10 +426,11 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, * mode could have been mutilated due to umask (e.g. 
sgid directory) */ if (!hardlink && - !S_ISLNK(stat->mode) && newdentry->d_inode->i_mode != stat->mode) { + !S_ISLNK(cattr->mode) && + newdentry->d_inode->i_mode != cattr->mode) { struct iattr attr = { .ia_valid = ATTR_MODE, - .ia_mode = stat->mode, + .ia_mode = cattr->mode, }; inode_lock(newdentry->d_inode); err = notify_change(newdentry, &attr, NULL); @@ -438,8 +450,8 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, goto out_cleanup; } - if (!hardlink && S_ISDIR(stat->mode)) { - err = ovl_set_opaque(newdentry); + if (!hardlink && S_ISDIR(cattr->mode)) { + err = ovl_set_opaque(dentry, newdentry); if (err) goto out_cleanup; @@ -475,8 +487,7 @@ out_cleanup: } static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, - struct kstat *stat, const char *link, - struct dentry *hardlink) + struct cattr *attr, struct dentry *hardlink) { int err; const struct cred *old_cred; @@ -494,7 +505,7 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, override_cred->fsgid = inode->i_gid; if (!hardlink) { err = security_dentry_create_files_as(dentry, - stat->mode, &dentry->d_name, old_cred, + attr->mode, &dentry->d_name, old_cred, override_cred); if (err) { put_cred(override_cred); @@ -504,12 +515,12 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, put_cred(override_creds(override_cred)); put_cred(override_cred); - if (!ovl_dentry_is_opaque(dentry)) - err = ovl_create_upper(dentry, inode, stat, link, + if (!ovl_dentry_is_whiteout(dentry)) + err = ovl_create_upper(dentry, inode, attr, hardlink); else - err = ovl_create_over_whiteout(dentry, inode, stat, - link, hardlink); + err = ovl_create_over_whiteout(dentry, inode, attr, + hardlink); } out_revert_creds: revert_creds(old_cred); @@ -528,8 +539,9 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, { int err; struct inode *inode; - struct kstat stat = { + struct cattr attr = { .rdev = rdev, + .link = link, }; 
err = ovl_want_write(dentry); @@ -537,14 +549,14 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, goto out; err = -ENOMEM; - inode = ovl_new_inode(dentry->d_sb, mode); + inode = ovl_new_inode(dentry->d_sb, mode, rdev); if (!inode) goto out_drop_write; inode_init_owner(inode, dentry->d_parent->d_inode, mode); - stat.mode = inode->i_mode; + attr.mode = inode->i_mode; - err = ovl_create_or_link(dentry, inode, &stat, link, NULL); + err = ovl_create_or_link(dentry, inode, &attr, NULL); if (err) iput(inode); @@ -598,7 +610,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir, inode = d_inode(old); ihold(inode); - err = ovl_create_or_link(new, inode, NULL, NULL, ovl_dentry_upper(old)); + err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old)); if (err) iput(inode); @@ -684,8 +696,17 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct inode *dir = upperdir->d_inode; struct dentry *upper; + struct dentry *opaquedir = NULL; int err; + /* Redirect dir can be !ovl_lower_positive && OVL_TYPE_MERGE */ + if (is_dir && ovl_dentry_get_redirect(dentry)) { + opaquedir = ovl_check_empty_and_clear(dentry); + err = PTR_ERR(opaquedir); + if (IS_ERR(opaquedir)) + goto out; + } + inode_lock_nested(dir, I_MUTEX_PARENT); upper = lookup_one_len(dentry->d_name.name, upperdir, dentry->d_name.len); @@ -694,14 +715,15 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) goto out_unlock; err = -ESTALE; - if (upper == ovl_dentry_upper(dentry)) { - if (is_dir) - err = vfs_rmdir(dir, upper); - else - err = vfs_unlink(dir, upper, NULL); - ovl_dentry_version_inc(dentry->d_parent); - } - dput(upper); + if ((opaquedir && upper != opaquedir) || + (!opaquedir && upper != ovl_dentry_upper(dentry))) + goto out_dput_upper; + + if (is_dir) + err = vfs_rmdir(dir, upper); + else + err = vfs_unlink(dir, upper, NULL); + ovl_dentry_version_inc(dentry->d_parent); /* * 
Keeping this dentry hashed would mean having to release @@ -711,34 +733,21 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) */ if (!err) d_drop(dentry); +out_dput_upper: + dput(upper); out_unlock: inode_unlock(dir); - + dput(opaquedir); +out: return err; } -static inline int ovl_check_sticky(struct dentry *dentry) -{ - struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode; - struct inode *inode = ovl_dentry_real(dentry)->d_inode; - - if (check_sticky(dir, inode)) - return -EPERM; - - return 0; -} - static int ovl_do_remove(struct dentry *dentry, bool is_dir) { enum ovl_path_type type; int err; const struct cred *old_cred; - - err = ovl_check_sticky(dentry); - if (err) - goto out; - err = ovl_want_write(dentry); if (err) goto out; @@ -750,7 +759,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) type = ovl_path_type(dentry); old_cred = ovl_override_creds(dentry->d_sb); - if (OVL_TYPE_PURE_UPPER(type)) + if (!ovl_lower_positive(dentry)) err = ovl_remove_upper(dentry, is_dir); else err = ovl_remove_and_whiteout(dentry, is_dir); @@ -777,13 +786,114 @@ static int ovl_rmdir(struct inode *dir, struct dentry *dentry) return ovl_do_remove(dentry, true); } -static int ovl_rename2(struct inode *olddir, struct dentry *old, - struct inode *newdir, struct dentry *new, - unsigned int flags) +static bool ovl_type_merge_or_lower(struct dentry *dentry) +{ + enum ovl_path_type type = ovl_path_type(dentry); + + return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type); +} + +static bool ovl_can_move(struct dentry *dentry) +{ + return ovl_redirect_dir(dentry->d_sb) || + !d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry); +} + +static char *ovl_get_redirect(struct dentry *dentry, bool samedir) +{ + char *buf, *ret; + struct dentry *d, *tmp; + int buflen = ovl_redirect_max + 1; + + if (samedir) { + ret = kstrndup(dentry->d_name.name, dentry->d_name.len, + GFP_KERNEL); + goto out; + } + + buf = ret = kmalloc(buflen, GFP_TEMPORARY); + if (!buf) + 
goto out; + + buflen--; + buf[buflen] = '\0'; + for (d = dget(dentry); !IS_ROOT(d);) { + const char *name; + int thislen; + + spin_lock(&d->d_lock); + name = ovl_dentry_get_redirect(d); + if (name) { + thislen = strlen(name); + } else { + name = d->d_name.name; + thislen = d->d_name.len; + } + + /* If path is too long, fall back to userspace move */ + if (thislen + (name[0] != '/') > buflen) { + ret = ERR_PTR(-EXDEV); + spin_unlock(&d->d_lock); + goto out_put; + } + + buflen -= thislen; + memcpy(&buf[buflen], name, thislen); + tmp = dget_dlock(d->d_parent); + spin_unlock(&d->d_lock); + + dput(d); + d = tmp; + + /* Absolute redirect: finished */ + if (buf[buflen] == '/') + break; + buflen--; + buf[buflen] = '/'; + } + ret = kstrdup(&buf[buflen], GFP_KERNEL); +out_put: + dput(d); + kfree(buf); +out: + return ret ? ret : ERR_PTR(-ENOMEM); +} + +static int ovl_set_redirect(struct dentry *dentry, bool samedir) +{ + int err; + const char *redirect = ovl_dentry_get_redirect(dentry); + + if (redirect && (samedir || redirect[0] == '/')) + return 0; + + redirect = ovl_get_redirect(dentry, samedir); + if (IS_ERR(redirect)) + return PTR_ERR(redirect); + + err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT, + redirect, strlen(redirect), 0); + if (!err) { + spin_lock(&dentry->d_lock); + ovl_dentry_set_redirect(dentry, redirect); + spin_unlock(&dentry->d_lock); + } else { + kfree(redirect); + if (err == -EOPNOTSUPP) + ovl_clear_redirect_dir(dentry->d_sb); + else + pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); + /* Fall back to userspace copy-up */ + err = -EXDEV; + } + return err; +} + +static int ovl_rename(struct inode *olddir, struct dentry *old, + struct inode *newdir, struct dentry *new, + unsigned int flags) { int err; - enum ovl_path_type old_type; - enum ovl_path_type new_type; struct dentry *old_upperdir; struct dentry *new_upperdir; struct dentry *olddentry; @@ -794,7 +904,8 @@ static int ovl_rename2(struct inode *olddir, struct 
dentry *old, bool cleanup_whiteout = false; bool overwrite = !(flags & RENAME_EXCHANGE); bool is_dir = d_is_dir(old); - bool new_is_dir = false; + bool new_is_dir = d_is_dir(new); + bool samedir = olddir == newdir; struct dentry *opaquedir = NULL; const struct cred *old_cred = NULL; @@ -804,46 +915,12 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, flags &= ~RENAME_NOREPLACE; - err = ovl_check_sticky(old); - if (err) - goto out; - /* Don't copy up directory trees */ - old_type = ovl_path_type(old); err = -EXDEV; - if (OVL_TYPE_MERGE_OR_LOWER(old_type) && is_dir) + if (!ovl_can_move(old)) + goto out; + if (!overwrite && !ovl_can_move(new)) goto out; - - if (new->d_inode) { - err = ovl_check_sticky(new); - if (err) - goto out; - - if (d_is_dir(new)) - new_is_dir = true; - - new_type = ovl_path_type(new); - err = -EXDEV; - if (!overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) - goto out; - - err = 0; - if (!OVL_TYPE_UPPER(new_type) && !OVL_TYPE_UPPER(old_type)) { - if (ovl_dentry_lower(old)->d_inode == - ovl_dentry_lower(new)->d_inode) - goto out; - } - if (OVL_TYPE_UPPER(new_type) && OVL_TYPE_UPPER(old_type)) { - if (ovl_dentry_upper(old)->d_inode == - ovl_dentry_upper(new)->d_inode) - goto out; - } - } else { - if (ovl_dentry_is_opaque(new)) - new_type = __OVL_PATH_UPPER; - else - new_type = __OVL_PATH_UPPER | __OVL_PATH_PURE; - } err = ovl_want_write(old); if (err) @@ -862,12 +939,9 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, goto out_drop_write; } - old_opaque = !OVL_TYPE_PURE_UPPER(old_type); - new_opaque = !OVL_TYPE_PURE_UPPER(new_type); - old_cred = ovl_override_creds(old->d_sb); - if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) { + if (overwrite && new_is_dir && ovl_type_merge_or_lower(new)) { opaquedir = ovl_check_empty_and_clear(new); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) { @@ -877,15 +951,15 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, } if 
(overwrite) { - if (old_opaque) { - if (new->d_inode || !new_opaque) { + if (ovl_lower_positive(old)) { + if (!ovl_dentry_is_whiteout(new)) { /* Whiteout source */ flags |= RENAME_WHITEOUT; } else { /* Switch whiteouts */ flags |= RENAME_EXCHANGE; } - } else if (is_dir && !new->d_inode && new_opaque) { + } else if (is_dir && ovl_dentry_is_whiteout(new)) { flags |= RENAME_EXCHANGE; cleanup_whiteout = true; } @@ -896,7 +970,6 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, trap = lock_rename(new_upperdir, old_upperdir); - olddentry = lookup_one_len(old->d_name.name, old_upperdir, old->d_name.len); err = PTR_ERR(olddentry); @@ -913,6 +986,9 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, if (IS_ERR(newdentry)) goto out_dput_old; + old_opaque = ovl_dentry_is_opaque(old); + new_opaque = ovl_dentry_is_opaque(new); + err = -ESTALE; if (ovl_dentry_upper(new)) { if (opaquedir) { @@ -933,54 +1009,31 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, if (newdentry == trap) goto out_dput; - if (is_dir && !old_opaque && new_opaque) { - err = ovl_set_opaque(olddentry); + if (WARN_ON(olddentry->d_inode == newdentry->d_inode)) + goto out_dput; + + err = 0; + if (is_dir) { + if (ovl_type_merge_or_lower(old)) + err = ovl_set_redirect(old, samedir); + else if (!old_opaque && ovl_type_merge(new->d_parent)) + err = ovl_set_opaque(old, olddentry); if (err) goto out_dput; } - if (!overwrite && new_is_dir && old_opaque && !new_opaque) { - err = ovl_set_opaque(newdentry); + if (!overwrite && new_is_dir) { + if (ovl_type_merge_or_lower(new)) + err = ovl_set_redirect(new, samedir); + else if (!new_opaque && ovl_type_merge(old->d_parent)) + err = ovl_set_opaque(new, newdentry); if (err) goto out_dput; } - if (old_opaque || new_opaque) { - err = ovl_do_rename(old_upperdir->d_inode, olddentry, - new_upperdir->d_inode, newdentry, - flags); - } else { - /* No debug for the plain case */ - BUG_ON(flags & ~RENAME_EXCHANGE); - err = 
vfs_rename(old_upperdir->d_inode, olddentry, - new_upperdir->d_inode, newdentry, - NULL, flags); - } - - if (err) { - if (is_dir && !old_opaque && new_opaque) - ovl_remove_opaque(olddentry); - if (!overwrite && new_is_dir && old_opaque && !new_opaque) - ovl_remove_opaque(newdentry); + err = ovl_do_rename(old_upperdir->d_inode, olddentry, + new_upperdir->d_inode, newdentry, flags); + if (err) goto out_dput; - } - - if (is_dir && old_opaque && !new_opaque) - ovl_remove_opaque(olddentry); - if (!overwrite && new_is_dir && !old_opaque && new_opaque) - ovl_remove_opaque(newdentry); - - /* - * Old dentry now lives in different location. Dentries in - * lowerstack are stale. We cannot drop them here because - * access to them is lockless. This could be only pure upper - * or opaque directory - numlower is zero. Or upper non-dir - * entry - its pureness is tracked by flag opaque. - */ - if (old_opaque != new_opaque) { - ovl_dentry_set_opaque(old, new_opaque); - if (!overwrite) - ovl_dentry_set_opaque(new, old_opaque); - } if (cleanup_whiteout) ovl_cleanup(old_upperdir->d_inode, newdentry); @@ -1009,7 +1062,7 @@ const struct inode_operations ovl_dir_inode_operations = { .symlink = ovl_symlink, .unlink = ovl_unlink, .rmdir = ovl_rmdir, - .rename = ovl_rename2, + .rename = ovl_rename, .link = ovl_link, .setattr = ovl_setattr, .create = ovl_create, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 7fb53d055537..1ab8b0dbc237 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -13,34 +13,6 @@ #include <linux/posix_acl.h> #include "overlayfs.h" -static int ovl_copy_up_truncate(struct dentry *dentry) -{ - int err; - struct dentry *parent; - struct kstat stat; - struct path lowerpath; - const struct cred *old_cred; - - parent = dget_parent(dentry); - err = ovl_copy_up(parent); - if (err) - goto out_dput_parent; - - ovl_path_lower(dentry, &lowerpath); - - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&lowerpath, &stat); - if (!err) { - 
stat.size = 0; - err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat); - } - revert_creds(old_cred); - -out_dput_parent: - dput(parent); - return err; -} - int ovl_setattr(struct dentry *dentry, struct iattr *attr) { int err; @@ -64,27 +36,10 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (err) goto out; - if (attr->ia_valid & ATTR_SIZE) { - struct inode *realinode = d_inode(ovl_dentry_real(dentry)); - - err = -ETXTBSY; - if (atomic_read(&realinode->i_writecount) < 0) - goto out_drop_write; - } - err = ovl_copy_up(dentry); if (!err) { - struct inode *winode = NULL; - upperdentry = ovl_dentry_upper(dentry); - if (attr->ia_valid & ATTR_SIZE) { - winode = d_inode(upperdentry); - err = get_write_access(winode); - if (err) - goto out_drop_write; - } - if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) attr->ia_valid &= ~ATTR_MODE; @@ -95,11 +50,7 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (!err) ovl_copyattr(upperdentry->d_inode, dentry->d_inode); inode_unlock(upperdentry->d_inode); - - if (winode) - put_write_access(winode); } -out_drop_write: ovl_drop_write(dentry); out: return err; @@ -302,10 +253,7 @@ int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags) if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { err = ovl_want_write(dentry); if (!err) { - if (file_flags & O_TRUNC) - err = ovl_copy_up_truncate(dentry); - else - err = ovl_copy_up(dentry); + err = ovl_copy_up_flags(dentry, file_flags); ovl_drop_write(dentry); } } @@ -354,7 +302,7 @@ static const struct inode_operations ovl_symlink_inode_operations = { .update_time = ovl_update_time, }; -static void ovl_fill_inode(struct inode *inode, umode_t mode) +static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) { inode->i_ino = get_next_ino(); inode->i_mode = mode; @@ -363,8 +311,11 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode) inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; #endif - mode &= 
S_IFMT; - switch (mode) { + switch (mode & S_IFMT) { + case S_IFREG: + inode->i_op = &ovl_file_inode_operations; + break; + case S_IFDIR: inode->i_op = &ovl_dir_inode_operations; inode->i_fop = &ovl_dir_operations; @@ -375,26 +326,19 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode) break; default: - WARN(1, "illegal file type: %i\n", mode); - /* Fall through */ - - case S_IFREG: - case S_IFSOCK: - case S_IFBLK: - case S_IFCHR: - case S_IFIFO: inode->i_op = &ovl_file_inode_operations; + init_special_inode(inode, mode, rdev); break; } } -struct inode *ovl_new_inode(struct super_block *sb, umode_t mode) +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) { struct inode *inode; inode = new_inode(sb); if (inode) - ovl_fill_inode(inode, mode); + ovl_fill_inode(inode, mode, rdev); return inode; } @@ -418,7 +362,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode) inode = iget5_locked(sb, (unsigned long) realinode, ovl_inode_test, ovl_inode_set, realinode); if (inode && inode->i_state & I_NEW) { - ovl_fill_inode(inode, realinode->i_mode); + ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); set_nlink(inode, realinode->i_nlink); unlock_new_inode(inode); } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c new file mode 100644 index 000000000000..9ad48d9202a9 --- /dev/null +++ b/fs/overlayfs/namei.c @@ -0,0 +1,401 @@ +/* + * Copyright (C) 2011 Novell Inc. + * Copyright (C) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ */ + +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/xattr.h> +#include <linux/ratelimit.h> +#include "overlayfs.h" +#include "ovl_entry.h" + +struct ovl_lookup_data { + struct qstr name; + bool is_dir; + bool opaque; + bool stop; + bool last; + char *redirect; +}; + +static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, + size_t prelen, const char *post) +{ + int res; + char *s, *next, *buf = NULL; + + res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0); + if (res < 0) { + if (res == -ENODATA || res == -EOPNOTSUPP) + return 0; + goto fail; + } + buf = kzalloc(prelen + res + strlen(post) + 1, GFP_TEMPORARY); + if (!buf) + return -ENOMEM; + + if (res == 0) + goto invalid; + + res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res); + if (res < 0) + goto fail; + if (res == 0) + goto invalid; + if (buf[0] == '/') { + for (s = buf; *s++ == '/'; s = next) { + next = strchrnul(s, '/'); + if (s == next) + goto invalid; + } + } else { + if (strchr(buf, '/') != NULL) + goto invalid; + + memmove(buf + prelen, buf, res); + memcpy(buf, d->name.name, prelen); + } + + strcat(buf, post); + kfree(d->redirect); + d->redirect = buf; + d->name.name = d->redirect; + d->name.len = strlen(d->redirect); + + return 0; + +err_free: + kfree(buf); + return 0; +fail: + pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res); + goto err_free; +invalid: + pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf); + goto err_free; +} + +static bool ovl_is_opaquedir(struct dentry *dentry) +{ + int res; + char val; + + if (!d_is_dir(dentry)) + return false; + + res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); + if (res == 1 && val == 'y') + return true; + + return false; +} + +static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, + const char *name, unsigned int namelen, + size_t prelen, const char *post, + struct dentry **ret) +{ + struct dentry *this; + int err; + + this = 
lookup_one_len_unlocked(name, base, namelen); + if (IS_ERR(this)) { + err = PTR_ERR(this); + this = NULL; + if (err == -ENOENT || err == -ENAMETOOLONG) + goto out; + goto out_err; + } + if (!this->d_inode) + goto put_and_out; + + if (ovl_dentry_weird(this)) { + /* Don't support traversing automounts and other weirdness */ + err = -EREMOTE; + goto out_err; + } + if (ovl_is_whiteout(this)) { + d->stop = d->opaque = true; + goto put_and_out; + } + if (!d_can_lookup(this)) { + d->stop = true; + if (d->is_dir) + goto put_and_out; + goto out; + } + d->is_dir = true; + if (!d->last && ovl_is_opaquedir(this)) { + d->stop = d->opaque = true; + goto out; + } + err = ovl_check_redirect(this, d, prelen, post); + if (err) + goto out_err; +out: + *ret = this; + return 0; + +put_and_out: + dput(this); + this = NULL; + goto out; + +out_err: + dput(this); + return err; +} + +static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, + struct dentry **ret) +{ + const char *s = d->name.name; + struct dentry *dentry = NULL; + int err; + + if (*s != '/') + return ovl_lookup_single(base, d, d->name.name, d->name.len, + 0, "", ret); + + while (*s++ == '/' && !IS_ERR_OR_NULL(base) && d_can_lookup(base)) { + const char *next = strchrnul(s, '/'); + size_t slen = strlen(s); + + if (WARN_ON(slen > d->name.len) || + WARN_ON(strcmp(d->name.name + d->name.len - slen, s))) + return -EIO; + + err = ovl_lookup_single(base, d, s, next - s, + d->name.len - slen, next, &base); + dput(dentry); + if (err) + return err; + dentry = base; + s = next; + } + *ret = dentry; + return 0; +} + +/* + * Returns next layer in stack starting from top. + * Returns -1 if this is the last layer. + */ +int ovl_path_next(int idx, struct dentry *dentry, struct path *path) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + BUG_ON(idx < 0); + if (idx == 0) { + ovl_path_upper(dentry, path); + if (path->dentry) + return oe->numlower ? 
1 : -1; + idx++; + } + BUG_ON(idx > oe->numlower); + *path = oe->lowerstack[idx - 1]; + + return (idx < oe->numlower) ? idx + 1 : -1; +} + +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct ovl_entry *oe; + const struct cred *old_cred; + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_entry *poe = dentry->d_parent->d_fsdata; + struct path *stack = NULL; + struct dentry *upperdir, *upperdentry = NULL; + unsigned int ctr = 0; + struct inode *inode = NULL; + bool upperopaque = false; + char *upperredirect = NULL; + struct dentry *this; + unsigned int i; + int err; + struct ovl_lookup_data d = { + .name = dentry->d_name, + .is_dir = false, + .opaque = false, + .stop = false, + .last = !poe->numlower, + .redirect = NULL, + }; + + if (dentry->d_name.len > ofs->namelen) + return ERR_PTR(-ENAMETOOLONG); + + old_cred = ovl_override_creds(dentry->d_sb); + upperdir = ovl_upperdentry_dereference(poe); + if (upperdir) { + err = ovl_lookup_layer(upperdir, &d, &upperdentry); + if (err) + goto out; + + if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) { + dput(upperdentry); + err = -EREMOTE; + goto out; + } + + if (d.redirect) { + upperredirect = kstrdup(d.redirect, GFP_KERNEL); + if (!upperredirect) + goto out_put_upper; + if (d.redirect[0] == '/') + poe = dentry->d_sb->s_root->d_fsdata; + } + upperopaque = d.opaque; + } + + if (!d.stop && poe->numlower) { + err = -ENOMEM; + stack = kcalloc(ofs->numlower, sizeof(struct path), + GFP_TEMPORARY); + if (!stack) + goto out_put_upper; + } + + for (i = 0; !d.stop && i < poe->numlower; i++) { + struct path lowerpath = poe->lowerstack[i]; + + d.last = i == poe->numlower - 1; + err = ovl_lookup_layer(lowerpath.dentry, &d, &this); + if (err) + goto out_put; + + if (!this) + continue; + + stack[ctr].dentry = this; + stack[ctr].mnt = lowerpath.mnt; + ctr++; + + if (d.stop) + break; + + if (d.redirect && + d.redirect[0] == '/' && + poe != dentry->d_sb->s_root->d_fsdata) 
{ + poe = dentry->d_sb->s_root->d_fsdata; + + /* Find the current layer on the root dentry */ + for (i = 0; i < poe->numlower; i++) + if (poe->lowerstack[i].mnt == lowerpath.mnt) + break; + if (WARN_ON(i == poe->numlower)) + break; + } + } + + oe = ovl_alloc_entry(ctr); + err = -ENOMEM; + if (!oe) + goto out_put; + + if (upperdentry || ctr) { + struct dentry *realdentry; + struct inode *realinode; + + realdentry = upperdentry ? upperdentry : stack[0].dentry; + realinode = d_inode(realdentry); + + err = -ENOMEM; + if (upperdentry && !d_is_dir(upperdentry)) { + inode = ovl_get_inode(dentry->d_sb, realinode); + } else { + inode = ovl_new_inode(dentry->d_sb, realinode->i_mode, + realinode->i_rdev); + if (inode) + ovl_inode_init(inode, realinode, !!upperdentry); + } + if (!inode) + goto out_free_oe; + ovl_copyattr(realdentry->d_inode, inode); + } + + revert_creds(old_cred); + oe->opaque = upperopaque; + oe->redirect = upperredirect; + oe->__upperdentry = upperdentry; + memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); + kfree(stack); + kfree(d.redirect); + dentry->d_fsdata = oe; + d_add(dentry, inode); + + return NULL; + +out_free_oe: + kfree(oe); +out_put: + for (i = 0; i < ctr; i++) + dput(stack[i].dentry); + kfree(stack); +out_put_upper: + dput(upperdentry); + kfree(upperredirect); +out: + kfree(d.redirect); + revert_creds(old_cred); + return ERR_PTR(err); +} + +bool ovl_lower_positive(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + struct ovl_entry *poe = dentry->d_parent->d_fsdata; + const struct qstr *name = &dentry->d_name; + unsigned int i; + bool positive = false; + bool done = false; + + /* + * If dentry is negative, then lower is positive iff this is a + * whiteout. 
+ */ + if (!dentry->d_inode) + return oe->opaque; + + /* Negative upper -> positive lower */ + if (!oe->__upperdentry) + return true; + + /* Positive upper -> have to look up lower to see whether it exists */ + for (i = 0; !done && !positive && i < poe->numlower; i++) { + struct dentry *this; + struct dentry *lowerdir = poe->lowerstack[i].dentry; + + this = lookup_one_len_unlocked(name->name, lowerdir, + name->len); + if (IS_ERR(this)) { + switch (PTR_ERR(this)) { + case -ENOENT: + case -ENAMETOOLONG: + break; + + default: + /* + * Assume something is there, we just couldn't + * access it. + */ + positive = true; + break; + } + } else { + if (this->d_inode) { + positive = !ovl_is_whiteout(this); + done = true; + } + dput(this); + } + } + + return positive; +} diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e218e741cb99..8af450b0e57a 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -9,23 +9,17 @@ #include <linux/kernel.h> -struct ovl_entry; - enum ovl_path_type { - __OVL_PATH_PURE = (1 << 0), - __OVL_PATH_UPPER = (1 << 1), - __OVL_PATH_MERGE = (1 << 2), + __OVL_PATH_UPPER = (1 << 0), + __OVL_PATH_MERGE = (1 << 1), }; #define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER) #define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE) -#define OVL_TYPE_PURE_UPPER(type) ((type) & __OVL_PATH_PURE) -#define OVL_TYPE_MERGE_OR_LOWER(type) \ - (OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type)) - #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay." 
#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque" +#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect" #define OVL_ISUPPER_MASK 1UL @@ -143,35 +137,43 @@ static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper) return (struct inode *) (x & ~OVL_ISUPPER_MASK); } +/* util.c */ +int ovl_want_write(struct dentry *dentry); +void ovl_drop_write(struct dentry *dentry); +struct dentry *ovl_workdir(struct dentry *dentry); +const struct cred *ovl_override_creds(struct super_block *sb); +struct ovl_entry *ovl_alloc_entry(unsigned int numlower); +bool ovl_dentry_remote(struct dentry *dentry); +bool ovl_dentry_weird(struct dentry *dentry); enum ovl_path_type ovl_path_type(struct dentry *dentry); -u64 ovl_dentry_version_get(struct dentry *dentry); -void ovl_dentry_version_inc(struct dentry *dentry); void ovl_path_upper(struct dentry *dentry, struct path *path); void ovl_path_lower(struct dentry *dentry, struct path *path); enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); -int ovl_path_next(int idx, struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); -struct vfsmount *ovl_entry_mnt_real(struct ovl_entry *oe, struct inode *inode, - bool is_upper); struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); -struct dentry *ovl_workdir(struct dentry *dentry); -int ovl_want_write(struct dentry *dentry); -void ovl_drop_write(struct dentry *dentry); bool ovl_dentry_is_opaque(struct dentry *dentry); -void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); -bool ovl_is_whiteout(struct dentry *dentry); -const struct cred *ovl_override_creds(struct super_block *sb); +bool ovl_dentry_is_whiteout(struct dentry *dentry); +void ovl_dentry_set_opaque(struct dentry *dentry); +bool ovl_redirect_dir(struct 
super_block *sb); +void ovl_clear_redirect_dir(struct super_block *sb); +const char *ovl_dentry_get_redirect(struct dentry *dentry); +void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); +void ovl_inode_init(struct inode *inode, struct inode *realinode, + bool is_upper); void ovl_inode_update(struct inode *inode, struct inode *upperinode); -struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags); +void ovl_dentry_version_inc(struct dentry *dentry); +u64 ovl_dentry_version_get(struct dentry *dentry); +bool ovl_is_whiteout(struct dentry *dentry); struct file *ovl_path_open(struct path *path, int flags); -struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, - struct kstat *stat, const char *link); +/* namei.c */ +int ovl_path_next(int idx, struct dentry *dentry, struct path *path); +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); +bool ovl_lower_positive(struct dentry *dentry); /* readdir.c */ extern const struct file_operations ovl_dir_operations; @@ -195,7 +197,7 @@ int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); -struct inode *ovl_new_inode(struct super_block *sb, umode_t mode); +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode); static inline void ovl_copyattr(struct inode *from, struct inode *to) { @@ -210,14 +212,18 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); +struct cattr { + dev_t rdev; + umode_t mode; + const char *link; +}; int 
ovl_create_real(struct inode *dir, struct dentry *newdentry, - struct kstat *stat, const char *link, + struct cattr *attr, struct dentry *hardlink, bool debug); void ovl_cleanup(struct inode *dir, struct dentry *dentry); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); -int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, - struct path *lowerpath, struct kstat *stat); +int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h new file mode 100644 index 000000000000..d14bca1850d9 --- /dev/null +++ b/fs/overlayfs/ovl_entry.h @@ -0,0 +1,53 @@ +/* + * + * Copyright (C) 2011 Novell Inc. + * Copyright (C) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ */ + +struct ovl_config { + char *lowerdir; + char *upperdir; + char *workdir; + bool default_permissions; + bool redirect_dir; +}; + +/* private information held for overlayfs's superblock */ +struct ovl_fs { + struct vfsmount *upper_mnt; + unsigned numlower; + struct vfsmount **lower_mnt; + struct dentry *workdir; + long namelen; + /* pathnames of lower and upper dirs, for show_options */ + struct ovl_config config; + /* creds of process who forced instantiation of super block */ + const struct cred *creator_cred; +}; + +/* private information held for every overlayfs dentry */ +struct ovl_entry { + struct dentry *__upperdentry; + struct ovl_dir_cache *cache; + union { + struct { + u64 version; + const char *redirect; + bool opaque; + }; + struct rcu_head rcu; + }; + unsigned numlower; + struct path lowerstack[]; +}; + +struct ovl_entry *ovl_alloc_entry(unsigned int numlower); + +static inline struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) +{ + return lockless_dereference(oe->__upperdentry); +} diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 0e100856c7b8..20f48abbb82f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -9,280 +9,29 @@ #include <linux/fs.h> #include <linux/namei.h> -#include <linux/pagemap.h> #include <linux/xattr.h> -#include <linux/security.h> #include <linux/mount.h> -#include <linux/slab.h> #include <linux/parser.h> #include <linux/module.h> -#include <linux/sched.h> #include <linux/statfs.h> #include <linux/seq_file.h> #include <linux/posix_acl_xattr.h> #include "overlayfs.h" +#include "ovl_entry.h" MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Overlay filesystem"); MODULE_LICENSE("GPL"); -struct ovl_config { - char *lowerdir; - char *upperdir; - char *workdir; - bool default_permissions; -}; - -/* private information held for overlayfs's superblock */ -struct ovl_fs { - struct vfsmount *upper_mnt; - unsigned numlower; - struct vfsmount **lower_mnt; - struct dentry 
*workdir; - long lower_namelen; - /* pathnames of lower and upper dirs, for show_options */ - struct ovl_config config; - /* creds of process who forced instantiation of super block */ - const struct cred *creator_cred; -}; struct ovl_dir_cache; -/* private information held for every overlayfs dentry */ -struct ovl_entry { - struct dentry *__upperdentry; - struct ovl_dir_cache *cache; - union { - struct { - u64 version; - bool opaque; - }; - struct rcu_head rcu; - }; - unsigned numlower; - struct path lowerstack[]; -}; - #define OVL_MAX_STACK 500 -static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) -{ - return oe->numlower ? oe->lowerstack[0].dentry : NULL; -} - -enum ovl_path_type ovl_path_type(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - enum ovl_path_type type = 0; - - if (oe->__upperdentry) { - type = __OVL_PATH_UPPER; - - /* - * Non-dir dentry can hold lower dentry from previous - * location. Its purity depends only on opaque flag. - */ - if (oe->numlower && S_ISDIR(dentry->d_inode->i_mode)) - type |= __OVL_PATH_MERGE; - else if (!oe->opaque) - type |= __OVL_PATH_PURE; - } else { - if (oe->numlower > 1) - type |= __OVL_PATH_MERGE; - } - return type; -} - -static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) -{ - return lockless_dereference(oe->__upperdentry); -} - -void ovl_path_upper(struct dentry *dentry, struct path *path) -{ - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - struct ovl_entry *oe = dentry->d_fsdata; - - path->mnt = ofs->upper_mnt; - path->dentry = ovl_upperdentry_dereference(oe); -} - -enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) -{ - enum ovl_path_type type = ovl_path_type(dentry); - - if (!OVL_TYPE_UPPER(type)) - ovl_path_lower(dentry, path); - else - ovl_path_upper(dentry, path); - - return type; -} - -struct dentry *ovl_dentry_upper(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - return ovl_upperdentry_dereference(oe); -} - 
-struct dentry *ovl_dentry_lower(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - return __ovl_dentry_lower(oe); -} - -struct dentry *ovl_dentry_real(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - struct dentry *realdentry; - - realdentry = ovl_upperdentry_dereference(oe); - if (!realdentry) - realdentry = __ovl_dentry_lower(oe); - - return realdentry; -} - -static void ovl_inode_init(struct inode *inode, struct inode *realinode, - bool is_upper) -{ - WRITE_ONCE(inode->i_private, (unsigned long) realinode | - (is_upper ? OVL_ISUPPER_MASK : 0)); -} - -struct vfsmount *ovl_entry_mnt_real(struct ovl_entry *oe, struct inode *inode, - bool is_upper) -{ - if (is_upper) { - struct ovl_fs *ofs = inode->i_sb->s_fs_info; - - return ofs->upper_mnt; - } else { - return oe->numlower ? oe->lowerstack[0].mnt : NULL; - } -} - -struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - return oe->cache; -} - -void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - oe->cache = cache; -} - -void ovl_path_lower(struct dentry *dentry, struct path *path) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - *path = oe->numlower ? 
oe->lowerstack[0] : (struct path) { NULL, NULL }; -} - -int ovl_want_write(struct dentry *dentry) -{ - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - return mnt_want_write(ofs->upper_mnt); -} - -void ovl_drop_write(struct dentry *dentry) -{ - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - mnt_drop_write(ofs->upper_mnt); -} - -struct dentry *ovl_workdir(struct dentry *dentry) -{ - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - return ofs->workdir; -} - -bool ovl_dentry_is_opaque(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - return oe->opaque; -} - -void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) -{ - struct ovl_entry *oe = dentry->d_fsdata; - oe->opaque = opaque; -} - -void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode)); - WARN_ON(oe->__upperdentry); - /* - * Make sure upperdentry is consistent before making it visible to - * ovl_upperdentry_dereference(). 
- */ - smp_wmb(); - oe->__upperdentry = upperdentry; -} - -void ovl_inode_update(struct inode *inode, struct inode *upperinode) -{ - WARN_ON(!upperinode); - WARN_ON(!inode_unhashed(inode)); - WRITE_ONCE(inode->i_private, - (unsigned long) upperinode | OVL_ISUPPER_MASK); - if (!S_ISDIR(upperinode->i_mode)) - __insert_inode_hash(inode, (unsigned long) upperinode); -} - -void ovl_dentry_version_inc(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - WARN_ON(!inode_is_locked(dentry->d_inode)); - oe->version++; -} - -u64 ovl_dentry_version_get(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - WARN_ON(!inode_is_locked(dentry->d_inode)); - return oe->version; -} - -bool ovl_is_whiteout(struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - - return inode && IS_WHITEOUT(inode); -} - -const struct cred *ovl_override_creds(struct super_block *sb) -{ - struct ovl_fs *ofs = sb->s_fs_info; - - return override_creds(ofs->creator_cred); -} - -static bool ovl_is_opaquedir(struct dentry *dentry) -{ - int res; - char val; - - if (!d_is_dir(dentry)) - return false; - - res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); - if (res == 1 && val == 'y') - return true; - - return false; -} +static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); +module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); +MODULE_PARM_DESC(ovl_redirect_dir_def, + "Default to on or off for the redirect_dir feature"); static void ovl_dentry_release(struct dentry *dentry) { @@ -292,6 +41,7 @@ static void ovl_dentry_release(struct dentry *dentry) unsigned int i; dput(oe->__upperdentry); + kfree(oe->redirect); for (i = 0; i < oe->numlower; i++) dput(oe->lowerstack[i].dentry); kfree_rcu(oe, rcu); @@ -304,7 +54,7 @@ static struct dentry *ovl_d_real(struct dentry *dentry, { struct dentry *real; - if (d_is_dir(dentry)) { + if (!d_is_reg(dentry)) { if (!inode || inode == d_inode(dentry)) return dentry; goto bug; @@ -392,226 
+142,6 @@ static const struct dentry_operations ovl_reval_dentry_operations = { .d_weak_revalidate = ovl_dentry_weak_revalidate, }; -static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) -{ - size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); - struct ovl_entry *oe = kzalloc(size, GFP_KERNEL); - - if (oe) - oe->numlower = numlower; - - return oe; -} - -static bool ovl_dentry_remote(struct dentry *dentry) -{ - return dentry->d_flags & - (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | - DCACHE_OP_REAL); -} - -static bool ovl_dentry_weird(struct dentry *dentry) -{ - return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | - DCACHE_MANAGE_TRANSIT | - DCACHE_OP_HASH | - DCACHE_OP_COMPARE); -} - -static inline struct dentry *ovl_lookup_real(struct dentry *dir, - const struct qstr *name) -{ - struct dentry *dentry; - - dentry = lookup_one_len_unlocked(name->name, dir, name->len); - - if (IS_ERR(dentry)) { - if (PTR_ERR(dentry) == -ENOENT) - dentry = NULL; - } else if (!dentry->d_inode) { - dput(dentry); - dentry = NULL; - } else if (ovl_dentry_weird(dentry)) { - dput(dentry); - /* Don't support traversing automounts and other weirdness */ - dentry = ERR_PTR(-EREMOTE); - } - return dentry; -} - -/* - * Returns next layer in stack starting from top. - * Returns -1 if this is the last layer. - */ -int ovl_path_next(int idx, struct dentry *dentry, struct path *path) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - BUG_ON(idx < 0); - if (idx == 0) { - ovl_path_upper(dentry, path); - if (path->dentry) - return oe->numlower ? 1 : -1; - idx++; - } - BUG_ON(idx > oe->numlower); - *path = oe->lowerstack[idx - 1]; - - return (idx < oe->numlower) ? 
idx + 1 : -1; -} - -struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) -{ - struct ovl_entry *oe; - const struct cred *old_cred; - struct ovl_entry *poe = dentry->d_parent->d_fsdata; - struct path *stack = NULL; - struct dentry *upperdir, *upperdentry = NULL; - unsigned int ctr = 0; - struct inode *inode = NULL; - bool upperopaque = false; - struct dentry *this, *prev = NULL; - unsigned int i; - int err; - - old_cred = ovl_override_creds(dentry->d_sb); - upperdir = ovl_upperdentry_dereference(poe); - if (upperdir) { - this = ovl_lookup_real(upperdir, &dentry->d_name); - err = PTR_ERR(this); - if (IS_ERR(this)) - goto out; - - if (this) { - if (unlikely(ovl_dentry_remote(this))) { - dput(this); - err = -EREMOTE; - goto out; - } - if (ovl_is_whiteout(this)) { - dput(this); - this = NULL; - upperopaque = true; - } else if (poe->numlower && ovl_is_opaquedir(this)) { - upperopaque = true; - } - } - upperdentry = prev = this; - } - - if (!upperopaque && poe->numlower) { - err = -ENOMEM; - stack = kcalloc(poe->numlower, sizeof(struct path), GFP_KERNEL); - if (!stack) - goto out_put_upper; - } - - for (i = 0; !upperopaque && i < poe->numlower; i++) { - bool opaque = false; - struct path lowerpath = poe->lowerstack[i]; - - this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name); - err = PTR_ERR(this); - if (IS_ERR(this)) { - /* - * If it's positive, then treat ENAMETOOLONG as ENOENT. - */ - if (err == -ENAMETOOLONG && (upperdentry || ctr)) - continue; - goto out_put; - } - if (!this) - continue; - if (ovl_is_whiteout(this)) { - dput(this); - break; - } - /* - * Only makes sense to check opaque dir if this is not the - * lowermost layer. - */ - if (i < poe->numlower - 1 && ovl_is_opaquedir(this)) - opaque = true; - - if (prev && (!S_ISDIR(prev->d_inode->i_mode) || - !S_ISDIR(this->d_inode->i_mode))) { - /* - * FIXME: check for upper-opaqueness maybe better done - * in remove code. 
- */ - if (prev == upperdentry) - upperopaque = true; - dput(this); - break; - } - /* - * If this is a non-directory then stop here. - */ - if (!S_ISDIR(this->d_inode->i_mode)) - opaque = true; - - stack[ctr].dentry = this; - stack[ctr].mnt = lowerpath.mnt; - ctr++; - prev = this; - if (opaque) - break; - } - - oe = ovl_alloc_entry(ctr); - err = -ENOMEM; - if (!oe) - goto out_put; - - if (upperdentry || ctr) { - struct dentry *realdentry; - struct inode *realinode; - - realdentry = upperdentry ? upperdentry : stack[0].dentry; - realinode = d_inode(realdentry); - - err = -ENOMEM; - if (upperdentry && !d_is_dir(upperdentry)) { - inode = ovl_get_inode(dentry->d_sb, realinode); - } else { - inode = ovl_new_inode(dentry->d_sb, realinode->i_mode); - if (inode) - ovl_inode_init(inode, realinode, !!upperdentry); - } - if (!inode) - goto out_free_oe; - ovl_copyattr(realdentry->d_inode, inode); - } - - revert_creds(old_cred); - oe->opaque = upperopaque; - oe->__upperdentry = upperdentry; - memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); - kfree(stack); - dentry->d_fsdata = oe; - d_add(dentry, inode); - - return NULL; - -out_free_oe: - kfree(oe); -out_put: - for (i = 0; i < ctr; i++) - dput(stack[i].dentry); - kfree(stack); -out_put_upper: - dput(upperdentry); -out: - revert_creds(old_cred); - return ERR_PTR(err); -} - -struct file *ovl_path_open(struct path *path, int flags) -{ - return dentry_open(path, flags | O_NOATIME, current_cred()); -} - static void ovl_put_super(struct super_block *sb) { struct ovl_fs *ufs = sb->s_fs_info; @@ -649,7 +179,7 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) err = vfs_statfs(&path, buf); if (!err) { - buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen); + buf->f_namelen = ofs->namelen; buf->f_type = OVERLAYFS_SUPER_MAGIC; } @@ -674,6 +204,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) } if (ufs->config.default_permissions) seq_puts(m, ",default_permissions"); + if 
(ufs->config.redirect_dir != ovl_redirect_dir_def) + seq_printf(m, ",redirect_dir=%s", + ufs->config.redirect_dir ? "on" : "off"); return 0; } @@ -700,6 +233,8 @@ enum { OPT_UPPERDIR, OPT_WORKDIR, OPT_DEFAULT_PERMISSIONS, + OPT_REDIRECT_DIR_ON, + OPT_REDIRECT_DIR_OFF, OPT_ERR, }; @@ -708,6 +243,8 @@ static const match_table_t ovl_tokens = { {OPT_UPPERDIR, "upperdir=%s"}, {OPT_WORKDIR, "workdir=%s"}, {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, + {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, + {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"}, {OPT_ERR, NULL} }; @@ -772,6 +309,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->default_permissions = true; break; + case OPT_REDIRECT_DIR_ON: + config->redirect_dir = true; + break; + + case OPT_REDIRECT_DIR_OFF: + config->redirect_dir = false; + break; + default: pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; @@ -809,12 +354,9 @@ retry: strlen(OVL_WORKDIR_NAME)); if (!IS_ERR(work)) { - struct kstat stat = { - .mode = S_IFDIR | 0, - }; struct iattr attr = { .ia_valid = ATTR_MODE, - .ia_mode = stat.mode, + .ia_mode = S_IFDIR | 0, }; if (work->d_inode) { @@ -828,7 +370,9 @@ retry: goto retry; } - err = ovl_create_real(dir, work, &stat, NULL, NULL, true); + err = ovl_create_real(dir, work, + &(struct cattr){.mode = S_IFDIR | 0}, + NULL, true); if (err) goto out_dput; @@ -903,7 +447,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) pr_err("overlayfs: filesystem on '%s' not supported\n", name); goto out_put; } - if (!S_ISDIR(path->dentry->d_inode->i_mode)) { + if (!d_is_dir(path->dentry)) { pr_err("overlayfs: '%s' not a directory\n", name); goto out_put; } @@ -936,22 +480,33 @@ static int ovl_mount_dir(const char *name, struct path *path) return err; } -static int ovl_lower_dir(const char *name, struct path *path, long *namelen, - int *stack_depth, bool *remote) +static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, + const 
char *name) { - int err; struct kstatfs statfs; + int err = vfs_statfs(path, &statfs); + + if (err) + pr_err("overlayfs: statfs failed on '%s'\n", name); + else + ofs->namelen = max(ofs->namelen, statfs.f_namelen); + + return err; +} + +static int ovl_lower_dir(const char *name, struct path *path, + struct ovl_fs *ofs, int *stack_depth, bool *remote) +{ + int err; err = ovl_mount_dir_noesc(name, path); if (err) goto out; - err = vfs_statfs(path, &statfs); - if (err) { - pr_err("overlayfs: statfs failed on '%s'\n", name); + err = ovl_check_namelen(path, ofs, name); + if (err) goto out_put; - } - *namelen = max(*namelen, statfs.f_namelen); + *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); if (ovl_dentry_remote(path->dentry)) @@ -1067,7 +622,7 @@ static int ovl_own_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { - return -EPERM; + return -EOPNOTSUPP; } static int ovl_own_xattr_set(const struct xattr_handler *handler, @@ -1075,7 +630,7 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { - return -EPERM; + return -EOPNOTSUPP; } static int ovl_other_xattr_get(const struct xattr_handler *handler, @@ -1153,6 +708,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ufs) goto out; + ufs->config.redirect_dir = ovl_redirect_dir_def; err = ovl_parse_opt((char *) data, &ufs->config); if (err) goto out_free_config; @@ -1183,6 +739,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_put_upperpath; } + err = ovl_check_namelen(&upperpath, ufs, ufs->config.upperdir); + if (err) + goto out_put_upperpath; + err = ovl_mount_dir(ufs->config.workdir, &workpath); if (err) goto out_put_upperpath; @@ -1214,15 +774,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_free_lowertmp; } + err = -ENOMEM; 
stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); if (!stack) goto out_free_lowertmp; + err = -EINVAL; lower = lowertmp; for (numlower = 0; numlower < stacklen; numlower++) { - err = ovl_lower_dir(lower, &stack[numlower], - &ufs->lower_namelen, &sb->s_stack_depth, - &remote); + err = ovl_lower_dir(lower, &stack[numlower], ufs, + &sb->s_stack_depth, &remote); if (err) goto out_put_lowerpath; @@ -1324,7 +885,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = ufs; sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; - root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR)); + root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); if (!root_dentry) goto out_free_oe; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c new file mode 100644 index 000000000000..952286f4826c --- /dev/null +++ b/fs/overlayfs/util.c @@ -0,0 +1,265 @@ +/* + * Copyright (C) 2011 Novell Inc. + * Copyright (C) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ */ + +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/slab.h> +#include <linux/xattr.h> +#include "overlayfs.h" +#include "ovl_entry.h" + +int ovl_want_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + return mnt_want_write(ofs->upper_mnt); +} + +void ovl_drop_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + mnt_drop_write(ofs->upper_mnt); +} + +struct dentry *ovl_workdir(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + return ofs->workdir; +} + +const struct cred *ovl_override_creds(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return override_creds(ofs->creator_cred); +} + +struct ovl_entry *ovl_alloc_entry(unsigned int numlower) +{ + size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); + struct ovl_entry *oe = kzalloc(size, GFP_KERNEL); + + if (oe) + oe->numlower = numlower; + + return oe; +} + +bool ovl_dentry_remote(struct dentry *dentry) +{ + return dentry->d_flags & + (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | + DCACHE_OP_REAL); +} + +bool ovl_dentry_weird(struct dentry *dentry) +{ + return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | + DCACHE_MANAGE_TRANSIT | + DCACHE_OP_HASH | + DCACHE_OP_COMPARE); +} + +enum ovl_path_type ovl_path_type(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + enum ovl_path_type type = 0; + + if (oe->__upperdentry) { + type = __OVL_PATH_UPPER; + + /* + * Non-dir dentry can hold lower dentry from previous + * location. 
+ */ + if (oe->numlower && d_is_dir(dentry)) + type |= __OVL_PATH_MERGE; + } else { + if (oe->numlower > 1) + type |= __OVL_PATH_MERGE; + } + return type; +} + +void ovl_path_upper(struct dentry *dentry, struct path *path) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_entry *oe = dentry->d_fsdata; + + path->mnt = ofs->upper_mnt; + path->dentry = ovl_upperdentry_dereference(oe); +} + +void ovl_path_lower(struct dentry *dentry, struct path *path) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL }; +} + +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) +{ + enum ovl_path_type type = ovl_path_type(dentry); + + if (!OVL_TYPE_UPPER(type)) + ovl_path_lower(dentry, path); + else + ovl_path_upper(dentry, path); + + return type; +} + +struct dentry *ovl_dentry_upper(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return ovl_upperdentry_dereference(oe); +} + +static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) +{ + return oe->numlower ? 
oe->lowerstack[0].dentry : NULL; +} + +struct dentry *ovl_dentry_lower(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return __ovl_dentry_lower(oe); +} + +struct dentry *ovl_dentry_real(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + struct dentry *realdentry; + + realdentry = ovl_upperdentry_dereference(oe); + if (!realdentry) + realdentry = __ovl_dentry_lower(oe); + + return realdentry; +} + +struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return oe->cache; +} + +void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + oe->cache = cache; +} + +bool ovl_dentry_is_opaque(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + return oe->opaque; +} + +bool ovl_dentry_is_whiteout(struct dentry *dentry) +{ + return !dentry->d_inode && ovl_dentry_is_opaque(dentry); +} + +void ovl_dentry_set_opaque(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + oe->opaque = true; +} + +bool ovl_redirect_dir(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->config.redirect_dir; +} + +void ovl_clear_redirect_dir(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + ofs->config.redirect_dir = false; +} + +const char *ovl_dentry_get_redirect(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return oe->redirect; +} + +void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + kfree(oe->redirect); + oe->redirect = redirect; +} + +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode)); + WARN_ON(oe->__upperdentry); + /* + * Make sure upperdentry is consistent before making it visible to + * ovl_upperdentry_dereference(). 
+ */ + smp_wmb(); + oe->__upperdentry = upperdentry; +} + +void ovl_inode_init(struct inode *inode, struct inode *realinode, bool is_upper) +{ + WRITE_ONCE(inode->i_private, (unsigned long) realinode | + (is_upper ? OVL_ISUPPER_MASK : 0)); +} + +void ovl_inode_update(struct inode *inode, struct inode *upperinode) +{ + WARN_ON(!upperinode); + WARN_ON(!inode_unhashed(inode)); + WRITE_ONCE(inode->i_private, + (unsigned long) upperinode | OVL_ISUPPER_MASK); + if (!S_ISDIR(upperinode->i_mode)) + __insert_inode_hash(inode, (unsigned long) upperinode); +} + +void ovl_dentry_version_inc(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + WARN_ON(!inode_is_locked(dentry->d_inode)); + oe->version++; +} + +u64 ovl_dentry_version_get(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + WARN_ON(!inode_is_locked(dentry->d_inode)); + return oe->version; +} + +bool ovl_is_whiteout(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + + return inode && IS_WHITEOUT(inode); +} + +struct file *ovl_path_open(struct path *path, int flags) +{ + return dentry_open(path, flags | O_NOATIME, current_cred()); +} diff --git a/fs/read_write.c b/fs/read_write.c index 190e0d362581..53bccd1c786e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1538,9 +1538,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (len == 0) return 0; - ret = mnt_want_write_file(file_out); - if (ret) - return ret; + sb_start_write(inode_out->i_sb); ret = -EOPNOTSUPP; if (file_out->f_op->copy_file_range) @@ -1559,7 +1557,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, inc_syscr(current); inc_syscw(current); - mnt_drop_write_file(file_out); + sb_end_write(inode_out->i_sb); return ret; } @@ -1657,15 +1655,19 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct inode *inode_out = file_inode(file_out); int ret; - if (inode_in->i_sb != inode_out->i_sb || - file_in->f_path.mnt != file_out->f_path.mnt) - 
return -EXDEV; - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) return -EINVAL; + /* + * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on + * the same mount. Practically, they only need to be on the same file + * system. + */ + if (inode_in->i_sb != inode_out->i_sb) + return -EXDEV; + if (!(file_in->f_mode & FMODE_READ) || !(file_out->f_mode & FMODE_WRITE) || (file_out->f_flags & O_APPEND)) @@ -1685,10 +1687,6 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, if (pos_in + len > i_size_read(inode_in)) return -EINVAL; - ret = mnt_want_write_file(file_out); - if (ret) - return ret; - ret = file_in->f_op->clone_file_range(file_in, pos_in, file_out, pos_out, len); if (!ret) { @@ -1696,7 +1694,6 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, fsnotify_modify(file_out); } - mnt_drop_write_file(file_out); return ret; } EXPORT_SYMBOL(vfs_clone_file_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index b84230e070be..83de8b6601ba 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1731,6 +1731,19 @@ extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); +static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len) +{ + int ret; + + sb_start_write(file_inode(file_out)->i_sb); + ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); + sb_end_write(file_inode(file_out)->i_sb); + + return ret; +} + struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1752d6b10ac4..310882fb698e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -315,7 +315,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) 
&unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->path.dentry; - if (dentry && d_real_inode(dentry) == i) { + if (dentry && d_backing_inode(dentry) == i) { sock_hold(s); goto found; } @@ -913,7 +913,7 @@ static struct sock *unix_find_other(struct net *net, err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); if (err) goto fail; - inode = d_real_inode(path.dentry); + inode = d_backing_inode(path.dentry); err = inode_permission(inode, MAY_WRITE); if (err) goto put_fail; @@ -1040,7 +1040,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out_up; } addr->hash = UNIX_HASH_SIZE; - hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); + hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); u->path = path; list = &unix_socket_table[hash]; |