From 4ab30319fd7c691a1b3165325c647a5cd6d282ac Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2018 15:44:43 +0200 Subject: Revert "vfs: add flags to d_real()" This reverts commit 495e642939114478a5237a7d91661ba93b76f15a. No user of "flags" argument of d_real() remain. Signed-off-by: Miklos Szeredi --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/vfs.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 2c391338c675..24e1a4f37c83 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -22,7 +22,7 @@ prototypes: struct vfsmount *(*d_automount)(struct path *path); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, const struct inode *, - unsigned int, unsigned int); + unsigned int); locking rules: rename_lock ->d_lock may block rcu-walk diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 829a7b7857a4..6417b161f88d 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -1001,7 +1001,7 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, const struct inode *, - unsigned int, unsigned int); + unsigned int); }; d_revalidate: called when the VFS needs to revalidate a dentry. This -- cgit v1.2.3 From fb16043b46831a75c9b076a7262ae035290b0409 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2018 15:44:44 +0200 Subject: vfs: remove open_flags from d_real() Opening regular files on overlayfs is now handled via ovl_open(). Remove the now unused "open_flags" argument from d_op->d_real() and the d_real() helper. Signed-off-by: Miklos Szeredi --- Documentation/filesystems/Locking | 3 +-- Documentation/filesystems/vfs.txt | 16 ++++------------ fs/overlayfs/super.c | 36 +++--------------------------------- include/linux/dcache.h | 11 ++++------- include/linux/fs.h | 2 +- 5 files changed, 13 insertions(+), 55 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 24e1a4f37c83..08a36e14e8fc 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -21,8 +21,7 @@ prototypes: char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); struct vfsmount *(*d_automount)(struct path *path); int (*d_manage)(const struct path *, bool); - struct dentry *(*d_real)(struct dentry *, const struct inode *, - unsigned int); + struct dentry *(*d_real)(struct dentry *, const struct inode *); locking rules: rename_lock ->d_lock may block rcu-walk diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 6417b161f88d..497609f00e3e 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -1000,8 +1000,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); - struct dentry *(*d_real)(struct dentry *, const struct inode *, - unsigned int); + struct dentry *(*d_real)(struct dentry *, const struct inode *); }; d_revalidate: called when the VFS needs to revalidate a dentry. This @@ -1135,22 +1134,15 @@ struct dentry_operations { dentry being transited from. d_real: overlay/union type filesystems implement this method to return one of - the underlying dentries hidden by the overlay. It is used in three + the underlying dentries hidden by the overlay. It is used in two different modes: - Called from open it may need to copy-up the file depending on the - supplied open flags. This mode is selected with a non-zero flags - argument. In this mode the d_real method can return an error. - Called from file_dentry() it returns the real dentry matching the inode argument. The real dentry may be from a lower layer already copied up, but still referenced from the file. This mode is selected with a - non-NULL inode argument. This will always succeed. - - With NULL inode and zero flags the topmost real underlying dentry is - returned. This will always succeed. + non-NULL inode argument. - This method is never called with both non-NULL inode and non-zero flags. + With NULL inode the topmost real underlying dentry is returned. Each dentry has a pointer to its parent dentry, as well as a hash list of child dentries. Child dentries are basically like files in a diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index c63beccad4fc..0e84593af84e 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -74,28 +74,10 @@ static void ovl_dentry_release(struct dentry *dentry) } } -static int ovl_check_append_only(struct inode *inode, int flag) -{ - /* - * This test was moot in vfs may_open() because overlay inode does - * not have the S_APPEND flag, so re-check on real upper inode - */ - if (IS_APPEND(inode)) { - if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) - return -EPERM; - if (flag & O_TRUNC) - return -EPERM; - } - - return 0; -} - static struct dentry *ovl_d_real(struct dentry *dentry, - const struct inode *inode, - unsigned int open_flags) + const struct inode *inode) { struct dentry *real; - int err; /* It's an overlay file */ if (inode && d_inode(dentry) == inode) @@ -107,28 +89,16 @@ static struct dentry *ovl_d_real(struct dentry *dentry, goto bug; } - if (open_flags) { - err = ovl_open_maybe_copy_up(dentry, open_flags); - if (err) - return ERR_PTR(err); - } - real = ovl_dentry_upper(dentry); - if (real && (!inode || inode == d_inode(real))) { - if (!inode) { - err = ovl_check_append_only(d_inode(real), open_flags); - if (err) - return ERR_PTR(err); - } + if (real && (!inode || inode == d_inode(real))) return real; - } real = ovl_dentry_lower(dentry); if (!real) goto bug; /* Handle recursion */ - real = d_real(real, inode, open_flags); + real = d_real(real, inode); if (!inode || inode == d_inode(real)) return real; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8fe4efa94af6..78cea80423a3 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -145,8 +145,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); - struct dentry *(*d_real)(struct dentry *, const struct inode *, - unsigned int); + struct dentry *(*d_real)(struct dentry *, const struct inode *); } ____cacheline_aligned; /* @@ -568,7 +567,6 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) * d_real - Return the real dentry * @dentry: the dentry to query * @inode: inode to select the dentry from multiple layers (can be NULL) - * @flags: open flags to control copy-up behavior * * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. @@ -576,11 +574,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) * See also: Documentation/filesystems/vfs.txt */ static inline struct dentry *d_real(struct dentry *dentry, - const struct inode *inode, - unsigned int flags) + const struct inode *inode) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) - return dentry->d_op->d_real(dentry, inode, flags); + return dentry->d_op->d_real(dentry, inode); else return dentry; } @@ -595,7 +592,7 @@ static inline struct dentry *d_real(struct dentry *dentry, static inline struct inode *d_real_inode(const struct dentry *dentry) { /* This usage of d_real() results in const dentry */ - return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0)); + return d_backing_inode(d_real((struct dentry *) dentry, NULL)); } struct name_snapshot { diff --git a/include/linux/fs.h b/include/linux/fs.h index 1cbcf37c45e1..1fa63d184d1f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1239,7 +1239,7 @@ static inline struct inode *file_inode(const struct file *f) static inline struct dentry *file_dentry(const struct file *file) { - return d_real(file->f_path.dentry, file_inode(file), 0); + return d_real(file->f_path.dentry, file_inode(file)); } static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) -- cgit v1.2.3 From 0c31d675aad949e5dfecf7a32813514423e6c766 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2018 15:44:44 +0200 Subject: ovl: fix documentation of non-standard behavior We can now drop description of the ro/rw inconsistency from the documentation. Also clarify, that now fully standard compliant behavior can be enabled with kernel/module/mount options. Signed-off-by: Miklos Szeredi --- Documentation/filesystems/overlayfs.txt | 51 +++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 21 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 72615a2c0752..c2c71be75e3d 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -10,10 +10,6 @@ union-filesystems). An overlay-filesystem tries to present a filesystem which is the result over overlaying one filesystem on top of the other. -The result will inevitably fail to look exactly like a normal -filesystem for various technical reasons. The expectation is that -many use cases will be able to ignore these differences. - Overlay objects --------------- @@ -306,27 +302,40 @@ the copied layers will fail the verification of the lower root file handle. Non-standard behavior --------------------- -The copy_up operation essentially creates a new, identical file and -moves it over to the old name. Any open files referring to this inode -will access the old data. +Overlayfs can now act as a POSIX compliant filesystem with the following +features turned on: + +1) "redirect_dir" + +Enabled with the mount option or module option: "redirect_dir=on" or with +the kernel config option CONFIG_OVERLAY_FS_REDIRECT_DIR=y. + +If this feature is disabled, then rename(2) on a lower or merged directory +will fail with EXDEV ("Invalid cross-device link"). + +2) "inode index" + +Enabled with the mount option or module option "index=on" or with the +kernel config option CONFIG_OVERLAY_FS_INDEX=y. -The new file may be on a different filesystem, so both st_dev and st_ino -of the real file may change. The values of st_dev and st_ino returned by -stat(2) on an overlay object are often not the same as the real file -stat(2) values to prevent the values from changing on copy_up. +If this feature is disabled and a file with multiple hard links is copied +up, then this will "break" the link. Changes will not be propagated to +other names referring to the same inode. -Unless "xino" feature is enabled, when overlay layers are not all on the -same underlying filesystem, the value of st_dev may be different for two -non-directory objects in the same overlay filesystem and the value of -st_ino for directory objects may be non persistent and could change even -while the overlay filesystem is still mounted. +3) "xino" -Unless "inode index" feature is enabled, if a file with multiple hard -links is copied up, then this will "break" the link. Changes will not be -propagated to other names referring to the same inode. +Enabled with the mount option "xino=auto" or "xino=on", with the module +option "xino_auto=on" or with the kernel config option +CONFIG_OVERLAY_FS_XINO_AUTO=y. Also implicitly enabled by using the same +underlying filesystem for all layers making up the overlay. -Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged -directory will fail with EXDEV. +If this feature is disabled or the underlying filesystem doesn't have +enough free bits in the inode number, then overlayfs will not be able to +guarantee that the values of st_ino and st_dev returned by stat(2) and the +value of d_ino returned by readdir(3) will act like on a normal filesystem. +E.g. the value of st_dev may be different for two objects in the same +overlay filesystem and the value of st_ino for directory objects may not be +persistent and could change even while the overlay filesystem is mounted. Changes to underlying filesystems -- cgit v1.2.3 From d5791044d2e5749ef4de84161cec5532e2111540 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 11 May 2018 11:49:27 -0400 Subject: ovl: Provide a mount option metacopy=on/off for metadata copyup By default metadata only copy up is disabled. Provide a mount option so that users can choose one way or other. Also provide a kernel config and module option to enable/disable metacopy feature. metacopy feature requires redirect_dir=on when upper is present. Otherwise, it requires redirect_dir=follow atleast. As of now, metacopy does not work with nfs_export=on. So if both metacopy=on and nfs_export=on then nfs_export is disabled. Signed-off-by: Vivek Goyal Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- Documentation/filesystems/overlayfs.txt | 30 ++++++++++++++++++++- fs/overlayfs/Kconfig | 19 ++++++++++++++ fs/overlayfs/ovl_entry.h | 1 + fs/overlayfs/super.c | 46 ++++++++++++++++++++++++++++++--- 4 files changed, 92 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index c2c71be75e3d..51c136c821bf 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -262,6 +262,30 @@ rightmost one and going left. In the above example lower1 will be the top, lower2 the middle and lower3 the bottom layer. +Metadata only copy up +-------------------- + +When metadata only copy up feature is enabled, overlayfs will only copy +up metadata (as opposed to whole file), when a metadata specific operation +like chown/chmod is performed. Full file will be copied up later when +file is opened for WRITE operation. + +In other words, this is delayed data copy up operation and data is copied +up when there is a need to actually modify data. + +There are multiple ways to enable/disable this feature. A config option +CONFIG_OVERLAY_FS_METACOPY can be set/unset to enable/disable this feature +by default. Or one can enable/disable it at module load time with module +parameter metacopy=on/off. Lastly, there is also a per mount option +metacopy=on/off to enable/disable this feature per mount. + +Do not use metacopy=on with untrusted upper/lower directories. Otherwise +it is possible that an attacker can create a handcrafted file with +appropriate REDIRECT and METACOPY xattrs, and gain access to file on lower +pointed by REDIRECT. This should not be possible on local system as setting +"trusted." xattrs will require CAP_SYS_ADMIN. But it should be possible +for untrusted layers like from a pen drive. + Sharing and copying layers -------------------------- @@ -280,7 +304,7 @@ though it will not result in a crash or deadlock. Mounting an overlay using an upper layer path, where the upper layer path was previously used by another mounted overlay in combination with a different lower layer path, is allowed, unless the "inodes index" feature -is enabled. +or "metadata only copy up" feature is enabled. With the "inodes index" feature, on the first time mount, an NFS file handle of the lower layer root directory, along with the UUID of the lower @@ -293,6 +317,10 @@ lower root origin, mount will fail with ESTALE. An overlayfs mount with does not support NFS export, lower filesystem does not have a valid UUID or if the upper filesystem does not support extended attributes. +For "metadata only copy up" feature there is no verification mechanism at +mount time. So if same upper is mounted with different set of lower, mount +probably will succeed but expect the unexpected later on. So don't do it. + It is quite a common practice to copy overlay layers to a different directory tree on the same or different underlying filesystem, and even to a different machine. With the "inodes index" feature, trying to mount diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 9384164253ac..2ef91be2a04e 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -64,6 +64,7 @@ config OVERLAY_FS_NFS_EXPORT bool "Overlayfs: turn on NFS export feature by default" depends on OVERLAY_FS depends on OVERLAY_FS_INDEX + depends on !OVERLAY_FS_METACOPY help If this config option is enabled then overlay filesystems will use the index directory to decode overlay NFS file handles by default. @@ -103,3 +104,21 @@ config OVERLAY_FS_XINO_AUTO For more information, see Documentation/filesystems/overlayfs.txt If unsure, say N. + +config OVERLAY_FS_METACOPY + bool "Overlayfs: turn on metadata only copy up feature by default" + depends on OVERLAY_FS + select OVERLAY_FS_REDIRECT_DIR + help + If this config option is enabled then overlay filesystems will + copy up only metadata where appropriate and data copy up will + happen when a file is opened for WRITE operation. It is still + possible to turn off this feature globally with the "metacopy=off" + module option or on a filesystem instance basis with the + "metacopy=off" mount option. + + Note, that this feature is not backward compatible. That is, + mounting an overlay which has metacopy only inodes on a kernel + that doesn't support this feature will have unexpected results. + + If unsure, say N. diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 41655a7d6894..ea4134e97d0d 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -19,6 +19,7 @@ struct ovl_config { bool index; bool nfs_export; int xino; + bool metacopy; }; struct ovl_sb { diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 0e84593af84e..d4caeee051ee 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -64,6 +64,11 @@ static void ovl_entry_stack_free(struct ovl_entry *oe) dput(oe->lowerstack[i].dentry); } +static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY); +module_param_named(metacopy, ovl_metacopy_def, bool, 0644); +MODULE_PARM_DESC(ovl_metacopy_def, + "Default to on or off for the metadata only copy up feature"); + static void ovl_dentry_release(struct dentry *dentry) { struct ovl_entry *oe = dentry->d_fsdata; @@ -347,6 +352,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) "on" : "off"); if (ofs->config.xino != ovl_xino_def()) seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]); + if (ofs->config.metacopy != ovl_metacopy_def) + seq_printf(m, ",metacopy=%s", + ofs->config.metacopy ? "on" : "off"); return 0; } @@ -384,6 +392,8 @@ enum { OPT_XINO_ON, OPT_XINO_OFF, OPT_XINO_AUTO, + OPT_METACOPY_ON, + OPT_METACOPY_OFF, OPT_ERR, }; @@ -400,6 +410,8 @@ static const match_table_t ovl_tokens = { {OPT_XINO_ON, "xino=on"}, {OPT_XINO_OFF, "xino=off"}, {OPT_XINO_AUTO, "xino=auto"}, + {OPT_METACOPY_ON, "metacopy=on"}, + {OPT_METACOPY_OFF, "metacopy=off"}, {OPT_ERR, NULL} }; @@ -452,6 +464,7 @@ static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode) static int ovl_parse_opt(char *opt, struct ovl_config *config) { char *p; + int err; config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL); if (!config->redirect_mode) @@ -526,6 +539,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->xino = OVL_XINO_AUTO; break; + case OPT_METACOPY_ON: + config->metacopy = true; + break; + + case OPT_METACOPY_OFF: + config->metacopy = false; + break; + default: pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; @@ -540,7 +561,20 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->workdir = NULL; } - return ovl_parse_redirect_mode(config, config->redirect_mode); + err = ovl_parse_redirect_mode(config, config->redirect_mode); + if (err) + return err; + + /* metacopy feature with upper requires redirect_dir=on */ + if (config->upperdir && config->metacopy && !config->redirect_dir) { + pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=on\", falling back to metacopy=off.\n"); + config->metacopy = false; + } else if (config->metacopy && !config->redirect_follow) { + pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=follow\" on non-upper mount, falling back to metacopy=off.\n"); + config->metacopy = false; + } + + return 0; } #define OVL_WORKDIR_NAME "work" @@ -1013,7 +1047,8 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) if (err) { ofs->noxattr = true; ofs->config.index = false; - pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n"); + ofs->config.metacopy = false; + pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n"); err = 0; } else { vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); @@ -1035,7 +1070,6 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } - out: mnt_drop_write(mnt); return err; @@ -1346,6 +1380,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ofs->config.index = ovl_index_def; ofs->config.nfs_export = ovl_nfs_export_def; ofs->config.xino = ovl_xino_def(); + ofs->config.metacopy = ovl_metacopy_def; err = ovl_parse_opt((char *) data, &ofs->config); if (err) goto out_err; @@ -1416,6 +1451,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) } } + if (ofs->config.metacopy && ofs->config.nfs_export) { + pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); + ofs->config.nfs_export = false; + } + if (ofs->config.nfs_export) sb->s_export_op = &ovl_export_operations; -- cgit v1.2.3