From 764baba80168ad3adafb521d2ab483ccbc49e344 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 4 Feb 2018 15:35:09 +0200 Subject: ovl: hash non-dir by lower inode for fsnotify Commit 31747eda41ef ("ovl: hash directory inodes for fsnotify") fixed an issue of inotify watch on directory that stops getting events after dropping dentry caches. A similar issue exists for non-dir non-upper files, for example: $ mkdir -p lower upper work merged $ touch lower/foo $ mount -t overlay -o lowerdir=lower,workdir=work,upperdir=upper none merged $ inotifywait merged/foo & $ echo 2 > /proc/sys/vm/drop_caches $ cat merged/foo inotifywait doesn't get the OPEN event, because ovl_lookup() called from 'cat' allocates a new overlay inode and does not reuse the watched inode. Fix this by hashing non-dir overlay inodes by lower real inode in the following cases that were not hashed before this change: - A non-upper overlay mount - A lower non-hardlink when index=off A helper ovl_hash_bylower() was added to put all the logic and documentation about which real inode an overlay inode is hashed by into one place. The issue dates back to initial version of overlayfs, but this patch depends on ovl_inode code that was introduced in kernel v4.13. Cc: #v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 58 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index fcd97b783fa1..3b1bd469accd 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -669,38 +669,59 @@ struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, return inode; } +/* + * Does overlay inode need to be hashed by lower inode? + */ +static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, + struct dentry *lower, struct dentry *index) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + /* No, if pure upper */ + if (!lower) + return false; + + /* Yes, if already indexed */ + if (index) + return true; + + /* Yes, if won't be copied up */ + if (!ofs->upper_mnt) + return true; + + /* No, if lower hardlink is or will be broken on copy up */ + if ((upper || !ovl_indexdir(sb)) && + !d_is_dir(lower) && d_inode(lower)->i_nlink > 1) + return false; + + /* No, if non-indexed upper with NFS export */ + if (sb->s_export_op && upper) + return false; + + /* Otherwise, hash by lower inode for fsnotify */ + return true; +} + struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, struct dentry *lowerdentry, struct dentry *index, unsigned int numlower) { - struct ovl_fs *ofs = sb->s_fs_info; struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; - /* Already indexed or could be indexed on copy up? */ - bool indexed = (index || (ovl_indexdir(sb) && !upperdentry)); - struct dentry *origin = indexed ? lowerdentry : NULL; + bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index); bool is_dir; - if (WARN_ON(upperdentry && indexed && !lowerdentry)) - return ERR_PTR(-EIO); - if (!realinode) realinode = d_inode(lowerdentry); /* - * Copy up origin (lower) may exist for non-indexed non-dir upper, but - * we must not use lower as hash key in that case. - * Hash non-dir that is or could be indexed by origin inode. - * Hash dir that is or could be merged by origin inode. - * Hash pure upper and non-indexed non-dir by upper inode. - * Hash non-indexed dir by upper inode for NFS export. + * Copy up origin (lower) may exist for non-indexed upper, but we must + * not use lower as hash key if this is a broken hardlink. */ is_dir = S_ISDIR(realinode->i_mode); - if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt)) - origin = lowerdentry; - - if (upperdentry || origin) { - struct inode *key = d_inode(origin ?: upperdentry); + if (upperdentry || bylower) { + struct inode *key = d_inode(bylower ? lowerdentry : + upperdentry); unsigned int nlink = is_dir ? 1 : realinode->i_nlink; inode = iget5_locked(sb, (unsigned long) key, @@ -728,6 +749,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); } else { + /* Lower hardlink that will be broken on copy up */ inode = new_inode(sb); if (!inode) goto out_nomem; -- cgit v1.2.3 From 2ca3c148a06244d46dcfc95c5965644c83a30b37 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 30 Jan 2018 13:31:09 +0200 Subject: ovl: check lower ancestry on encode of lower dir file handle This change relaxes copy up on encode of merge dir with lower layer > 1 and handles the case of encoding a merge dir with lower layer 1, where an ancestor is a non-indexed merge dir. In that case, decode of the lower file handle will not have been possible if the non-indexed ancestor is redirected before or after encode. Before encoding a non-upper directory file handle from real layer N, we need to check if it will be possible to reconnect an overlay dentry from the real lower decoded dentry. This is done by following the overlay ancestry up to a "layer N connected" ancestor and verifying that all parents along the way are "layer N connectable". If an ancestor that is NOT "layer N connectable" is found, we need to copy up an ancestor, which is "layer N connectable", thus making that ancestor "layer N connected". For example: layer 1: /a layer 2: /a/b/c The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is copied up and renamed, upper dir /a will be indexed by lower dir /a from layer 1. The dir /a from layer 2 will never be indexed, so the algorithm in ovl_lookup_real_ancestor() (*) will not be able to lookup a connected overlay dentry from the connected lower dentry /a/b/c. To avoid this problem on decode time, we need to copy up an ancestor of /a/b/c, which is "layer 2 connectable", on encode time. That ancestor is /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected" and when the time comes to decode the file handle from lower dentry /a/b/c, ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding a connected overlay dentry will be accomplished. (*) the algorithm in ovl_lookup_real_ancestor() can be improved to lookup an entry /a in the lower layers above layer N and find the indexed dir /a from layer 1. If that improvement is made, then the check for "layer N connected" will need to verify there are no redirects in lower layers above layer N. In the example above, /a will be "layer 2 connectable". However, if layer 2 dir /a is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable": layer 1: /A (redirect = /a) layer 2: /a/b/c Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 210 +++++++++++++++++++++++++++++++++++++---------- fs/overlayfs/overlayfs.h | 1 + fs/overlayfs/super.c | 1 + 3 files changed, 168 insertions(+), 44 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index bb94ce9da5c8..9df455ca59a8 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -19,6 +19,142 @@ #include #include "overlayfs.h" +static int ovl_encode_maybe_copy_up(struct dentry *dentry) +{ + int err; + + if (ovl_dentry_upper(dentry)) + return 0; + + err = ovl_want_write(dentry); + if (!err) { + err = ovl_copy_up(dentry); + ovl_drop_write(dentry); + } + + if (err) { + pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n", + dentry, err); + } + + return err; +} + +/* + * Before encoding a non-upper directory file handle from real layer N, we need + * to check if it will be possible to reconnect an overlay dentry from the real + * lower decoded dentry. This is done by following the overlay ancestry up to a + * "layer N connected" ancestor and verifying that all parents along the way are + * "layer N connectable". If an ancestor that is NOT "layer N connectable" is + * found, we need to copy up an ancestor, which is "layer N connectable", thus + * making that ancestor "layer N connected". For example: + * + * layer 1: /a + * layer 2: /a/b/c + * + * The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is + * copied up and renamed, upper dir /a will be indexed by lower dir /a from + * layer 1. The dir /a from layer 2 will never be indexed, so the algorithm (*) + * in ovl_lookup_real_ancestor() will not be able to lookup a connected overlay + * dentry from the connected lower dentry /a/b/c. + * + * To avoid this problem on decode time, we need to copy up an ancestor of + * /a/b/c, which is "layer 2 connectable", on encode time. That ancestor is + * /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected" + * and when the time comes to decode the file handle from lower dentry /a/b/c, + * ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding + * a connected overlay dentry will be accomplished. + * + * (*) the algorithm in ovl_lookup_real_ancestor() can be improved to lookup an + * entry /a in the lower layers above layer N and find the indexed dir /a from + * layer 1. If that improvement is made, then the check for "layer N connected" + * will need to verify there are no redirects in lower layers above N. In the + * example above, /a will be "layer 2 connectable". However, if layer 2 dir /a + * is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable": + * + * layer 1: /A (redirect = /a) + * layer 2: /a/b/c + */ + +/* Return the lowest layer for encoding a connectable file handle */ +static int ovl_connectable_layer(struct dentry *dentry) +{ + struct ovl_entry *oe = OVL_E(dentry); + + /* We can get overlay root from root of any layer */ + if (dentry == dentry->d_sb->s_root) + return oe->numlower; + + /* + * If it's an unindexed merge dir, then it's not connectable with any + * lower layer + */ + if (ovl_dentry_upper(dentry) && + !ovl_test_flag(OVL_INDEX, d_inode(dentry))) + return 0; + + /* We can get upper/overlay path from indexed/lower dentry */ + return oe->lowerstack[0].layer->idx; +} + +/* + * @dentry is "connected" if all ancestors up to root or a "connected" ancestor + * have the same uppermost lower layer as the origin's layer. We may need to + * copy up a "connectable" ancestor to make it "connected". A "connected" dentry + * cannot become non "connected", so cache positive result in dentry flags. + * + * Return the connected origin layer or < 0 on error. + */ +static int ovl_connect_layer(struct dentry *dentry) +{ + struct dentry *next, *parent = NULL; + int origin_layer; + int err = 0; + + if (WARN_ON(dentry == dentry->d_sb->s_root) || + WARN_ON(!ovl_dentry_lower(dentry))) + return -EIO; + + origin_layer = OVL_E(dentry)->lowerstack[0].layer->idx; + if (ovl_dentry_test_flag(OVL_E_CONNECTED, dentry)) + return origin_layer; + + /* Find the topmost origin layer connectable ancestor of @dentry */ + next = dget(dentry); + for (;;) { + parent = dget_parent(next); + if (WARN_ON(parent == next)) { + err = -EIO; + break; + } + + /* + * If @parent is not origin layer connectable, then copy up + * @next which is origin layer connectable and we are done. + */ + if (ovl_connectable_layer(parent) < origin_layer) { + err = ovl_encode_maybe_copy_up(next); + break; + } + + /* If @parent is connected or indexed we are done */ + if (ovl_dentry_test_flag(OVL_E_CONNECTED, parent) || + ovl_test_flag(OVL_INDEX, d_inode(parent))) + break; + + dput(next); + next = parent; + } + + dput(parent); + dput(next); + + if (!err) + ovl_dentry_set_flag(OVL_E_CONNECTED, dentry); + + return err ?: origin_layer; +} + /* * We only need to encode origin if there is a chance that the same object was * encoded pre copy up and then we need to stay consistent with the same @@ -41,73 +177,59 @@ * L = lower file handle * * (*) Connecting an overlay dir from real lower dentry is not always - * possible when there are redirects in lower layers. To mitigate this case, - * we copy up the lower dir first and then encode an upper dir file handle. + * possible when there are redirects in lower layers and non-indexed merge dirs. + * To mitigate those case, we may copy up the lower dir ancestor before encode + * a lower dir file handle. + * + * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error. */ -static bool ovl_should_encode_origin(struct dentry *dentry) +static int ovl_check_encode_origin(struct dentry *dentry) { struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + /* Upper file handle for pure upper */ if (!ovl_dentry_lower(dentry)) - return false; + return 0; /* - * Decoding a merge dir, whose origin's parent is under a redirected - * lower dir is not always possible. As a simple aproximation, we do - * not encode lower dir file handles when overlay has multiple lower - * layers and origin is below the topmost lower layer. + * Upper file handle for non-indexed upper. * - * TODO: copy up only the parent that is under redirected lower. + * Root is never indexed, so if there's an upper layer, encode upper for + * root. */ - if (d_is_dir(dentry) && ofs->upper_mnt && - OVL_E(dentry)->lowerstack[0].layer->idx > 1) - return false; - - /* Decoding a non-indexed upper from origin is not implemented */ if (ovl_dentry_upper(dentry) && !ovl_test_flag(OVL_INDEX, d_inode(dentry))) - return false; - - return true; -} - -static int ovl_encode_maybe_copy_up(struct dentry *dentry) -{ - int err; - - if (ovl_dentry_upper(dentry)) return 0; - err = ovl_want_write(dentry); - if (err) - return err; - - err = ovl_copy_up(dentry); + /* + * Decoding a merge dir, whose origin's ancestor is under a redirected + * lower dir or under a non-indexed upper is not always possible. + * ovl_connect_layer() will try to make origin's layer "connected" by + * copying up a "connectable" ancestor. + */ + if (d_is_dir(dentry) && ofs->upper_mnt) + return ovl_connect_layer(dentry); - ovl_drop_write(dentry); - return err; + /* Lower file handle for indexed and non-upper dir/non-dir */ + return 1; } static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) { - struct dentry *origin = ovl_dentry_lower(dentry); struct ovl_fh *fh = NULL; - int err; + int err, enc_lower; /* - * If we should not encode a lower dir file handle, copy up and encode - * an upper dir file handle. + * Check if we should encode a lower or upper file handle and maybe + * copy up an ancestor to make lower file handle connectable. */ - if (!ovl_should_encode_origin(dentry)) { - err = ovl_encode_maybe_copy_up(dentry); - if (err) - goto fail; - - origin = NULL; - } + err = enc_lower = ovl_check_encode_origin(dentry); + if (enc_lower < 0) + goto fail; - /* Encode an upper or origin file handle */ - fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin); + /* Encode an upper or lower file handle */ + fh = ovl_encode_fh(enc_lower ? ovl_dentry_lower(dentry) : + ovl_dentry_upper(dentry), !enc_lower); err = PTR_ERR(fh); if (IS_ERR(fh)) goto fail; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 0df25a9c94bd..225ff1171147 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -40,6 +40,7 @@ enum ovl_inode_flag { enum ovl_entry_flag { OVL_E_UPPER_ALIAS, OVL_E_OPAQUE, + OVL_E_CONNECTED, }; /* diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 9ee37c76091d..7c24619ae7fc 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1359,6 +1359,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) /* Root is always merge -> can have whiteouts */ ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); + ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry); ovl_inode_init(d_inode(root_dentry), upperpath.dentry, ovl_dentry_lower(root_dentry)); -- cgit v1.2.3 From 7168179fcf25f7812e8541decac686a91359e522 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 30 Jan 2018 14:30:50 +0200 Subject: ovl: check ERR_PTR() return value from ovl_lookup_real() Reported-by: Dan Carpenter Fixes: 061701540349 ("ovl: lookup indexed ancestor of lower dir") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 9df455ca59a8..97a916ea8b86 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -477,8 +477,8 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, dput(upper); } - if (!this) - return NULL; + if (IS_ERR_OR_NULL(this)) + return this; if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) { dput(this); -- cgit v1.2.3 From b5095f24e791c2d05da7cbb3d99e2b420b36a273 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Tue, 6 Feb 2018 00:25:16 +0800 Subject: ovl: fix ptr_ret.cocci warnings fs/overlayfs/export.c:459:10-16: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci Fixes: 4b91c30a5a19 ("ovl: lookup connected ancestor of dir in inode cache") CC: Amir Goldstein Signed-off-by: Fengguang Wu Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 97a916ea8b86..87bd4148f4fb 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -620,7 +620,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb, if (err == -ECHILD) { this = ovl_lookup_real_ancestor(sb, real, layer); - err = IS_ERR(this) ? PTR_ERR(this) : 0; + err = PTR_ERR_OR_ZERO(this); } if (!err) { dput(connected); -- cgit v1.2.3 From d1fe96c0e4de78ba0cd336ea3df3b850d06b9b9a Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 2 Feb 2018 10:23:24 -0500 Subject: ovl: redirect_dir=nofollow should not follow redirect for opaque lower redirect_dir=nofollow should not follow a redirect. But in a specific configuration it can still follow it. For example try this. $ mkdir -p lower0 lower1/foo upper work merged $ touch lower1/foo/lower-file.txt $ setfattr -n "trusted.overlay.opaque" -v "y" lower1/foo $ mount -t overlay -o lowerdir=lower1:lower0,workdir=work,upperdir=upper,redirect_dir=on none merged $ cd merged $ mv foo foo-renamed $ umount merged # mount again. This time with redirect_dir=nofollow $ mount -t overlay -o lowerdir=lower1:lower0,workdir=work,upperdir=upper,redirect_dir=nofollow none merged $ ls merged/foo-renamed/ # This lists lower-file.txt, while it should not have. Basically, we are doing redirect check after we check for d.stop. And if this is not last lower, and we find an opaque lower, d.stop will be set. ovl_lookup_single() if (!d->last && ovl_is_opaquedir(this)) { d->stop = d->opaque = true; goto out; } To fix this, first check redirect is allowed. And after that check if d.stop has been set or not. Signed-off-by: Vivek Goyal Fixes: 438c84c2f0c7 ("ovl: don't follow redirects if redirect_dir=off") Cc: #v4.15 Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index de3e6da1d5a5..70fcfcc684cc 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -913,9 +913,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, stack[ctr].layer = lower.layer; ctr++; - if (d.stop) - break; - /* * Following redirects can have security consequences: it's like * a symlink into the lower layer without the permission checks. @@ -933,6 +930,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, goto out_put; } + if (d.stop) + break; + if (d.redirect && d.redirect[0] == '/' && poe != roe) { poe = roe; /* Find the current layer on the root dentry */ -- cgit v1.2.3 From 36cd95dfa1ed76b59e2dcaafaa89c5479cec32b5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 7 Mar 2018 11:47:15 +0100 Subject: ovl: update Kconfig texts Add some hints about overlayfs kernel config options. Enabling NFS export by default is especially recommended against, as it incurs a performance penalty even if the filesystem is not actually exported. Signed-off-by: Miklos Szeredi --- fs/overlayfs/Kconfig | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 406e72de88f6..ce6ff5a0a6e4 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -24,6 +24,8 @@ config OVERLAY_FS_REDIRECT_DIR an overlay which has redirects on a kernel that doesn't support this feature will have unexpected results. + If unsure, say N. + config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW bool "Overlayfs: follow redirects even if redirects are turned off" default y @@ -32,8 +34,13 @@ config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW Disable this to get a possibly more secure configuration, but that might not be backward compatible with previous kernels. + If backward compatibility is not an issue, then it is safe and + recommended to say N here. + For more information, see Documentation/filesystems/overlayfs.txt + If unsure, say Y. + config OVERLAY_FS_INDEX bool "Overlayfs: turn on inodes index feature by default" depends on OVERLAY_FS @@ -51,6 +58,8 @@ config OVERLAY_FS_INDEX That is, mounting an overlay which has an inodes index on a kernel that doesn't support this feature will have unexpected results. + If unsure, say N. + config OVERLAY_FS_NFS_EXPORT bool "Overlayfs: turn on NFS export feature by default" depends on OVERLAY_FS @@ -72,3 +81,8 @@ config OVERLAY_FS_NFS_EXPORT Note, that the NFS export feature is not backward compatible. That is, mounting an overlay which has a full index on a kernel that doesn't support this feature will have unexpected results. + + Most users should say N here and enable this feature on a case-by- + case basis with the "nfs_export=on" mount option. + + Say N unless you fully understand the consequences. -- cgit v1.2.3