diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-12 17:11:47 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-12 17:11:47 -0800 |
commit | 33caf82acf4dc420bf0f0136b886f7b27ecf90c5 (patch) | |
tree | b24b0b5c8f257ae7db3b8df939821a0856869895 /fs/namei.c | |
parent | ca9706a282943899981e83604f2ed13e88ce4239 (diff) | |
parent | bbddca8e8fac07ece3938e03526b5d00fa791a4c (diff) | |
download | linux-33caf82acf4dc420bf0f0136b886f7b27ecf90c5.tar.bz2 |
Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull misc vfs updates from Al Viro:
"All kinds of stuff. That probably should've been 5 or 6 separate
branches, but by the time I'd realized how large and mixed that bag
had become it had been too close to -final to play with rebasing.
Some fs/namei.c cleanups there, memdup_user_nul() introduction and
switching open-coded instances, burying long-dead code, whack-a-mole
of various kinds, several new helpers for ->llseek(), assorted
cleanups and fixes from various people, etc.
One piece probably deserves special mention - Neil's
lookup_one_len_unlocked(). Similar to lookup_one_len(), but gets
called without ->i_mutex and tries to avoid ever taking it. That, of
course, means that it's not useful for any directory modifications,
but things like getting inode attributes in nfds readdirplus are fine
with that. I really should've asked for moratorium on lookup-related
changes this cycle, but since I hadn't done that early enough... I
*am* asking for that for the coming cycle, though - I'm going to try
and get conversion of i_mutex to rwsem with ->lookup() done under lock
taken shared.
There will be a patch closer to the end of the window, along the lines
of the one Linus had posted last May - mechanical conversion of
->i_mutex accesses to inode_lock()/inode_unlock()/inode_trylock()/
inode_is_locked()/inode_lock_nested(). To quote Linus back then:
-----
| This is an automated patch using
|
| sed 's/mutex_lock(&\(.*\)->i_mutex)/inode_lock(\1)/'
| sed 's/mutex_unlock(&\(.*\)->i_mutex)/inode_unlock(\1)/'
| sed 's/mutex_lock_nested(&\(.*\)->i_mutex,[ ]*I_MUTEX_\([A-Z0-9_]*\))/inode_lock_nested(\1, I_MUTEX_\2)/'
| sed 's/mutex_is_locked(&\(.*\)->i_mutex)/inode_is_locked(\1)/'
| sed 's/mutex_trylock(&\(.*\)->i_mutex)/inode_trylock(\1)/'
|
| with a very few manual fixups
-----
I'm going to send that once the ->i_mutex-affecting stuff in -next
gets mostly merged (or when Linus says he's about to stop taking
merges)"
* 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (63 commits)
nfsd: don't hold i_mutex over userspace upcalls
fs:affs:Replace time_t with time64_t
fs/9p: use fscache mutex rather than spinlock
proc: add a reschedule point in proc_readfd_common()
logfs: constify logfs_block_ops structures
fcntl: allow to set O_DIRECT flag on pipe
fs: __generic_file_splice_read retry lookup on AOP_TRUNCATED_PAGE
fs: xattr: Use kvfree()
[s390] page_to_phys() always returns a multiple of PAGE_SIZE
nbd: use ->compat_ioctl()
fs: use block_device name vsprintf helper
lib/vsprintf: add %*pg format specifier
fs: use gendisk->disk_name where possible
poll: plug an unused argument to do_poll
amdkfd: don't open-code memdup_user()
cdrom: don't open-code memdup_user()
rsxx: don't open-code memdup_user()
mtip32xx: don't open-code memdup_user()
[um] mconsole: don't open-code memdup_user_nul()
[um] hostaudio: don't open-code memdup_user()
...
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 200 |
1 files changed, 131 insertions, 69 deletions
diff --git a/fs/namei.c b/fs/namei.c index 3c909aebef70..bceefd5588a2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -534,10 +534,8 @@ static void restore_nameidata(void) current->nameidata = old; if (old) old->total_link_count = now->total_link_count; - if (now->stack != now->internal) { + if (now->stack != now->internal) kfree(now->stack); - now->stack = now->internal; - } } static int __nd_alloc_stack(struct nameidata *nd) @@ -654,7 +652,7 @@ static bool legitimize_links(struct nameidata *nd) * Path walking has 2 modes, rcu-walk and ref-walk (see * Documentation/filesystems/path-lookup.txt). In situations when we can't * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab - * normal reference counts on dentries and vfsmounts to transition to rcu-walk + * normal reference counts on dentries and vfsmounts to transition to ref-walk * mode. Refcounts are grabbed at the last known good point before rcu-walk * got stuck, so ref-walk may continue from there. If this is not successful * (eg. a seqcount has changed), then failure is returned and it's up to caller @@ -804,19 +802,19 @@ static int complete_walk(struct nameidata *nd) static void set_root(struct nameidata *nd) { - get_fs_root(current->fs, &nd->root); -} - -static void set_root_rcu(struct nameidata *nd) -{ struct fs_struct *fs = current->fs; - unsigned seq; - do { - seq = read_seqcount_begin(&fs->seq); - nd->root = fs->root; - nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq); - } while (read_seqcount_retry(&fs->seq, seq)); + if (nd->flags & LOOKUP_RCU) { + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + nd->root = fs->root; + nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq); + } while (read_seqcount_retry(&fs->seq, seq)); + } else { + get_fs_root(fs, &nd->root); + } } static void path_put_conditional(struct path *path, struct nameidata *nd) @@ -838,6 +836,26 @@ static inline void path_to_nameidata(const struct path *path, nd->path.dentry = path->dentry; } +static int nd_jump_root(struct nameidata *nd) +{ + if (nd->flags & LOOKUP_RCU) { + struct dentry *d; + nd->path = nd->root; + d = nd->path.dentry; + nd->inode = d->d_inode; + nd->seq = nd->root_seq; + if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq))) + return -ECHILD; + } else { + path_put(&nd->path); + nd->path = nd->root; + path_get(&nd->path); + nd->inode = nd->path.dentry->d_inode; + } + nd->flags |= LOOKUP_JUMPED; + return 0; +} + /* * Helper to directly jump to a known parsed path from ->get_link, * caller must have taken a reference to path beforehand. @@ -1016,25 +1034,10 @@ const char *get_link(struct nameidata *nd) return res; } if (*res == '/') { - if (nd->flags & LOOKUP_RCU) { - struct dentry *d; - if (!nd->root.mnt) - set_root_rcu(nd); - nd->path = nd->root; - d = nd->path.dentry; - nd->inode = d->d_inode; - nd->seq = nd->root_seq; - if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq))) - return ERR_PTR(-ECHILD); - } else { - if (!nd->root.mnt) - set_root(nd); - path_put(&nd->path); - nd->path = nd->root; - path_get(&nd->root); - nd->inode = nd->path.dentry->d_inode; - } - nd->flags |= LOOKUP_JUMPED; + if (!nd->root.mnt) + set_root(nd); + if (unlikely(nd_jump_root(nd))) + return ERR_PTR(-ECHILD); while (unlikely(*++res == '/')) ; } @@ -1295,8 +1298,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, static int follow_dotdot_rcu(struct nameidata *nd) { struct inode *inode = nd->inode; - if (!nd->root.mnt) - set_root_rcu(nd); while (1) { if (path_equal(&nd->path, &nd->root)) @@ -1416,9 +1417,6 @@ static void follow_mount(struct path *path) static int follow_dotdot(struct nameidata *nd) { - if (!nd->root.mnt) - set_root(nd); - while(1) { struct dentry *old = nd->path.dentry; @@ -1656,6 +1654,8 @@ static inline int may_lookup(struct nameidata *nd) static inline int handle_dots(struct nameidata *nd, int type) { if (type == LAST_DOTDOT) { + if (!nd->root.mnt) + set_root(nd); if (nd->flags & LOOKUP_RCU) { return follow_dotdot_rcu(nd); } else @@ -2021,18 +2021,19 @@ static const char *path_init(struct nameidata *nd, unsigned flags) } nd->root.mnt = NULL; + nd->path.mnt = NULL; + nd->path.dentry = NULL; nd->m_seq = read_seqbegin(&mount_lock); if (*s == '/') { - if (flags & LOOKUP_RCU) { + if (flags & LOOKUP_RCU) rcu_read_lock(); - set_root_rcu(nd); - nd->seq = nd->root_seq; - } else { - set_root(nd); - path_get(&nd->root); - } - nd->path = nd->root; + set_root(nd); + if (likely(!nd_jump_root(nd))) + return s; + nd->root.mnt = NULL; + rcu_read_unlock(); + return ERR_PTR(-ECHILD); } else if (nd->dfd == AT_FDCWD) { if (flags & LOOKUP_RCU) { struct fs_struct *fs = current->fs; @@ -2043,11 +2044,14 @@ static const char *path_init(struct nameidata *nd, unsigned flags) do { seq = read_seqcount_begin(&fs->seq); nd->path = fs->pwd; + nd->inode = nd->path.dentry->d_inode; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); } else { get_fs_pwd(current->fs, &nd->path); + nd->inode = nd->path.dentry->d_inode; } + return s; } else { /* Caller must check execute permissions on the starting path component */ struct fd f = fdget_raw(nd->dfd); @@ -2077,16 +2081,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags) fdput(f); return s; } - - nd->inode = nd->path.dentry->d_inode; - if (!(flags & LOOKUP_RCU)) - return s; - if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq))) - return s; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - rcu_read_unlock(); - return ERR_PTR(-ECHILD); } static const char *trailing_symlink(struct nameidata *nd) @@ -2279,6 +2273,8 @@ EXPORT_SYMBOL(vfs_path_lookup); * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. + * + * The caller must hold base->i_mutex. */ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { @@ -2322,6 +2318,75 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) } EXPORT_SYMBOL(lookup_one_len); +/** + * lookup_one_len_unlocked - filesystem helper to lookup single pathname component + * @name: pathname component to lookup + * @base: base directory to lookup from + * @len: maximum length @len should be interpreted to + * + * Note that this routine is purely a helper for filesystem usage and should + * not be called by generic code. + * + * Unlike lookup_one_len, it should be called without the parent + * i_mutex held, and will take the i_mutex itself if necessary. + */ +struct dentry *lookup_one_len_unlocked(const char *name, + struct dentry *base, int len) +{ + struct qstr this; + unsigned int c; + int err; + struct dentry *ret; + + this.name = name; + this.len = len; + this.hash = full_name_hash(name, len); + if (!len) + return ERR_PTR(-EACCES); + + if (unlikely(name[0] == '.')) { + if (len < 2 || (len == 2 && name[1] == '.')) + return ERR_PTR(-EACCES); + } + + while (len--) { + c = *(const unsigned char *)name++; + if (c == '/' || c == '\0') + return ERR_PTR(-EACCES); + } + /* + * See if the low-level filesystem might want + * to use its own hash.. + */ + if (base->d_flags & DCACHE_OP_HASH) { + int err = base->d_op->d_hash(base, &this); + if (err < 0) + return ERR_PTR(err); + } + + err = inode_permission(base->d_inode, MAY_EXEC); + if (err) + return ERR_PTR(err); + + /* + * __d_lookup() is used to try to get a quick answer and avoid the + * mutex. A false-negative does no harm. + */ + ret = __d_lookup(base, &this); + if (ret && unlikely(ret->d_flags & DCACHE_OP_REVALIDATE)) { + dput(ret); + ret = NULL; + } + if (ret) + return ret; + + mutex_lock(&base->d_inode->i_mutex); + ret = __lookup_hash(&this, base, 0); + mutex_unlock(&base->d_inode->i_mutex); + return ret; +} +EXPORT_SYMBOL(lookup_one_len_unlocked); + int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) { @@ -2670,10 +2735,6 @@ static int may_open(struct path *path, int acc_mode, int flag) struct inode *inode = dentry->d_inode; int error; - /* O_PATH? */ - if (!acc_mode) - return 0; - if (!inode) return -ENOENT; @@ -2695,7 +2756,7 @@ static int may_open(struct path *path, int acc_mode, int flag) break; } - error = inode_permission(inode, acc_mode); + error = inode_permission(inode, MAY_OPEN | acc_mode); if (error) return error; @@ -2887,7 +2948,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, if (*opened & FILE_CREATED) { WARN_ON(!(open_flag & O_CREAT)); fsnotify_create(dir, dentry); - acc_mode = MAY_OPEN; + acc_mode = 0; } error = may_open(&file->f_path, acc_mode, open_flag); if (error) @@ -3100,7 +3161,7 @@ retry_lookup: /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; will_truncate = false; - acc_mode = MAY_OPEN; + acc_mode = 0; path_to_nameidata(&path, nd); goto finish_open_created; } @@ -3184,10 +3245,11 @@ finish_open: got_write = true; } finish_open_created: - error = may_open(&nd->path, acc_mode, open_flag); - if (error) - goto out; - + if (likely(!(open_flag & O_PATH))) { + error = may_open(&nd->path, acc_mode, open_flag); + if (error) + goto out; + } BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ error = vfs_open(&nd->path, file, current_cred()); if (!error) { @@ -3274,7 +3336,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, goto out2; audit_inode(nd->name, child, 0); /* Don't check for other permissions, the inode was just created */ - error = may_open(&path, MAY_OPEN, op->open_flag); + error = may_open(&path, 0, op->open_flag); if (error) goto out2; file->f_path.mnt = path.mnt; |