From 49db08c358873af11ba3c25401de88156fa5d365 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 19 Feb 2016 15:36:07 +0100 Subject: chrdev: emit a warning when we go below dynamic major range Currently a dynamically allocated character device major is taken from 254 and downward. This mechanism is used for RTC, IIO and a few other subsystems. The kernel currently has no check prevening these dynamic allocations from eating into the assigned numbers at 233 and downward. In a recent test it was reported that so many dynamic device majors were used on a test server, that the major number for infiniband (231) was stolen. This occurred when allocating a new major number for GPIO chips. The error messages from the kernel were not helpful. (See: https://lkml.org/lkml/2016/2/14/124) This patch adds a defined lower limit of the dynamic major allocation region will henceforth emit a warning if we start to eat into the assigned numbers. It does not do any semantic changes and will not change the kernels behaviour: numbers will still continue to be stolen, but we will know from dmesg what is going on. This also updates the Documentation/devices.txt to clearly reflect that we are using this range of major numbers for dynamic allocation. Reported-by: Ying Huang Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Alan Cox Cc: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- fs/char_dev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/char_dev.c b/fs/char_dev.c index 24b142569ca9..687471dc04a0 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -91,6 +91,10 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, break; } + if (i < CHRDEV_MAJOR_DYN_END) + pr_warn("CHRDEV \"%s\" major number %d goes below the dynamic allocation range", + name, i); + if (i == 0) { ret = -EBUSY; goto out; -- cgit v1.2.3 From a8f324a46fbe5473633af00039e81821be0ce51b Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Tue, 9 Feb 2016 11:30:29 +0100 Subject: debugfs: fix inode i_nlink references for automount dentry Directory inodes should start off with i_nlink == 2 (one extra ref for "." entry). debugfs_create_automount() increases neither the i_nlink reference for current inode nor for parent inode. On attempt to remove the automount dentry, kernel complains: [ 86.288070] WARNING: CPU: 1 PID: 3616 at fs/inode.c:273 drop_nlink+0x3e/0x50() [ 86.288461] Modules linked in: debugfs_example2(O-) [ 86.288745] CPU: 1 PID: 3616 Comm: rmmod Tainted: G O 4.4.0-rc3-next-20151207+ #135 [ 86.289197] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.2-20150617_082717-anatol 04/01/2014 [ 86.289696] ffffffff81be05c9 ffff8800b9e6fda0 ffffffff81352e2c 0000000000000000 [ 86.290110] ffff8800b9e6fdd8 ffffffff81065142 ffff8801399175e8 ffff8800bb78b240 [ 86.290507] ffff8801399175e8 ffff8800b73d7898 ffff8800b73d7840 ffff8800b9e6fde8 [ 86.290933] Call Trace: [ 86.291080] [] dump_stack+0x4e/0x82 [ 86.291340] [] warn_slowpath_common+0x82/0xc0 [ 86.291640] [] warn_slowpath_null+0x1a/0x20 [ 86.291932] [] drop_nlink+0x3e/0x50 [ 86.292208] [] simple_unlink+0x4b/0x60 [ 86.292481] [] simple_rmdir+0x37/0x50 [ 86.292748] [] __debugfs_remove.part.16+0xa8/0xd0 [ 86.293082] [] debugfs_remove_recursive+0xdb/0x1c0 [ 86.293406] [] cleanup_module+0x2d/0x3b [debugfs_example2] [ 86.293762] [] SyS_delete_module+0x16b/0x220 [ 86.294077] [] entry_SYSCALL_64_fastpath+0x12/0x6a [ 86.294405] ---[ end trace c9fc53353fe14a36 ]--- [ 86.294639] ------------[ cut here ]------------ To reproduce the issue it is enough to invoke these lines: autom = debugfs_create_automount("automount", NULL, vfsmount_cb, data); BUG_ON(IS_ERR_OR_NULL(autom)); debugfs_remove(autom); The issue is fixed by increasing inode i_nlink references for current and parent inodes. Signed-off-by: Roman Pen Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index bece948b363d..13d65f8a7b12 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -461,7 +461,11 @@ struct dentry *debugfs_create_automount(const char *name, inode->i_flags |= S_AUTOMOUNT; inode->i_private = data; dentry->d_fsdata = (void *)f; + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inc_nlink(inode); d_instantiate(dentry, inode); + inc_nlink(d_inode(dentry->d_parent)); + fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } EXPORT_SYMBOL(debugfs_create_automount); -- cgit v1.2.3 From 1b48b530dac3e600d92854d98a2a519243661f6c Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 22 Feb 2016 07:17:47 -0800 Subject: fs: debugfs: Replace CURRENT_TIME by current_fs_time() CURRENT_TIME macro is not appropriate for filesystems as it doesn't use the right granularity for filesystem timestamps. Use current_fs_time() instead. Signed-off-by: Deepa Dinamani Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 13d65f8a7b12..b1e7f35f3cd4 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -39,7 +39,8 @@ static struct inode *debugfs_get_inode(struct super_block *sb) struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_atime = inode->i_mtime = + inode->i_ctime = current_fs_time(sb); } return inode; } -- cgit v1.2.3 From 3a3a5fece6f28c14d3d05c74fb7696412e53a067 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 22 Feb 2016 07:17:53 -0800 Subject: fs: kernfs: Replace CURRENT_TIME by current_fs_time() This is in preparation for the series that transitions filesystem timestamps to use 64 bit time and hence make them y2038 safe. CURRENT_TIME macro will be deleted before merging the aforementioned series. Use current_fs_time() instead of CURRENT_TIME for inode timestamps. struct kernfs_node is associated with a sysfs file/ directory. Truncate the values to appropriate time granularity when writing to inode timestamps of the files. ktime_get_real_ts() is used to obtain times for struct kernfs_iattrs. Since these times are later assigned to inode times using timespec_truncate() for all filesystem based operations, we can save the supers list traversal time here by using ktime_get_real_ts() directly. Signed-off-by: Deepa Dinamani Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 8 +++++--- fs/kernfs/inode.c | 15 ++++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 03b688d19f69..eb2c58732bcf 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -753,7 +753,8 @@ int kernfs_add_one(struct kernfs_node *kn) ps_iattr = parent->iattr; if (ps_iattr) { struct iattr *ps_iattrs = &ps_iattr->ia_iattr; - ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; + ktime_get_real_ts(&ps_iattrs->ia_ctime); + ps_iattrs->ia_mtime = ps_iattrs->ia_ctime; } mutex_unlock(&kernfs_mutex); @@ -1279,8 +1280,9 @@ static void __kernfs_remove(struct kernfs_node *kn) /* update timestamps on the parent */ if (ps_iattr) { - ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; - ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; + ktime_get_real_ts(&ps_iattr->ia_iattr.ia_ctime); + ps_iattr->ia_iattr.ia_mtime = + ps_iattr->ia_iattr.ia_ctime; } kernfs_put(pos); diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 16405ae88d2d..1ac1dbae5aba 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -54,7 +54,10 @@ static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn) iattrs->ia_mode = kn->mode; iattrs->ia_uid = GLOBAL_ROOT_UID; iattrs->ia_gid = GLOBAL_ROOT_GID; - iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME; + + ktime_get_real_ts(&iattrs->ia_atime); + iattrs->ia_mtime = iattrs->ia_atime; + iattrs->ia_ctime = iattrs->ia_atime; simple_xattrs_init(&kn->iattr->xattrs); out_unlock: @@ -236,16 +239,18 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size) static inline void set_default_inode_attr(struct inode *inode, umode_t mode) { inode->i_mode = mode; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_atime = inode->i_mtime = + inode->i_ctime = current_fs_time(inode->i_sb); } static inline void set_inode_attr(struct inode *inode, struct iattr *iattr) { + struct super_block *sb = inode->i_sb; inode->i_uid = iattr->ia_uid; inode->i_gid = iattr->ia_gid; - inode->i_atime = iattr->ia_atime; - inode->i_mtime = iattr->ia_mtime; - inode->i_ctime = iattr->ia_ctime; + inode->i_atime = timespec_trunc(iattr->ia_atime, sb->s_time_gran); + inode->i_mtime = timespec_trunc(iattr->ia_mtime, sb->s_time_gran); + inode->i_ctime = timespec_trunc(iattr->ia_ctime, sb->s_time_gran); } static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode) -- cgit v1.2.3 From 9fd4dcece43a53e5a9e65a973df5693702ee6401 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:13 +0100 Subject: debugfs: prevent access to possibly dead file_operations at file open Nothing prevents a dentry found by path lookup before a return of __debugfs_remove() to actually get opened after that return. Now, after the return of __debugfs_remove(), there are no guarantees whatsoever regarding the memory the corresponding inode's file_operations object had been kept in. Since __debugfs_remove() is seldomly invoked, usually from module exit handlers only, the race is hard to trigger and the impact is very low. A discussion of the problem outlined above as well as a suggested solution can be found in the (sub-)thread rooted at http://lkml.kernel.org/g/20130401203445.GA20862@ZenIV.linux.org.uk ("Yet another pipe related oops.") Basically, Greg KH suggests to introduce an intermediate fops and Al Viro points out that a pointer to the original ones may be stored in ->d_fsdata. Follow this line of reasoning: - Add SRCU as a reverse dependency of DEBUG_FS. - Introduce a srcu_struct object for the debugfs subsystem. - In debugfs_create_file(), store a pointer to the original file_operations object in ->d_fsdata. - Make debugfs_remove() and debugfs_remove_recursive() wait for a SRCU grace period after the dentry has been delete()'d and before they return to their callers. - Introduce an intermediate file_operations object named "debugfs_open_proxy_file_operations". It's ->open() functions checks, under the protection of a SRCU read lock, whether the dentry is still alive, i.e. has not been d_delete()'d and if so, tries to acquire a reference on the owning module. On success, it sets the file object's ->f_op to the original file_operations and forwards the ongoing open() call to the original ->open(). - For clarity, rename the former debugfs_file_operations to debugfs_noop_file_operations -- they are in no way canonical. The choice of SRCU over "normal" RCU is justified by the fact, that the former may also be used to protect ->i_private data from going away during the execution of a file's readers and writers which may (and do) sleep. Finally, introduce the fs/debugfs/internal.h header containing some declarations internal to the debugfs implementation. Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/debugfs/inode.c | 13 ++++++- fs/debugfs/internal.h | 24 +++++++++++++ include/linux/debugfs.h | 3 -- lib/Kconfig.debug | 1 + 5 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 fs/debugfs/internal.h (limited to 'fs') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index d2ba12e23ed9..736ab3c988f2 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -22,6 +22,9 @@ #include #include #include +#include + +#include "internal.h" static ssize_t default_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -35,13 +38,99 @@ static ssize_t default_write_file(struct file *file, const char __user *buf, return count; } -const struct file_operations debugfs_file_operations = { +const struct file_operations debugfs_noop_file_operations = { .read = default_read_file, .write = default_write_file, .open = simple_open, .llseek = noop_llseek, }; +/** + * debugfs_use_file_start - mark the beginning of file data access + * @dentry: the dentry object whose data is being accessed. + * @srcu_idx: a pointer to some memory to store a SRCU index in. + * + * Up to a matching call to debugfs_use_file_finish(), any + * successive call into the file removing functions debugfs_remove() + * and debugfs_remove_recursive() will block. Since associated private + * file data may only get freed after a successful return of any of + * the removal functions, you may safely access it after a successful + * call to debugfs_use_file_start() without worrying about + * lifetime issues. + * + * If -%EIO is returned, the file has already been removed and thus, + * it is not safe to access any of its data. If, on the other hand, + * it is allowed to access the file data, zero is returned. + * + * Regardless of the return code, any call to + * debugfs_use_file_start() must be followed by a matching call + * to debugfs_use_file_finish(). + */ +static int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) + __acquires(&debugfs_srcu) +{ + *srcu_idx = srcu_read_lock(&debugfs_srcu); + barrier(); + if (d_unlinked(dentry)) + return -EIO; + return 0; +} + +/** + * debugfs_use_file_finish - mark the end of file data access + * @srcu_idx: the SRCU index "created" by a former call to + * debugfs_use_file_start(). + * + * Allow any ongoing concurrent call into debugfs_remove() or + * debugfs_remove_recursive() blocked by a former call to + * debugfs_use_file_start() to proceed and return to its caller. + */ +static void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) +{ + srcu_read_unlock(&debugfs_srcu, srcu_idx); +} + +#define F_DENTRY(filp) ((filp)->f_path.dentry) + +#define REAL_FOPS_DEREF(dentry) \ + ((const struct file_operations *)(dentry)->d_fsdata) + +static int open_proxy_open(struct inode *inode, struct file *filp) +{ + const struct dentry *dentry = F_DENTRY(filp); + const struct file_operations *real_fops = NULL; + int srcu_idx, r; + + r = debugfs_use_file_start(dentry, &srcu_idx); + if (r) { + r = -ENOENT; + goto out; + } + + real_fops = REAL_FOPS_DEREF(dentry); + real_fops = fops_get(real_fops); + if (!real_fops) { + /* Huh? Module did not clean up after itself at exit? */ + WARN(1, "debugfs file owner did not clean up at exit: %pd", + dentry); + r = -ENXIO; + goto out; + } + replace_fops(filp, real_fops); + + if (real_fops->open) + r = real_fops->open(inode, filp); + +out: + fops_put(real_fops); + debugfs_use_file_finish(srcu_idx); + return r; +} + +const struct file_operations debugfs_open_proxy_file_operations = { + .open = open_proxy_open, +}; + static struct dentry *debugfs_create_mode(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b1e7f35f3cd4..2905dd160575 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -27,9 +27,14 @@ #include #include #include +#include + +#include "internal.h" #define DEBUGFS_DEFAULT_MODE 0700 +DEFINE_SRCU(debugfs_srcu); + static struct vfsmount *debugfs_mount; static int debugfs_mount_count; static bool debugfs_registered; @@ -341,8 +346,12 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode, return failed_creating(dentry); inode->i_mode = mode; - inode->i_fop = fops ? fops : &debugfs_file_operations; inode->i_private = data; + + inode->i_fop = fops ? &debugfs_open_proxy_file_operations + : &debugfs_noop_file_operations; + dentry->d_fsdata = (void *)fops; + d_instantiate(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); return end_creating(dentry); @@ -570,6 +579,7 @@ void debugfs_remove(struct dentry *dentry) inode_unlock(d_inode(parent)); if (!ret) simple_release_fs(&debugfs_mount, &debugfs_mount_count); + synchronize_srcu(&debugfs_srcu); } EXPORT_SYMBOL_GPL(debugfs_remove); @@ -647,6 +657,7 @@ void debugfs_remove_recursive(struct dentry *dentry) if (!__debugfs_remove(child, parent)) simple_release_fs(&debugfs_mount, &debugfs_mount_count); inode_unlock(d_inode(parent)); + synchronize_srcu(&debugfs_srcu); } EXPORT_SYMBOL_GPL(debugfs_remove_recursive); diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h new file mode 100644 index 000000000000..c7aaa5cb6685 --- /dev/null +++ b/fs/debugfs/internal.h @@ -0,0 +1,24 @@ +/* + * internal.h - declarations internal to debugfs + * + * Copyright (C) 2016 Nicolai Stange + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + */ + +#ifndef _DEBUGFS_INTERNAL_H_ +#define _DEBUGFS_INTERNAL_H_ + +struct file_operations; +struct srcu_struct; + +/* declared over in file.c */ +extern const struct file_operations debugfs_noop_file_operations; +extern const struct file_operations debugfs_open_proxy_file_operations; + +extern struct srcu_struct debugfs_srcu; + +#endif /* _DEBUGFS_INTERNAL_H_ */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 981e53ab84e8..fcafe2d389f9 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -43,9 +43,6 @@ extern struct dentry *arch_debugfs_dir; #if defined(CONFIG_DEBUG_FS) -/* declared over in file.c */ -extern const struct file_operations debugfs_file_operations; - struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1e9a607534ca..ddb0e8337aae 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -257,6 +257,7 @@ config PAGE_OWNER config DEBUG_FS bool "Debug Filesystem" + select SRCU help debugfs is a virtual file system that kernel developers use to put debugging files into. Enable this option to be able to read and -- cgit v1.2.3 From 49d200deaa680501f19a247b1fffb29301e51d2b Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:14 +0100 Subject: debugfs: prevent access to removed files' private data Upon return of debugfs_remove()/debugfs_remove_recursive(), it might still be attempted to access associated private file data through previously opened struct file objects. If that data has been freed by the caller of debugfs_remove*() in the meanwhile, the reading/writing process would either encounter a fault or, if the memory address in question has been reassigned again, unrelated data structures could get overwritten. However, since debugfs files are seldomly removed, usually from module exit handlers only, the impact is very low. Currently, there are ~1000 call sites of debugfs_create_file() spread throughout the whole tree and touching all of those struct file_operations in order to make them file removal aware by means of checking the result of debugfs_use_file_start() from within their methods is unfeasible. Instead, wrap the struct file_operations by a lifetime managing proxy at file open: - In debugfs_create_file(), the original fops handed in has got stashed away in ->d_fsdata already. - In debugfs_create_file(), install a proxy file_operations factory, debugfs_full_proxy_file_operations, at ->i_fop. This proxy factory has got an ->open() method only. It carries out some lifetime checks and if successful, dynamically allocates and sets up a new struct file_operations proxy at ->f_op. Afterwards, it forwards to the ->open() of the original struct file_operations in ->d_fsdata, if any. The dynamically set up proxy at ->f_op has got a lifetime managing wrapper set for each of the methods defined in the original struct file_operations in ->d_fsdata. Its ->release()er frees the proxy again and forwards to the original ->release(), if any. In order not to mislead the VFS layer, it is strictly necessary to leave those fields blank in the proxy that have been NULL in the original struct file_operations also, i.e. aren't supported. This is why there is a need for dynamically allocated proxies. The choice made not to allocate a proxy instance for every dentry at file creation, but for every struct file object instantiated thereof is justified by the expected usage pattern of debugfs, namely that in general very few files get opened more than once at a time. The wrapper methods set in the struct file_operations implement lifetime managing by means of the SRCU protection facilities already in place for debugfs: They set up a SRCU read side critical section and check whether the dentry is still alive by means of debugfs_use_file_start(). If so, they forward the call to the original struct file_operation stored in ->d_fsdata, still under the protection of the SRCU read side critical section. This SRCU read side critical section prevents any pending debugfs_remove() and friends to return to their callers. Since a file's private data must only be freed after the return of debugfs_remove(), the ongoing proxied call is guarded against any file removal race. If, on the other hand, the initial call to debugfs_use_file_start() detects that the dentry is dead, the wrapper simply returns -EIO and does not forward the call. Note that the ->poll() wrapper is special in that its signature does not allow for the return of arbitrary -EXXX values and thus, POLLHUP is returned here. In order not to pollute debugfs with wrapper definitions that aren't ever needed, I chose not to define a wrapper for every struct file_operations method possible. Instead, a wrapper is defined only for the subset of methods which are actually set by any debugfs users. Currently, these are: ->llseek() ->read() ->write() ->unlocked_ioctl() ->poll() The ->release() wrapper is special in that it does not protect the original ->release() in any way from dead files in order not to leak resources. Thus, any ->release() handed to debugfs must implement file lifetime management manually, if needed. For only 33 out of a total of 434 releasers handed in to debugfs, it could not be verified immediately whether they access data structures that might have been freed upon a debugfs_remove() return in the meanwhile. Export debugfs_use_file_start() and debugfs_use_file_finish() in order to allow any ->release() to manually implement file lifetime management. For a set of common cases of struct file_operations implemented by the debugfs_core itself, future patches will incorporate file lifetime management directly within those in order to allow for their unproxied operation. Rename the original, non-proxying "debugfs_create_file()" to "debugfs_create_file_unsafe()" and keep it for future internal use by debugfs itself. Factor out code common to both into the new __debugfs_create_file(). Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++- fs/debugfs/inode.c | 70 ++++++++++++++------- fs/debugfs/internal.h | 6 +- include/linux/debugfs.h | 20 ++++++ 4 files changed, 226 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 736ab3c988f2..6eb58a8ed03c 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -23,9 +23,12 @@ #include #include #include +#include #include "internal.h" +struct poll_table_struct; + static ssize_t default_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -66,7 +69,7 @@ const struct file_operations debugfs_noop_file_operations = { * debugfs_use_file_start() must be followed by a matching call * to debugfs_use_file_finish(). */ -static int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) +int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) __acquires(&debugfs_srcu) { *srcu_idx = srcu_read_lock(&debugfs_srcu); @@ -75,6 +78,7 @@ static int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) return -EIO; return 0; } +EXPORT_SYMBOL_GPL(debugfs_use_file_start); /** * debugfs_use_file_finish - mark the end of file data access @@ -85,10 +89,11 @@ static int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) * debugfs_remove_recursive() blocked by a former call to * debugfs_use_file_start() to proceed and return to its caller. */ -static void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) +void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) { srcu_read_unlock(&debugfs_srcu, srcu_idx); } +EXPORT_SYMBOL_GPL(debugfs_use_file_finish); #define F_DENTRY(filp) ((filp)->f_path.dentry) @@ -131,6 +136,154 @@ const struct file_operations debugfs_open_proxy_file_operations = { .open = open_proxy_open, }; +#define PROTO(args...) args +#define ARGS(args...) args + +#define FULL_PROXY_FUNC(name, ret_type, filp, proto, args) \ +static ret_type full_proxy_ ## name(proto) \ +{ \ + const struct dentry *dentry = F_DENTRY(filp); \ + const struct file_operations *real_fops = \ + REAL_FOPS_DEREF(dentry); \ + int srcu_idx; \ + ret_type r; \ + \ + r = debugfs_use_file_start(dentry, &srcu_idx); \ + if (likely(!r)) \ + r = real_fops->name(args); \ + debugfs_use_file_finish(srcu_idx); \ + return r; \ +} + +FULL_PROXY_FUNC(llseek, loff_t, filp, + PROTO(struct file *filp, loff_t offset, int whence), + ARGS(filp, offset, whence)); + +FULL_PROXY_FUNC(read, ssize_t, filp, + PROTO(struct file *filp, char __user *buf, size_t size, + loff_t *ppos), + ARGS(filp, buf, size, ppos)); + +FULL_PROXY_FUNC(write, ssize_t, filp, + PROTO(struct file *filp, const char __user *buf, size_t size, + loff_t *ppos), + ARGS(filp, buf, size, ppos)); + +FULL_PROXY_FUNC(unlocked_ioctl, long, filp, + PROTO(struct file *filp, unsigned int cmd, unsigned long arg), + ARGS(filp, cmd, arg)); + +static unsigned int full_proxy_poll(struct file *filp, + struct poll_table_struct *wait) +{ + const struct dentry *dentry = F_DENTRY(filp); + const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry); + int srcu_idx; + unsigned int r = 0; + + if (debugfs_use_file_start(dentry, &srcu_idx)) { + debugfs_use_file_finish(srcu_idx); + return POLLHUP; + } + + r = real_fops->poll(filp, wait); + debugfs_use_file_finish(srcu_idx); + return r; +} + +static int full_proxy_release(struct inode *inode, struct file *filp) +{ + const struct dentry *dentry = F_DENTRY(filp); + const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry); + const struct file_operations *proxy_fops = filp->f_op; + int r = 0; + + /* + * We must not protect this against removal races here: the + * original releaser should be called unconditionally in order + * not to leak any resources. Releasers must not assume that + * ->i_private is still being meaningful here. + */ + if (real_fops->release) + r = real_fops->release(inode, filp); + + replace_fops(filp, d_inode(dentry)->i_fop); + kfree((void *)proxy_fops); + fops_put(real_fops); + return 0; +} + +static void __full_proxy_fops_init(struct file_operations *proxy_fops, + const struct file_operations *real_fops) +{ + proxy_fops->release = full_proxy_release; + if (real_fops->llseek) + proxy_fops->llseek = full_proxy_llseek; + if (real_fops->read) + proxy_fops->read = full_proxy_read; + if (real_fops->write) + proxy_fops->write = full_proxy_write; + if (real_fops->poll) + proxy_fops->poll = full_proxy_poll; + if (real_fops->unlocked_ioctl) + proxy_fops->unlocked_ioctl = full_proxy_unlocked_ioctl; +} + +static int full_proxy_open(struct inode *inode, struct file *filp) +{ + const struct dentry *dentry = F_DENTRY(filp); + const struct file_operations *real_fops = NULL; + struct file_operations *proxy_fops = NULL; + int srcu_idx, r; + + r = debugfs_use_file_start(dentry, &srcu_idx); + if (r) { + r = -ENOENT; + goto out; + } + + real_fops = REAL_FOPS_DEREF(dentry); + real_fops = fops_get(real_fops); + if (!real_fops) { + /* Huh? Module did not cleanup after itself at exit? */ + WARN(1, "debugfs file owner did not clean up at exit: %pd", + dentry); + r = -ENXIO; + goto out; + } + + proxy_fops = kzalloc(sizeof(*proxy_fops), GFP_KERNEL); + if (!proxy_fops) { + r = -ENOMEM; + goto free_proxy; + } + __full_proxy_fops_init(proxy_fops, real_fops); + replace_fops(filp, proxy_fops); + + if (real_fops->open) { + r = real_fops->open(inode, filp); + + if (filp->f_op != proxy_fops) { + /* No protection against file removal anymore. */ + WARN(1, "debugfs file owner replaced proxy fops: %pd", + dentry); + goto free_proxy; + } + } + + goto out; +free_proxy: + kfree(proxy_fops); + fops_put(real_fops); +out: + debugfs_use_file_finish(srcu_idx); + return r; +} + +const struct file_operations debugfs_full_proxy_file_operations = { + .open = full_proxy_open, +}; + static struct dentry *debugfs_create_mode(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 2905dd160575..136f269f01de 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -300,6 +300,37 @@ static struct dentry *end_creating(struct dentry *dentry) return dentry; } +static struct dentry *__debugfs_create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *proxy_fops, + const struct file_operations *real_fops) +{ + struct dentry *dentry; + struct inode *inode; + + if (!(mode & S_IFMT)) + mode |= S_IFREG; + BUG_ON(!S_ISREG(mode)); + dentry = start_creating(name, parent); + + if (IS_ERR(dentry)) + return NULL; + + inode = debugfs_get_inode(dentry->d_sb); + if (unlikely(!inode)) + return failed_creating(dentry); + + inode->i_mode = mode; + inode->i_private = data; + + inode->i_fop = proxy_fops; + dentry->d_fsdata = (void *)real_fops; + + d_instantiate(dentry, inode); + fsnotify_create(d_inode(dentry->d_parent), dentry); + return end_creating(dentry); +} + /** * debugfs_create_file - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. @@ -330,33 +361,24 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { - struct dentry *dentry; - struct inode *inode; - - if (!(mode & S_IFMT)) - mode |= S_IFREG; - BUG_ON(!S_ISREG(mode)); - dentry = start_creating(name, parent); - - if (IS_ERR(dentry)) - return NULL; - - inode = debugfs_get_inode(dentry->d_sb); - if (unlikely(!inode)) - return failed_creating(dentry); - inode->i_mode = mode; - inode->i_private = data; + return __debugfs_create_file(name, mode, parent, data, + fops ? &debugfs_full_proxy_file_operations : + &debugfs_noop_file_operations, + fops); +} +EXPORT_SYMBOL_GPL(debugfs_create_file); - inode->i_fop = fops ? &debugfs_open_proxy_file_operations - : &debugfs_noop_file_operations; - dentry->d_fsdata = (void *)fops; +struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) +{ - d_instantiate(dentry, inode); - fsnotify_create(d_inode(dentry->d_parent), dentry); - return end_creating(dentry); + return __debugfs_create_file(name, mode, parent, data, + fops ? &debugfs_open_proxy_file_operations : + &debugfs_noop_file_operations, + fops); } -EXPORT_SYMBOL_GPL(debugfs_create_file); /** * debugfs_create_file_size - create a file in the debugfs filesystem @@ -579,6 +601,7 @@ void debugfs_remove(struct dentry *dentry) inode_unlock(d_inode(parent)); if (!ret) simple_release_fs(&debugfs_mount, &debugfs_mount_count); + synchronize_srcu(&debugfs_srcu); } EXPORT_SYMBOL_GPL(debugfs_remove); @@ -657,6 +680,7 @@ void debugfs_remove_recursive(struct dentry *dentry) if (!__debugfs_remove(child, parent)) simple_release_fs(&debugfs_mount, &debugfs_mount_count); inode_unlock(d_inode(parent)); + synchronize_srcu(&debugfs_srcu); } EXPORT_SYMBOL_GPL(debugfs_remove_recursive); diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h index c7aaa5cb6685..bba52634b995 100644 --- a/fs/debugfs/internal.h +++ b/fs/debugfs/internal.h @@ -13,12 +13,14 @@ #define _DEBUGFS_INTERNAL_H_ struct file_operations; -struct srcu_struct; /* declared over in file.c */ extern const struct file_operations debugfs_noop_file_operations; extern const struct file_operations debugfs_open_proxy_file_operations; +extern const struct file_operations debugfs_full_proxy_file_operations; -extern struct srcu_struct debugfs_srcu; +struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); #endif /* _DEBUGFS_INTERNAL_H_ */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index fcafe2d389f9..a63e6ea3321c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -19,9 +19,11 @@ #include #include +#include struct device; struct file_operations; +struct srcu_struct; struct debugfs_blob_wrapper { void *data; @@ -41,6 +43,8 @@ struct debugfs_regset32 { extern struct dentry *arch_debugfs_dir; +extern struct srcu_struct debugfs_srcu; + #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_create_file(const char *name, umode_t mode, @@ -65,6 +69,11 @@ struct dentry *debugfs_create_automount(const char *name, void debugfs_remove(struct dentry *dentry); void debugfs_remove_recursive(struct dentry *dentry); +int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) + __acquires(&debugfs_srcu); + +void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu); + struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); @@ -173,6 +182,17 @@ static inline void debugfs_remove(struct dentry *dentry) static inline void debugfs_remove_recursive(struct dentry *dentry) { } +static inline int debugfs_use_file_start(const struct dentry *dentry, + int *srcu_idx) + __acquires(&debugfs_srcu) +{ + return 0; +} + +static inline void debugfs_use_file_finish(int srcu_idx) + __releases(&debugfs_srcu) +{ } + static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, char *new_name) { -- cgit v1.2.3 From c64688081490321f2d23a292ef24e60bb321f3f1 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:15 +0100 Subject: debugfs: add support for self-protecting attribute file fops In order to protect them against file removal issues, debugfs_create_file() creates a lifetime managing proxy around each struct file_operations handed in. In cases where this struct file_operations is able to manage file lifetime by itself already, the proxy created by debugfs is a waste of resources. The most common class of struct file_operations given to debugfs are those defined by means of the DEFINE_SIMPLE_ATTRIBUTE() macro. Introduce a DEFINE_DEBUGFS_ATTRIBUTE() macro to allow any struct file_operations of this class to be easily made file lifetime aware and thus, to be operated unproxied. Specifically, introduce debugfs_attr_read() and debugfs_attr_write() which wrap simple_attr_read() and simple_attr_write() under the protection of a debugfs_use_file_start()/debugfs_use_file_finish() pair. Make DEFINE_DEBUGFS_ATTRIBUTE() set the defined struct file_operations' ->read() and ->write() members to these wrappers. Export debugfs_create_file_unsafe() in order to allow debugfs users to create their files in non-proxying operation mode. Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 28 ++++++++++++++++++++++++++++ fs/debugfs/inode.c | 28 ++++++++++++++++++++++++++++ include/linux/debugfs.h | 26 ++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) (limited to 'fs') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 6eb58a8ed03c..8ef56d9499a4 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -284,6 +284,34 @@ const struct file_operations debugfs_full_proxy_file_operations = { .open = full_proxy_open, }; +ssize_t debugfs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + ssize_t ret; + int srcu_idx; + + ret = debugfs_use_file_start(F_DENTRY(file), &srcu_idx); + if (likely(!ret)) + ret = simple_attr_read(file, buf, len, ppos); + debugfs_use_file_finish(srcu_idx); + return ret; +} +EXPORT_SYMBOL_GPL(debugfs_attr_read); + +ssize_t debugfs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + ssize_t ret; + int srcu_idx; + + ret = debugfs_use_file_start(F_DENTRY(file), &srcu_idx); + if (likely(!ret)) + ret = simple_attr_write(file, buf, len, ppos); + debugfs_use_file_finish(srcu_idx); + return ret; +} +EXPORT_SYMBOL_GPL(debugfs_attr_write); + static struct dentry *debugfs_create_mode(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 136f269f01de..41e079a8da26 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -369,6 +369,33 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_file); +/** + * debugfs_create_file_unsafe - create a file in the debugfs filesystem + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * file will be created in the root of the debugfs filesystem. + * @data: a pointer to something that the caller will want to get to later + * on. The inode.i_private pointer will point to this value on + * the open() call. + * @fops: a pointer to a struct file_operations that should be used for + * this file. + * + * debugfs_create_file_unsafe() is completely analogous to + * debugfs_create_file(), the only difference being that the fops + * handed it will not get protected against file removals by the + * debugfs core. + * + * It is your responsibility to protect your struct file_operation + * methods against file removals by means of debugfs_use_file_start() + * and debugfs_use_file_finish(). ->open() is still protected by + * debugfs though. + * + * Any struct file_operations defined by means of + * DEFINE_DEBUGFS_ATTRIBUTE() is protected against file removals and + * thus, may be used here. + */ struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) @@ -379,6 +406,7 @@ struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, &debugfs_noop_file_operations, fops); } +EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe); /** * debugfs_create_file_size - create a file in the debugfs filesystem diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index a63e6ea3321c..1438e2322d5c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -50,6 +50,9 @@ extern struct srcu_struct debugfs_srcu; struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); +struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); struct dentry *debugfs_create_file_size(const char *name, umode_t mode, struct dentry *parent, void *data, @@ -74,6 +77,26 @@ int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu); +ssize_t debugfs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos); +ssize_t debugfs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); + +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return simple_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = simple_attr_release, \ + .read = debugfs_attr_read, \ + .write = debugfs_attr_write, \ + .llseek = generic_file_llseek, \ +} + struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); @@ -193,6 +216,9 @@ static inline void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) { } +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ + static const struct file_operations __fops = { 0 } + static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, char *new_name) { -- cgit v1.2.3 From 4909f168104b24f592fb8d502e2a6520346a3927 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:17 +0100 Subject: debugfs: unproxify integer attribute files Currently, the struct file_operations associated with the integer attribute style files created through the debugfs_create_*() helpers are not file lifetime aware as they are defined by means of DEFINE_SIMPLE_ATTRIBUTE(). Thus, a lifetime managing proxy is created around the original fops each time such a file is opened which is an unnecessary waste of resources. Migrate all usages of DEFINE_SIMPLE_ATTRIBUTE() within debugfs itself to DEFINE_DEBUGFS_ATTRIBUTE() in order to implement file lifetime managing within the struct file_operations thus defined. Introduce the debugfs_create_mode_unsafe() helper, analogous to debugfs_create_mode(), but distinct in that it creates the files in non-proxying operation mode through debugfs_create_file_unsafe(). Feed all struct file_operations migrated to DEFINE_DEBUGFS_ATTRIBUTE() into debugfs_create_mode_unsafe() instead of former debugfs_create_mode(). Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 123 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 75 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 8ef56d9499a4..4b3967e86e97 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -328,6 +328,24 @@ static struct dentry *debugfs_create_mode(const char *name, umode_t mode, return debugfs_create_file(name, mode, parent, value, fops); } +static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode, + struct dentry *parent, void *value, + const struct file_operations *fops, + const struct file_operations *fops_ro, + const struct file_operations *fops_wo) +{ + /* if there are no write bits set, make read only */ + if (!(mode & S_IWUGO)) + return debugfs_create_file_unsafe(name, mode, parent, value, + fops_ro); + /* if there are no read bits set, make write only */ + if (!(mode & S_IRUGO)) + return debugfs_create_file_unsafe(name, mode, parent, value, + fops_wo); + + return debugfs_create_file_unsafe(name, mode, parent, value, fops); +} + static int debugfs_u8_set(void *data, u64 val) { *(u8 *)data = val; @@ -338,9 +356,9 @@ static int debugfs_u8_get(void *data, u64 *val) *val = *(u8 *)data; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_u8_ro, debugfs_u8_get, NULL, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_ro, debugfs_u8_get, NULL, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n"); /** * debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value @@ -369,7 +387,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n"); struct dentry *debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_u8, + return debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u8, &fops_u8_ro, &fops_u8_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u8); @@ -384,9 +402,9 @@ static int debugfs_u16_get(void *data, u64 *val) *val = *(u16 *)data; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_u16_ro, debugfs_u16_get, NULL, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_ro, debugfs_u16_get, NULL, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n"); /** * debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value @@ -415,7 +433,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n"); struct dentry *debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_u16, + return debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u16, &fops_u16_ro, &fops_u16_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u16); @@ -430,9 +448,9 @@ static int debugfs_u32_get(void *data, u64 *val) *val = *(u32 *)data; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_u32_ro, debugfs_u32_get, NULL, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_ro, debugfs_u32_get, NULL, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n"); /** * debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value @@ -461,7 +479,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n"); struct dentry *debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, u32 *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_u32, + return debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u32, &fops_u32_ro, &fops_u32_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u32); @@ -477,9 +495,9 @@ static int debugfs_u64_get(void *data, u64 *val) *val = *(u64 *)data; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_ro, debugfs_u64_get, NULL, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_ro, debugfs_u64_get, NULL, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); /** * debugfs_create_u64 - create a debugfs file that is used to read and write an unsigned 64-bit value @@ -508,7 +526,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); struct dentry *debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_u64, + return debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u64, &fops_u64_ro, &fops_u64_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u64); @@ -524,9 +542,10 @@ static int debugfs_ulong_get(void *data, u64 *val) *val = *(unsigned long *)data; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_ulong, debugfs_ulong_get, debugfs_ulong_set, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_ulong_ro, debugfs_ulong_get, NULL, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong, debugfs_ulong_get, debugfs_ulong_set, + "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_ro, debugfs_ulong_get, NULL, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n"); /** * debugfs_create_ulong - create a debugfs file that is used to read and write @@ -556,26 +575,30 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n"); struct dentry *debugfs_create_ulong(const char *name, umode_t mode, struct dentry *parent, unsigned long *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_ulong, - &fops_ulong_ro, &fops_ulong_wo); + return debugfs_create_mode_unsafe(name, mode, parent, value, + &fops_ulong, &fops_ulong_ro, + &fops_ulong_wo); } EXPORT_SYMBOL_GPL(debugfs_create_ulong); -DEFINE_SIMPLE_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_x16_ro, debugfs_u16_get, NULL, "0x%04llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_x16_wo, NULL, debugfs_u16_set, "0x%04llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, + "0x%04llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_ro, debugfs_u16_get, NULL, "0x%04llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_wo, NULL, debugfs_u16_set, "0x%04llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, + "0x%08llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_x64_ro, debugfs_u64_get, NULL, "0x%016llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_x64_wo, NULL, debugfs_u64_set, "0x%016llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, + "0x%016llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_ro, debugfs_u64_get, NULL, "0x%016llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_wo, NULL, debugfs_u64_set, "0x%016llx\n"); /* * debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read and write an unsigned {8,16,32,64}-bit value @@ -598,7 +621,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x64_wo, NULL, debugfs_u64_set, "0x%016llx\n"); struct dentry *debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_x8, + return debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x8, &fops_x8_ro, &fops_x8_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x8); @@ -616,7 +639,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x8); struct dentry *debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_x16, + return debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x16, &fops_x16_ro, &fops_x16_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x16); @@ -634,7 +657,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x16); struct dentry *debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, u32 *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_x32, + return debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x32, &fops_x32_ro, &fops_x32_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x32); @@ -652,7 +675,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x32); struct dentry *debugfs_create_x64(const char *name, umode_t mode, struct dentry *parent, u64 *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_x64, + return debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x64, &fops_x64_ro, &fops_x64_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x64); @@ -668,10 +691,10 @@ static int debugfs_size_t_get(void *data, u64 *val) *val = *(size_t *)data; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set, - "%llu\n"); /* %llu and %zu are more or less the same */ -DEFINE_SIMPLE_ATTRIBUTE(fops_size_t_ro, debugfs_size_t_get, NULL, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_size_t_wo, NULL, debugfs_size_t_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set, + "%llu\n"); /* %llu and %zu are more or less the same */ +DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_ro, debugfs_size_t_get, NULL, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_wo, NULL, debugfs_size_t_set, "%llu\n"); /** * debugfs_create_size_t - create a debugfs file that is used to read and write an size_t value @@ -686,8 +709,9 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_size_t_wo, NULL, debugfs_size_t_set, "%llu\n"); struct dentry *debugfs_create_size_t(const char *name, umode_t mode, struct dentry *parent, size_t *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_size_t, - &fops_size_t_ro, &fops_size_t_wo); + return debugfs_create_mode_unsafe(name, mode, parent, value, + &fops_size_t, &fops_size_t_ro, + &fops_size_t_wo); } EXPORT_SYMBOL_GPL(debugfs_create_size_t); @@ -701,10 +725,12 @@ static int debugfs_atomic_t_get(void *data, u64 *val) *val = atomic_read((atomic_t *)data); return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get, +DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get, debugfs_atomic_t_set, "%lld\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, "%lld\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, "%lld\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, + "%lld\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, + "%lld\n"); /** * debugfs_create_atomic_t - create a debugfs file that is used to read and @@ -720,8 +746,9 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, "%lld\n"); struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, struct dentry *parent, atomic_t *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_atomic_t, - &fops_atomic_t_ro, &fops_atomic_t_wo); + return debugfs_create_mode_unsafe(name, mode, parent, value, + &fops_atomic_t, &fops_atomic_t_ro, + &fops_atomic_t_wo); } EXPORT_SYMBOL_GPL(debugfs_create_atomic_t); -- cgit v1.2.3 From 4d45f7974ccf0aa783034fef2661573b3a28609e Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:18 +0100 Subject: debugfs: unproxify files created through debugfs_create_bool() Currently, the struct file_operations fops_bool associated with files created through the debugfs_create_bool() helpers are not file lifetime aware. Thus, a lifetime managing proxy is created around fops_bool each time such a file is opened which is an unnecessary waste of resources. Implement file lifetime management for the fops_bool file_operations. Namely, make debugfs_read_file_bool() and debugfs_write_file_bool() safe against file removals by means of debugfs_use_file_start() and debugfs_use_file_finish(). Make debugfs_create_bool() create its files in non-proxying operation mode through debugfs_create_mode_unsafe(). Finally, purge debugfs_create_mode() as debugfs_create_bool() had been its last user. Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 4b3967e86e97..8a548bee1b3d 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -312,22 +312,6 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf, } EXPORT_SYMBOL_GPL(debugfs_attr_write); -static struct dentry *debugfs_create_mode(const char *name, umode_t mode, - struct dentry *parent, void *value, - const struct file_operations *fops, - const struct file_operations *fops_ro, - const struct file_operations *fops_wo) -{ - /* if there are no write bits set, make read only */ - if (!(mode & S_IWUGO)) - return debugfs_create_file(name, mode, parent, value, fops_ro); - /* if there are no read bits set, make write only */ - if (!(mode & S_IRUGO)) - return debugfs_create_file(name, mode, parent, value, fops_wo); - - return debugfs_create_file(name, mode, parent, value, fops); -} - static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, @@ -756,9 +740,17 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { char buf[3]; - bool *val = file->private_data; + bool val; + int r, srcu_idx; + + r = debugfs_use_file_start(F_DENTRY(file), &srcu_idx); + if (likely(!r)) + val = *(bool *)file->private_data; + debugfs_use_file_finish(srcu_idx); + if (r) + return r; - if (*val) + if (val) buf[0] = 'Y'; else buf[0] = 'N'; @@ -774,6 +766,7 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, char buf[32]; size_t buf_size; bool bv; + int r, srcu_idx; bool *val = file->private_data; buf_size = min(count, (sizeof(buf)-1)); @@ -781,8 +774,14 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, return -EFAULT; buf[buf_size] = '\0'; - if (strtobool(buf, &bv) == 0) - *val = bv; + if (strtobool(buf, &bv) == 0) { + r = debugfs_use_file_start(F_DENTRY(file), &srcu_idx); + if (likely(!r)) + *val = bv; + debugfs_use_file_finish(srcu_idx); + if (r) + return r; + } return count; } @@ -834,7 +833,7 @@ static const struct file_operations fops_bool_wo = { struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, bool *value) { - return debugfs_create_mode(name, mode, parent, value, &fops_bool, + return debugfs_create_mode_unsafe(name, mode, parent, value, &fops_bool, &fops_bool_ro, &fops_bool_wo); } EXPORT_SYMBOL_GPL(debugfs_create_bool); -- cgit v1.2.3 From 83b711cbf4ff42a9996c5f092762b3967d307d73 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:19 +0100 Subject: debugfs: unproxify files created through debugfs_create_blob() Currently, the struct file_operations fops_blob associated with files created through the debugfs_create_blob() helpers are not file lifetime aware. Thus, a lifetime managing proxy is created around fops_blob each time such a file is opened which is an unnecessary waste of resources. Implement file lifetime management for the fops_bool file_operations. Namely, make read_file_blob() safe gainst file removals by means of debugfs_use_file_start() and debugfs_use_file_finish(). Make debugfs_create_blob() create its files in non-proxying operation mode by means of debugfs_create_file_unsafe(). Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 8a548bee1b3d..2e86d66f7850 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -842,8 +842,15 @@ static ssize_t read_file_blob(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct debugfs_blob_wrapper *blob = file->private_data; - return simple_read_from_buffer(user_buf, count, ppos, blob->data, - blob->size); + ssize_t r; + int srcu_idx; + + r = debugfs_use_file_start(F_DENTRY(file), &srcu_idx); + if (likely(!r)) + r = simple_read_from_buffer(user_buf, count, ppos, blob->data, + blob->size); + debugfs_use_file_finish(srcu_idx); + return r; } static const struct file_operations fops_blob = { @@ -880,7 +887,7 @@ struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob) { - return debugfs_create_file(name, mode, parent, blob, &fops_blob); + return debugfs_create_file_unsafe(name, mode, parent, blob, &fops_blob); } EXPORT_SYMBOL_GPL(debugfs_create_blob); -- cgit v1.2.3 From c4a74f63dfd2e75e7d40a9aaa4052b0ef26e617c Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:20 +0100 Subject: debugfs: unproxify files created through debugfs_create_u32_array() The struct file_operations u32_array_fops associated with files created through debugfs_create_u32_array() has been lifetime aware already: everything needed for subsequent operation is copied to a ->f_private buffer at file opening time in u32_array_open(). Now, ->open() is always protected against file removal issues by the debugfs core. There is no need for the debugfs core to wrap the u32_array_fops with a file lifetime managing proxy. Make debugfs_create_u32_array() create its files in non-proxying operation mode by means of debugfs_create_file_unsafe(). Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 2e86d66f7850..9c1c9a01b7e5 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -992,7 +992,8 @@ struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, data->array = array; data->elements = elements; - return debugfs_create_file(name, mode, parent, data, &u32_array_fops); + return debugfs_create_file_unsafe(name, mode, parent, data, + &u32_array_fops); } EXPORT_SYMBOL_GPL(debugfs_create_u32_array); -- cgit v1.2.3 From e4234a1fc343ca35f852bc527fae56fade879d4a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 31 Mar 2016 11:45:06 +0100 Subject: kernfs: Move faulting copy_user operations outside of the mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A fault in a user provided buffer may lead anywhere, and lockdep warns that we have a potential deadlock between the mm->mmap_sem and the kernfs file mutex: [ 82.811702] ====================================================== [ 82.811705] [ INFO: possible circular locking dependency detected ] [ 82.811709] 4.5.0-rc4-gfxbench+ #1 Not tainted [ 82.811711] ------------------------------------------------------- [ 82.811714] kms_setmode/5859 is trying to acquire lock: [ 82.811717] (&dev->struct_mutex){+.+.+.}, at: [] drm_gem_mmap+0x1a1/0x270 [ 82.811731] but task is already holding lock: [ 82.811734] (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x44/0xa0 [ 82.811745] which lock already depends on the new lock. [ 82.811749] the existing dependency chain (in reverse order) is: [ 82.811752] -> #3 (&mm->mmap_sem){++++++}: [ 82.811761] [] lock_acquire+0xc3/0x1d0 [ 82.811766] [] __might_fault+0x75/0xa0 [ 82.811771] [] kernfs_fop_write+0x8a/0x180 [ 82.811787] [] __vfs_write+0x23/0xe0 [ 82.811792] [] vfs_write+0xa4/0x190 [ 82.811797] [] SyS_write+0x44/0xb0 [ 82.811801] [] entry_SYSCALL_64_fastpath+0x16/0x73 [ 82.811807] -> #2 (s_active#6){++++.+}: [ 82.811814] [] lock_acquire+0xc3/0x1d0 [ 82.811819] [] __kernfs_remove+0x210/0x2f0 [ 82.811823] [] kernfs_remove_by_name_ns+0x40/0xa0 [ 82.811828] [] sysfs_remove_file_ns+0x10/0x20 [ 82.811832] [] device_del+0x124/0x250 [ 82.811837] [] device_unregister+0x19/0x60 [ 82.811841] [] cpu_cache_sysfs_exit+0x51/0xb0 [ 82.811846] [] cacheinfo_cpu_callback+0x38/0x70 [ 82.811851] [] notifier_call_chain+0x39/0xa0 [ 82.811856] [] __raw_notifier_call_chain+0x9/0x10 [ 82.811860] [] cpu_notify+0x1e/0x40 [ 82.811865] [] cpu_notify_nofail+0x9/0x20 [ 82.811869] [] _cpu_down+0x233/0x340 [ 82.811874] [] disable_nonboot_cpus+0xc9/0x350 [ 82.811878] [] suspend_devices_and_enter+0x5a1/0xb50 [ 82.811883] [] pm_suspend+0x543/0x8d0 [ 82.811888] [] state_store+0x77/0xe0 [ 82.811892] [] kobj_attr_store+0xf/0x20 [ 82.811897] [] sysfs_kf_write+0x40/0x50 [ 82.811902] [] kernfs_fop_write+0x13c/0x180 [ 82.811906] [] __vfs_write+0x23/0xe0 [ 82.811910] [] vfs_write+0xa4/0x190 [ 82.811914] [] SyS_write+0x44/0xb0 [ 82.811918] [] entry_SYSCALL_64_fastpath+0x16/0x73 [ 82.811923] -> #1 (cpu_hotplug.lock){+.+.+.}: [ 82.811929] [] lock_acquire+0xc3/0x1d0 [ 82.811933] [] mutex_lock_nested+0x62/0x3b0 [ 82.811940] [] get_online_cpus+0x61/0x80 [ 82.811944] [] stop_machine+0x1b/0xe0 [ 82.811949] [] gen8_ggtt_insert_entries__BKL+0x2d/0x30 [i915] [ 82.812009] [] ggtt_bind_vma+0x46/0x70 [i915] [ 82.812045] [] i915_vma_bind+0x140/0x290 [i915] [ 82.812081] [] i915_gem_object_do_pin+0x899/0xb00 [i915] [ 82.812117] [] i915_gem_object_pin+0x35/0x40 [i915] [ 82.812154] [] intel_init_pipe_control+0xbe/0x210 [i915] [ 82.812192] [] intel_logical_rings_init+0xe2/0xde0 [i915] [ 82.812232] [] i915_gem_init+0xf3/0x130 [i915] [ 82.812278] [] i915_driver_load+0xf2d/0x1770 [i915] [ 82.812318] [] drm_dev_register+0xa4/0xb0 [ 82.812323] [] drm_get_pci_dev+0xce/0x1e0 [ 82.812328] [] i915_pci_probe+0x2f/0x50 [i915] [ 82.812360] [] pci_device_probe+0x87/0xf0 [ 82.812366] [] driver_probe_device+0x229/0x450 [ 82.812371] [] __driver_attach+0x83/0x90 [ 82.812375] [] bus_for_each_dev+0x61/0xa0 [ 82.812380] [] driver_attach+0x19/0x20 [ 82.812384] [] bus_add_driver+0x1ef/0x290 [ 82.812388] [] driver_register+0x5b/0xe0 [ 82.812393] [] __pci_register_driver+0x5b/0x60 [ 82.812398] [] drm_pci_init+0xd6/0x100 [ 82.812402] [] 0xffffffffa027c094 [ 82.812406] [] do_one_initcall+0xae/0x1d0 [ 82.812412] [] do_init_module+0x5b/0x1cb [ 82.812417] [] load_module+0x1c20/0x2480 [ 82.812422] [] SyS_finit_module+0x7e/0xa0 [ 82.812428] [] entry_SYSCALL_64_fastpath+0x16/0x73 [ 82.812433] -> #0 (&dev->struct_mutex){+.+.+.}: [ 82.812439] [] __lock_acquire+0x1fc9/0x20f0 [ 82.812443] [] lock_acquire+0xc3/0x1d0 [ 82.812456] [] drm_gem_mmap+0x1c7/0x270 [ 82.812460] [] mmap_region+0x334/0x580 [ 82.812466] [] do_mmap+0x364/0x410 [ 82.812470] [] vm_mmap_pgoff+0x6d/0xa0 [ 82.812474] [] SyS_mmap_pgoff+0x184/0x220 [ 82.812479] [] SyS_mmap+0x1d/0x20 [ 82.812484] [] entry_SYSCALL_64_fastpath+0x16/0x73 [ 82.812489] other info that might help us debug this: [ 82.812493] Chain exists of: &dev->struct_mutex --> s_active#6 --> &mm->mmap_sem [ 82.812502] Possible unsafe locking scenario: [ 82.812506] CPU0 CPU1 [ 82.812508] ---- ---- [ 82.812510] lock(&mm->mmap_sem); [ 82.812514] lock(s_active#6); [ 82.812519] lock(&mm->mmap_sem); [ 82.812522] lock(&dev->struct_mutex); [ 82.812526] *** DEADLOCK *** [ 82.812531] 1 lock held by kms_setmode/5859: [ 82.812533] #0: (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x44/0xa0 [ 82.812541] stack backtrace: [ 82.812547] CPU: 0 PID: 5859 Comm: kms_setmode Not tainted 4.5.0-rc4-gfxbench+ #1 [ 82.812550] Hardware name: /NUC5CPYB, BIOS PYBSWCEL.86A.0040.2015.0814.1353 08/14/2015 [ 82.812553] 0000000000000000 ffff880079407bf0 ffffffff813f8505 ffffffff825fb270 [ 82.812560] ffffffff825c4190 ffff880079407c30 ffffffff810c84ac ffff880079407c90 [ 82.812566] ffff8800797ed328 ffff8800797ecb00 0000000000000001 ffff8800797ed350 [ 82.812573] Call Trace: [ 82.812578] [] dump_stack+0x67/0x92 [ 82.812582] [] print_circular_bug+0x1fc/0x310 [ 82.812586] [] __lock_acquire+0x1fc9/0x20f0 [ 82.812590] [] lock_acquire+0xc3/0x1d0 [ 82.812594] [] ? drm_gem_mmap+0x1a1/0x270 [ 82.812599] [] drm_gem_mmap+0x1c7/0x270 [ 82.812603] [] ? drm_gem_mmap+0x1a1/0x270 [ 82.812608] [] mmap_region+0x334/0x580 [ 82.812612] [] do_mmap+0x364/0x410 [ 82.812616] [] vm_mmap_pgoff+0x6d/0xa0 [ 82.812629] [] SyS_mmap_pgoff+0x184/0x220 [ 82.812633] [] SyS_mmap+0x1d/0x20 [ 82.812637] [] entry_SYSCALL_64_fastpath+0x16/0x73 Highly unlikely though this scenario is, we can avoid the issue entirely by moving the copy operation from out under the kernfs_get_active() tracking by assigning the preallocated buffer its own mutex. The temporary buffer allocation doesn't require mutex locking as it is entirely local. The locked section was extended by the addition of the preallocated buf to speed up md user operations in commit 2b75869bba676c248d8d25ae6d2bd9221dfffdb6 Author: NeilBrown Date: Mon Oct 13 16:41:28 2014 +1100 sysfs/kernfs: allow attributes to request write buffer be pre-allocated. Reported-by: Ville Syrjälä Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94350 Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Cc: Ville Syrjälä Cc: Joonas Lahtinen Cc: NeilBrown Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 51 ++++++++++++++++++++++++++++---------------------- include/linux/kernfs.h | 1 + 2 files changed, 30 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 7247252ee9b1..e1574008adc9 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -190,15 +190,16 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, char *buf; buf = of->prealloc_buf; - if (!buf) + if (buf) + mutex_lock(&of->prealloc_mutex); + else buf = kmalloc(len, GFP_KERNEL); if (!buf) return -ENOMEM; /* * @of->mutex nests outside active ref and is used both to ensure that - * the ops aren't called concurrently for the same open file, and - * to provide exclusive access to ->prealloc_buf (when that exists). + * the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) { @@ -214,21 +215,23 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, else len = -EINVAL; + kernfs_put_active(of->kn); + mutex_unlock(&of->mutex); + if (len < 0) - goto out_unlock; + goto out_free; if (copy_to_user(user_buf, buf, len)) { len = -EFAULT; - goto out_unlock; + goto out_free; } *ppos += len; - out_unlock: - kernfs_put_active(of->kn); - mutex_unlock(&of->mutex); out_free: - if (buf != of->prealloc_buf) + if (buf == of->prealloc_buf) + mutex_unlock(&of->prealloc_mutex); + else kfree(buf); return len; } @@ -284,15 +287,22 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, } buf = of->prealloc_buf; - if (!buf) + if (buf) + mutex_lock(&of->prealloc_mutex); + else buf = kmalloc(len + 1, GFP_KERNEL); if (!buf) return -ENOMEM; + if (copy_from_user(buf, user_buf, len)) { + len = -EFAULT; + goto out_free; + } + buf[len] = '\0'; /* guarantee string termination */ + /* * @of->mutex nests outside active ref and is used both to ensure that - * the ops aren't called concurrently for the same open file, and - * to provide exclusive access to ->prealloc_buf (when that exists). + * the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) { @@ -301,26 +311,22 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, goto out_free; } - if (copy_from_user(buf, user_buf, len)) { - len = -EFAULT; - goto out_unlock; - } - buf[len] = '\0'; /* guarantee string termination */ - ops = kernfs_ops(of->kn); if (ops->write) len = ops->write(of, buf, len, *ppos); else len = -EINVAL; + kernfs_put_active(of->kn); + mutex_unlock(&of->mutex); + if (len > 0) *ppos += len; -out_unlock: - kernfs_put_active(of->kn); - mutex_unlock(&of->mutex); out_free: - if (buf != of->prealloc_buf) + if (buf == of->prealloc_buf) + mutex_unlock(&of->prealloc_mutex); + else kfree(buf); return len; } @@ -687,6 +693,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) error = -ENOMEM; if (!of->prealloc_buf) goto err_free; + mutex_init(&of->prealloc_mutex); } /* diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index c06c44242f39..d306e282bb1d 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -177,6 +177,7 @@ struct kernfs_open_file { /* private fields, do not use outside kernfs proper */ struct mutex mutex; + struct mutex prealloc_mutex; int event; struct list_head list; char *prealloc_buf; -- cgit v1.2.3