From 6473ea760ca1707ade74de5b57e74189d14f8e10 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:09 +0200 Subject: fsnotify: tidy up FS_ and FAN_ constants Order by value, so the free value ranges are easier to find. Link: https://lore.kernel.org/r/20200319151022.31456-2-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index b9effa6f8503..2a1844edda47 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -25,9 +25,9 @@ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ -#define FAN_ONDIR 0x40000000 /* event occurred against dir */ +#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ -#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ +#define FAN_ONDIR 0x40000000 /* Event occurred against dir */ /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ -- cgit v1.2.3 From 9e2ba2c34f1922ca1e0c7d31b30ace5842c2e7d1 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:19 +0200 Subject: fanotify: send FAN_DIR_MODIFY event flavor with dir inode and name Dirent events are going to be supported in two flavors: 1. Directory fid info + mask that includes the specific event types (e.g. FAN_CREATE) and an optional FAN_ONDIR flag. 2. Directory fid info + name + mask that includes only FAN_DIR_MODIFY. To request the second event flavor, user needs to set the event type FAN_DIR_MODIFY in the mark mask. The first flavor is supported since kernel v5.1 for groups initialized with flag FAN_REPORT_FID. It is intended to be used for watching directories in "batch mode" - the watcher is notified when directory is changed and re-scans the directory content in response. This event flavor is stored more compactly in the event queue, so it is optimal for workloads with frequent directory changes. The second event flavor is intended to be used for watching large directories, where the cost of re-scan of the directory on every change is considered too high. The watcher getting the event with the directory fid and entry name is expected to call fstatat(2) to query the content of the entry after the change. Legacy inotify events are reported with name and event mask (e.g. "foo", FAN_CREATE | FAN_ONDIR). That can lead users to the conclusion that there is *currently* an entry "foo" that is a sub-directory, when in fact "foo" may be negative or non-dir by the time user gets the event. To make it clear that the current state of the named entry is unknown, when reporting an event with name info, fanotify obfuscates the specific event types (e.g. create,delete,rename) and uses a common event type - FAN_DIR_MODIFY to describe the change. This should make it harder for users to make wrong assumptions and write buggy filesystem monitors. At this point, name info reporting is not yet implemented, so trying to set FAN_DIR_MODIFY in mark mask will return -EINVAL. Link: https://lore.kernel.org/r/20200319151022.31456-12-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 7 ++++--- fs/notify/fsnotify.c | 2 +- include/linux/fsnotify.h | 6 ++++++ include/linux/fsnotify_backend.h | 4 +++- include/uapi/linux/fanotify.h | 1 + 5 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 39eb71f7c413..74676228f784 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -235,9 +235,9 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, test_mask = event_mask & marks_mask & ~marks_ignored_mask; /* - * dirent modification events (create/delete/move) do not carry the - * child entry name/inode information. Instead, we report FAN_ONDIR - * for mkdir/rmdir so user can differentiate them from creat/unlink. + * For dirent modification events (create/delete/move) that do not carry + * the child entry name information, we report FAN_ONDIR for mkdir/rmdir + * so user can differentiate them from creat/unlink. * * For backward compatibility and consistency, do not report FAN_ONDIR * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR @@ -463,6 +463,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_MOVED_FROM != FS_MOVED_FROM); BUILD_BUG_ON(FAN_CREATE != FS_CREATE); BUILD_BUG_ON(FAN_DELETE != FS_DELETE); + BUILD_BUG_ON(FAN_DIR_MODIFY != FS_DIR_MODIFY); BUILD_BUG_ON(FAN_DELETE_SELF != FS_DELETE_SELF); BUILD_BUG_ON(FAN_MOVE_SELF != FS_MOVE_SELF); BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 193530f57963..72d332ce8e12 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -383,7 +383,7 @@ static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 860018f3e545..5ab28f6c7d26 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -30,6 +30,12 @@ static inline void fsnotify_name(struct inode *dir, __u32 mask, const struct qstr *name, u32 cookie) { fsnotify(dir, mask, child, FSNOTIFY_EVENT_INODE, name, cookie); + /* + * Send another flavor of the event without child inode data and + * without the specific event type (e.g. FS_CREATE|FS_IS_DIR). + * The name is relative to the dir inode the event is reported to. + */ + fsnotify(dir, FS_DIR_MODIFY, dir, FSNOTIFY_EVENT_INODE, name, 0); } static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c72cbea20ef7..f0c506405b54 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -47,6 +47,7 @@ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ +#define FS_DIR_MODIFY 0x00080000 /* Directory entry was modified */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ /* This inode cares about things that happen to its children. Always set for @@ -66,7 +67,8 @@ * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. */ -#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | \ + FS_DIR_MODIFY) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 2a1844edda47..615fa2c87179 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -24,6 +24,7 @@ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +#define FAN_DIR_MODIFY 0x00080000 /* Directory entry was modified */ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ -- cgit v1.2.3 From 44d705b0370b1d581f46ff23e5d33e8b5ff8ec58 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:22 +0200 Subject: fanotify: report name info for FAN_DIR_MODIFY event Report event FAN_DIR_MODIFY with name in a variable length record similar to how fid's are reported. With name info reporting implemented, setting FAN_DIR_MODIFY in mark mask is now allowed. When events are reported with name, the reported fid identifies the directory and the name follows the fid. The info record type for this event info is FAN_EVENT_INFO_TYPE_DFID_NAME. For now, all reported events have at most one info record which is either FAN_EVENT_INFO_TYPE_FID or FAN_EVENT_INFO_TYPE_DFID_NAME (for FAN_DIR_MODIFY). Later on, events "on child" will report both records. There are several ways that an application can use this information: 1. When watching a single directory, the name is always relative to the watched directory, so application need to fstatat(2) the name relative to the watched directory. 2. When watching a set of directories, the application could keep a map of dirfd for all watched directories and hash the map by fid obtained with name_to_handle_at(2). When getting a name event, the fid in the event info could be used to lookup the base dirfd in the map and then call fstatat(2) with that dirfd. 3. When watching a filesystem (FAN_MARK_FILESYSTEM) or a large set of directories, the application could use open_by_handle_at(2) with the fid in event info to obtain dirfd for the directory where event happened and call fstatat(2) with this dirfd. The last option scales better for a large number of watched directories. The first two options may be available in the future also for non privileged fanotify watchers, because open_by_handle_at(2) requires the CAP_DAC_READ_SEARCH capability. Link: https://lore.kernel.org/r/20200319151022.31456-15-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 2 +- fs/notify/fanotify/fanotify_user.c | 117 ++++++++++++++++++++++++++++--------- include/linux/fanotify.h | 3 +- include/uapi/linux/fanotify.h | 8 ++- 4 files changed, 100 insertions(+), 30 deletions(-) (limited to 'include/uapi') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 599654564b2a..4c1a4eb597d5 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -520,7 +520,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20); mask = fanotify_group_event_mask(group, iter_info, mask, data, data_type); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a9d287a56098..42cb794c62ac 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -51,22 +51,35 @@ struct kmem_cache *fanotify_path_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep __read_mostly; #define FANOTIFY_EVENT_ALIGN 4 +#define FANOTIFY_INFO_HDR_LEN \ + (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle)) -static int fanotify_fid_info_len(int fh_len) +static int fanotify_fid_info_len(int fh_len, int name_len) { - return roundup(sizeof(struct fanotify_event_info_fid) + - sizeof(struct file_handle) + fh_len, - FANOTIFY_EVENT_ALIGN); + int info_len = fh_len; + + if (name_len) + info_len += name_len + 1; + + return roundup(FANOTIFY_INFO_HDR_LEN + info_len, FANOTIFY_EVENT_ALIGN); } static int fanotify_event_info_len(struct fanotify_event *event) { + int info_len = 0; int fh_len = fanotify_event_object_fh_len(event); - if (!fh_len) - return 0; + if (fh_len) + info_len += fanotify_fid_info_len(fh_len, 0); - return fanotify_fid_info_len(fh_len); + if (fanotify_event_name_len(event)) { + struct fanotify_name_event *fne = FANOTIFY_NE(event); + + info_len += fanotify_fid_info_len(fne->dir_fh.len, + fne->name_len); + } + + return info_len; } /* @@ -204,23 +217,32 @@ static int process_access_response(struct fsnotify_group *group, return -ENOENT; } -static int copy_fid_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, - char __user *buf) +static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, + const char *name, size_t name_len, + char __user *buf, size_t count) { struct fanotify_event_info_fid info = { }; struct file_handle handle = { }; unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf; size_t fh_len = fh ? fh->len : 0; - size_t len = fanotify_fid_info_len(fh_len); + size_t info_len = fanotify_fid_info_len(fh_len, name_len); + size_t len = info_len; + + pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n", + __func__, fh_len, name_len, info_len, count); - if (!len) + if (!fh_len || (name && !name_len)) return 0; - if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len)) + if (WARN_ON_ONCE(len < sizeof(info) || len > count)) return -EFAULT; - /* Copy event info fid header followed by vaiable sized file handle */ - info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID; + /* + * Copy event info fid header followed by variable sized file handle + * and optionally followed by variable sized filename. + */ + info.hdr.info_type = name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME : + FAN_EVENT_INFO_TYPE_FID; info.hdr.len = len; info.fsid = *fsid; if (copy_to_user(buf, &info, sizeof(info))) @@ -228,6 +250,9 @@ static int copy_fid_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, buf += sizeof(info); len -= sizeof(info); + if (WARN_ON_ONCE(len < sizeof(handle))) + return -EFAULT; + handle.handle_type = fh->type; handle.handle_bytes = fh_len; if (copy_to_user(buf, &handle, sizeof(handle))) @@ -235,9 +260,12 @@ static int copy_fid_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, buf += sizeof(handle); len -= sizeof(handle); + if (WARN_ON_ONCE(len < fh_len)) + return -EFAULT; + /* - * For an inline fh, copy through stack to exclude the copy from - * usercopy hardening protections. + * For an inline fh and inline file name, copy through stack to exclude + * the copy from usercopy hardening protections. */ fh_buf = fanotify_fh_buf(fh); if (fh_len <= FANOTIFY_INLINE_FH_LEN) { @@ -247,14 +275,28 @@ static int copy_fid_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, if (copy_to_user(buf, fh_buf, fh_len)) return -EFAULT; - /* Pad with 0's */ buf += fh_len; len -= fh_len; + + if (name_len) { + /* Copy the filename with terminating null */ + name_len++; + if (WARN_ON_ONCE(len < name_len)) + return -EFAULT; + + if (copy_to_user(buf, name, name_len)) + return -EFAULT; + + buf += name_len; + len -= name_len; + } + + /* Pad with 0's */ WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); if (len > 0 && clear_user(buf, len)) return -EFAULT; - return 0; + return info_len; } static ssize_t copy_event_to_user(struct fsnotify_group *group, @@ -268,16 +310,15 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - metadata.event_len = FAN_EVENT_METADATA_LEN; + metadata.event_len = FAN_EVENT_METADATA_LEN + + fanotify_event_info_len(event); metadata.metadata_len = FAN_EVENT_METADATA_LEN; metadata.vers = FANOTIFY_METADATA_VERSION; metadata.reserved = 0; metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; metadata.pid = pid_vnr(event->pid); - if (fanotify_event_object_fh(event)) { - metadata.event_len += fanotify_event_info_len(event); - } else if (path && path->mnt && path->dentry) { + if (path && path->mnt && path->dentry) { fd = create_fd(group, path, &f); if (fd < 0) return fd; @@ -295,17 +336,39 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN)) goto out_close_fd; + buf += FAN_EVENT_METADATA_LEN; + count -= FAN_EVENT_METADATA_LEN; + if (fanotify_is_perm_event(event->mask)) FANOTIFY_PERM(event)->fd = fd; - if (f) { + if (f) fd_install(fd, f); - } else if (fanotify_event_object_fh(event)) { - ret = copy_fid_to_user(fanotify_event_fsid(event), - fanotify_event_object_fh(event), - buf + FAN_EVENT_METADATA_LEN); + + /* Event info records order is: dir fid + name, child fid */ + if (fanotify_event_name_len(event)) { + struct fanotify_name_event *fne = FANOTIFY_NE(event); + + ret = copy_info_to_user(fanotify_event_fsid(event), + fanotify_event_dir_fh(event), + fne->name, fne->name_len, + buf, count); if (ret < 0) return ret; + + buf += ret; + count -= ret; + } + + if (fanotify_event_object_fh_len(event)) { + ret = copy_info_to_user(fanotify_event_fsid(event), + fanotify_event_object_fh(event), + NULL, 0, buf, count); + if (ret < 0) + return ret; + + buf += ret; + count -= ret; } return metadata.event_len; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index b79fa9bb7359..3049a6c06d9e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -47,7 +47,8 @@ * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. */ -#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE) +#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ + FAN_DIR_MODIFY) /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */ #define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \ diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 615fa2c87179..a88c7c6d0692 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -117,6 +117,7 @@ struct fanotify_event_metadata { }; #define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 /* Variable length info record following event metadata */ struct fanotify_event_info_header { @@ -125,7 +126,12 @@ struct fanotify_event_info_header { __u16 len; }; -/* Unique file identifier info record */ +/* + * Unique file identifier info record. This is used both for + * FAN_EVENT_INFO_TYPE_FID records and for FAN_EVENT_INFO_TYPE_DFID_NAME + * records. For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null + * terminated name immediately after the file handle. + */ struct fanotify_event_info_fid { struct fanotify_event_info_header hdr; __kernel_fsid_t fsid; -- cgit v1.2.3