From 1cce1eea0aff51201753fcaca421df825b0813b6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 14 Dec 2016 15:56:33 +0200 Subject: inotify: Convert to using per-namespace limits This patchset converts inotify to using the newly introduced per-userns sysctl infrastructure. Currently, the inotify instances/watches are being accounted in the user_struct structure. This means that in setups where multiple users in unprivileged containers map to the same underlying real user (i.e. pointing to the same user_struct), the inotify limits are going to be shared as well, allowing one user (or application) to exhaust all other users' limits. Fix this by switching the inotify sysctls to using the per-namespace/per-user limits. This will allow the server admin to set sensible global limits, which can further be tuned inside every individual user namespace. Additionally, in order to preserve the sysctl ABI, make the existing inotify instances/watches sysctls modify the values of the initial user namespace. Signed-off-by: Nikolay Borisov Acked-by: Jan Kara Acked-by: Serge Hallyn Signed-off-by: Eric W. 
Biederman --- fs/notify/inotify/inotify.h | 17 +++++++++++++++++ fs/notify/inotify/inotify_fsnotify.c | 6 ++---- fs/notify/inotify/inotify_user.c | 34 +++++++++++++++++----------------- 3 files changed, 36 insertions(+), 21 deletions(-) (limited to 'fs/notify') diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index a6f5907a3fee..7c461fd49c4c 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -30,3 +30,20 @@ extern int inotify_handle_event(struct fsnotify_group *group, const unsigned char *file_name, u32 cookie); extern const struct fsnotify_ops inotify_fsnotify_ops; + +#ifdef CONFIG_INOTIFY_USER +static inline void dec_inotify_instances(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_INOTIFY_INSTANCES); +} + +static inline struct ucounts *inc_inotify_watches(struct ucounts *ucounts) +{ + return inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_INOTIFY_WATCHES); +} + +static inline void dec_inotify_watches(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_INOTIFY_WATCHES); +} +#endif diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 19e7ec109a75..f36c29398de3 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -165,10 +165,8 @@ static void inotify_free_group_priv(struct fsnotify_group *group) /* ideally the idr is empty and we won't hit the BUG in the callback */ idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_destroy(&group->inotify_data.idr); - if (group->inotify_data.user) { - atomic_dec(&group->inotify_data.user->inotify_devs); - free_uid(group->inotify_data.user); - } + if (group->inotify_data.ucounts) + dec_inotify_instances(group->inotify_data.ucounts); } static void inotify_free_event(struct fsnotify_event *fsn_event) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 69d1ea3d292a..1cf41c623be1 100644 --- a/fs/notify/inotify/inotify_user.c +++ 
b/fs/notify/inotify/inotify_user.c @@ -44,10 +44,8 @@ #include -/* these are configurable via /proc/sys/fs/inotify/ */ -static int inotify_max_user_instances __read_mostly; +/* configurable via /proc/sys/fs/inotify/ */ static int inotify_max_queued_events __read_mostly; -static int inotify_max_user_watches __read_mostly; static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; @@ -60,7 +58,7 @@ static int zero; struct ctl_table inotify_table[] = { { .procname = "max_user_instances", - .data = &inotify_max_user_instances, + .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES], .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -68,7 +66,7 @@ struct ctl_table inotify_table[] = { }, { .procname = "max_user_watches", - .data = &inotify_max_user_watches, + .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES], .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -500,7 +498,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, /* remove this mark from the idr */ inotify_remove_from_idr(group, i_mark); - atomic_dec(&group->inotify_data.user->inotify_watches); + dec_inotify_watches(group->inotify_data.ucounts); } /* ding dong the mark is dead */ @@ -584,14 +582,17 @@ static int inotify_new_watch(struct fsnotify_group *group, tmp_i_mark->fsn_mark.mask = mask; tmp_i_mark->wd = -1; - ret = -ENOSPC; - if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) - goto out_err; - ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark); if (ret) goto out_err; + /* increment the number of watches the user has */ + if (!inc_inotify_watches(group->inotify_data.ucounts)) { + inotify_remove_from_idr(group, tmp_i_mark); + ret = -ENOSPC; + goto out_err; + } + /* we are on the idr, now get on the inode */ ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode, NULL, 0); @@ -601,8 +602,6 @@ static int inotify_new_watch(struct fsnotify_group *group, goto out_err; 
} - /* increment the number of watches the user has */ - atomic_inc(&group->inotify_data.user->inotify_watches); /* return the watch descriptor for this new mark */ ret = tmp_i_mark->wd; @@ -653,10 +652,11 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); - group->inotify_data.user = get_current_user(); + group->inotify_data.ucounts = inc_ucount(current_user_ns(), + current_euid(), + UCOUNT_INOTIFY_INSTANCES); - if (atomic_inc_return(&group->inotify_data.user->inotify_devs) > - inotify_max_user_instances) { + if (!group->inotify_data.ucounts) { fsnotify_destroy_group(group); return ERR_PTR(-EMFILE); } @@ -819,8 +819,8 @@ static int __init inotify_user_setup(void) inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); inotify_max_queued_events = 16384; - inotify_max_user_instances = 128; - inotify_max_user_watches = 8192; + init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; + init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192; return 0; } -- cgit v1.2.3