summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJiri Slaby <jslaby@suse.cz>2010-05-04 18:03:50 +0200
committerJiri Slaby <jirislaby@gmail.com>2010-07-16 09:48:48 +0200
commitc022a0acad534fd5f5d5f17280f6d4d135e74e81 (patch)
treeb67cb6f1dd5ca1625b3699577b781d8058a37e93
parentb95183453af2ed14a5c7027e58049c9fd17e92ce (diff)
downloadlinux-c022a0acad534fd5f5d5f17280f6d4d135e74e81.tar.bz2
rlimits: implement prlimit64 syscall
This patch adds the code to support the sys_prlimit64 syscall which modifies-and-returns the rlim values of a selected process atomically. The first parameter, pid, being 0 means current process. Unlike the current implementation, it is a generic interface, architecture indepentent so that we needn't handle compat stuff anymore. In the future, after glibc start to use this we can deprecate sys_setrlimit and sys_getrlimit in favor to clean up the code finally. It also adds a possibility of changing limits of other processes. We check the user's permissions to do that and if it succeeds, the new limits are propagated online. This is good for large scale applications such as SAP or databases where administrators need to change limits time by time (e.g. on crashes increase core size). And it is unacceptable to restart the service. For safety, all rlim users now either use accessors or doesn't need them due to - locking - the fact a process was just forked and nobody else knows about it yet (and nobody can't thus read/write limits) hence it is safe to modify limits now. The limitation is that we currently stay at ulong internal representation. So the rlim64_is_infinity check is used where value is compared against ULONG_MAX on 32-bit which is the maximum value there. And since internally the limits are held in struct rlimit, converters which are used before and after do_prlimit call in sys_prlimit64 are introduced. Signed-off-by: Jiri Slaby <jslaby@suse.cz>
-rw-r--r--include/linux/syscalls.h4
-rw-r--r--kernel/sys.c94
2 files changed, 98 insertions, 0 deletions
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7f614ce274a9..a60943be4270 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -35,6 +35,7 @@ struct oldold_utsname;
struct old_utsname;
struct pollfd;
struct rlimit;
+struct rlimit64;
struct rusage;
struct sched_param;
struct sel_arg_struct;
@@ -644,6 +645,9 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
#endif
asmlinkage long sys_setrlimit(unsigned int resource,
struct rlimit __user *rlim);
+asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource,
+ const struct rlimit64 __user *new_rlim,
+ struct rlimit64 __user *old_rlim);
asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
asmlinkage long sys_umask(int mask);
diff --git a/kernel/sys.c b/kernel/sys.c
index 9da98dd47276..e9ad44489828 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1271,6 +1271,39 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
#endif
+static inline bool rlim64_is_infinity(__u64 rlim64)
+{
+#if BITS_PER_LONG < 64
+ return rlim64 >= ULONG_MAX;
+#else
+ return rlim64 == RLIM64_INFINITY;
+#endif
+}
+
+static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
+{
+ if (rlim->rlim_cur == RLIM_INFINITY)
+ rlim64->rlim_cur = RLIM64_INFINITY;
+ else
+ rlim64->rlim_cur = rlim->rlim_cur;
+ if (rlim->rlim_max == RLIM_INFINITY)
+ rlim64->rlim_max = RLIM64_INFINITY;
+ else
+ rlim64->rlim_max = rlim->rlim_max;
+}
+
+static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
+{
+ if (rlim64_is_infinity(rlim64->rlim_cur))
+ rlim->rlim_cur = RLIM_INFINITY;
+ else
+ rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
+ if (rlim64_is_infinity(rlim64->rlim_max))
+ rlim->rlim_max = RLIM_INFINITY;
+ else
+ rlim->rlim_max = (unsigned long)rlim64->rlim_max;
+}
+
/* make sure you are allowed to change @tsk limits before calling this */
int do_prlimit(struct task_struct *tsk, unsigned int resource,
struct rlimit *new_rlim, struct rlimit *old_rlim)
@@ -1336,6 +1369,67 @@ out:
return retval;
}
+/* rcu lock must be held */
+static int check_prlimit_permission(struct task_struct *task)
+{
+ const struct cred *cred = current_cred(), *tcred;
+
+ tcred = __task_cred(task);
+ if ((cred->uid != tcred->euid ||
+ cred->uid != tcred->suid ||
+ cred->uid != tcred->uid ||
+ cred->gid != tcred->egid ||
+ cred->gid != tcred->sgid ||
+ cred->gid != tcred->gid) &&
+ !capable(CAP_SYS_RESOURCE)) {
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
+ const struct rlimit64 __user *, new_rlim,
+ struct rlimit64 __user *, old_rlim)
+{
+ struct rlimit64 old64, new64;
+ struct rlimit old, new;
+ struct task_struct *tsk;
+ int ret;
+
+ if (new_rlim) {
+ if (copy_from_user(&new64, new_rlim, sizeof(new64)))
+ return -EFAULT;
+ rlim64_to_rlim(&new64, &new);
+ }
+
+ rcu_read_lock();
+ tsk = pid ? find_task_by_vpid(pid) : current;
+ if (!tsk) {
+ rcu_read_unlock();
+ return -ESRCH;
+ }
+ ret = check_prlimit_permission(tsk);
+ if (ret) {
+ rcu_read_unlock();
+ return ret;
+ }
+ get_task_struct(tsk);
+ rcu_read_unlock();
+
+ ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
+ old_rlim ? &old : NULL);
+
+ if (!ret && old_rlim) {
+ rlim_to_rlim64(&old, &old64);
+ if (copy_to_user(old_rlim, &old64, sizeof(old64)))
+ ret = -EFAULT;
+ }
+
+ put_task_struct(tsk);
+ return ret;
+}
+
SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
struct rlimit new_rlim;