From d62d813c0d714a2d0aaf3d796a7a51ae60bf5470 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Fri, 3 Nov 2017 13:36:42 +0000 Subject: cpufreq: schedutil: Examine the correct CPU when we update util After commit 674e75411fc2 (sched: cpufreq: Allow remote cpufreq callbacks) we stopped always reading the utilization for the CPU we are running the governor on, and instead we read it for the CPU which we've been told has updated utilization. This is stored in sugov_cpu->cpu. The value is set in sugov_register() but we clear it in sugov_start() which leads to always looking at the utilization of CPU0 instead of the correct one. Fix this by consolidating the initialization code into sugov_start(). Fixes: 674e75411fc2 (sched: cpufreq: Allow remote cpufreq callbacks) Signed-off-by: Chris Redpath Reviewed-by: Patrick Bellasi Reviewed-by: Brendan Jackman Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 9209d83ecdcf..ba0da243fdd8 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -649,6 +649,7 @@ static int sugov_start(struct cpufreq_policy *policy) struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); memset(sg_cpu, 0, sizeof(*sg_cpu)); + sg_cpu->cpu = cpu; sg_cpu->sg_policy = sg_policy; sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; @@ -714,11 +715,6 @@ struct cpufreq_governor *cpufreq_default_governor(void) static int __init sugov_register(void) { - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(sugov_cpu, cpu).cpu = cpu; - return cpufreq_register_governor(&schedutil_gov); } fs_initcall(sugov_register); -- cgit v1.2.3 From c0f3ea1589394deac2d840c685f57c69e4ac4243 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Nov 2017 12:51:04 -0800 Subject: stop using '%pK' for
/proc/kallsyms pointer values Not only is it annoying to have one single flag for all pointers, as if that was a global choice and all kernel pointers are the same, but %pK can't get the 'access' vs 'open' time check right anyway. So make the /proc/kallsyms pointer value code use logic specific to that particular file. We do continue to honor kptr_restrict, but the default (which is unrestricted) is changed to instead take expected users into account, and restrict access by default. Right now the only actual expected user is kernel profiling, which has a separate sysctl flag for kernel profile access. There may be others. Signed-off-by: Linus Torvalds --- kernel/kallsyms.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 127e7cfafa55..51b49ed452e4 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -480,6 +480,7 @@ struct kallsym_iter { char name[KSYM_NAME_LEN]; char module_name[MODULE_NAME_LEN]; int exported; + int show_value; }; static int get_ksymbol_mod(struct kallsym_iter *iter) @@ -580,14 +581,23 @@ static void s_stop(struct seq_file *m, void *p) { } +#ifndef CONFIG_64BIT +# define KALLSYM_FMT "%08lx" +#else +# define KALLSYM_FMT "%016lx" +#endif + static int s_show(struct seq_file *m, void *p) { + unsigned long value; struct kallsym_iter *iter = m->private; /* Some debugging symbols have no name. Ignore them. */ if (!iter->name[0]) return 0; + value = iter->show_value ? iter->value : 0; + if (iter->module_name[0]) { char type; @@ -597,10 +607,10 @@ static int s_show(struct seq_file *m, void *p) */ type = iter->exported ? 
toupper(iter->type) : tolower(iter->type); - seq_printf(m, "%pK %c %s\t[%s]\n", (void *)iter->value, + seq_printf(m, KALLSYM_FMT " %c %s\t[%s]\n", value, type, iter->name, iter->module_name); } else - seq_printf(m, "%pK %c %s\n", (void *)iter->value, + seq_printf(m, KALLSYM_FMT " %c %s\n", value, iter->type, iter->name); return 0; } @@ -612,6 +622,40 @@ static const struct seq_operations kallsyms_op = { .show = s_show }; +static inline int kallsyms_for_perf(void) +{ +#ifdef CONFIG_PERF_EVENTS + extern int sysctl_perf_event_paranoid; + if (sysctl_perf_event_paranoid <= 1) + return 1; +#endif + return 0; +} + +/* + * We show kallsyms information even to normal users if we've enabled + * kernel profiling and are explicitly not paranoid (so kptr_restrict + * is clear, and sysctl_perf_event_paranoid isn't set). + * + * Otherwise, require CAP_SYSLOG (assuming kptr_restrict isn't set to + * block even that). + */ +static int kallsyms_show_value(void) +{ + switch (kptr_restrict) { + case 0: + if (kallsyms_for_perf()) + return 1; + /* fallthrough */ + case 1: + if (has_capability_noaudit(current, CAP_SYSLOG)) + return 1; + /* fallthrough */ + default: + return 0; + } +} + static int kallsyms_open(struct inode *inode, struct file *file) { /* @@ -625,6 +669,7 @@ static int kallsyms_open(struct inode *inode, struct file *file) return -ENOMEM; reset_iter(iter, 0); + iter->show_value = kallsyms_show_value(); return 0; } -- cgit v1.2.3 From 277642dcca765a1955d4c753a5a315ff7f2eb09d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Nov 2017 17:00:53 -0800 Subject: modules: make sysfs attribute files readable by owner only This code goes back to the historical bitkeeper tree commit 3f7b0672086 ("Module section offsets in /sys/module"), where Jonathan Corbet wanted to show people how to debug loadable modules. See https://lwn.net/Articles/88052/ from June 2004. 
To expose the required load address information, Jonathan added the sections subdirectory for every module in /sys/module, and made them S_IRUGO - readable by everybody. It was a more innocent time, plus those S_IRxxx macro names are a lot more confusing than the octal numbers are, so maybe it wasn't even intentional. But here we are, thirteen years later, and I'll just change it to S_IRUSR instead. Let's see if anybody even notices. Cc: Jonathan Corbet Signed-off-by: Linus Torvalds --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index de66ec825992..fdb3a6aca363 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1516,7 +1516,7 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info) sattr->mattr.show = module_sect_show; sattr->mattr.store = NULL; sattr->mattr.attr.name = sattr->name; - sattr->mattr.attr.mode = S_IRUGO; + sattr->mattr.attr.mode = S_IRUSR; *(gattr++) = &(sattr++)->mattr.attr; } *gattr = NULL; -- cgit v1.2.3 From 516fb7f2e73dcc303fb97fc3593209fcacf2d982 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Nov 2017 18:44:23 -0800 Subject: /proc/module: use the same logic as /proc/kallsyms for address exposure The (alleged) users of the module addresses are the same: kernel profiling. So just expose the same helper and format macros, and unify the logic. Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 8 ++++++++ kernel/kallsyms.c | 8 +------- kernel/module.c | 20 ++++++++++++++++++-- 3 files changed, 27 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 11dd93e42580..0a777c5216b1 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -14,6 +14,14 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) +/* How and when do we show kallsyms values?
*/ +extern int kallsyms_show_value(void); +#ifndef CONFIG_64BIT +# define KALLSYM_FMT "%08lx" +#else +# define KALLSYM_FMT "%016lx" +#endif + struct module; #ifdef CONFIG_KALLSYMS diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 51b49ed452e4..1e6ae66c6244 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -581,12 +581,6 @@ static void s_stop(struct seq_file *m, void *p) { } -#ifndef CONFIG_64BIT -# define KALLSYM_FMT "%08lx" -#else -# define KALLSYM_FMT "%016lx" -#endif - static int s_show(struct seq_file *m, void *p) { unsigned long value; @@ -640,7 +634,7 @@ static inline int kallsyms_for_perf(void) * Otherwise, require CAP_SYSLOG (assuming kptr_restrict isn't set to * block even that). */ -static int kallsyms_show_value(void) +int kallsyms_show_value(void) { switch (kptr_restrict) { case 0: diff --git a/kernel/module.c b/kernel/module.c index fdb3a6aca363..0122747ba150 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4147,6 +4147,7 @@ static int m_show(struct seq_file *m, void *p) { struct module *mod = list_entry(p, struct module, list); char buf[MODULE_FLAGS_BUF_SIZE]; + unsigned long value; /* We always ignore unformed modules. */ if (mod->state == MODULE_STATE_UNFORMED) @@ -4162,7 +4163,8 @@ static int m_show(struct seq_file *m, void *p) mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. */ - seq_printf(m, " 0x%pK", mod->core_layout.base); + value = m->private ? 0 : (unsigned long)mod->core_layout.base; + seq_printf(m, " 0x" KALLSYM_FMT, value); /* Taints info */ if (mod->taints) @@ -4184,9 +4186,23 @@ static const struct seq_operations modules_op = { .show = m_show }; +/* + * This also sets the "private" pointer to non-NULL if the + * kernel pointers should be hidden (so you can just test + * "m->private" to see if you should keep the values private). + * + * We use the same logic as for /proc/kallsyms. 
+ */ static int modules_open(struct inode *inode, struct file *file) { - return seq_open(file, &modules_op); + int err = seq_open(file, &modules_op); + + if (!err) { + struct seq_file *m = file->private_data; + m->private = kallsyms_show_value() ? NULL : (void *)8ul; + } + + return err; /* propagate seq_open() failure; private_data is only valid on success */ } static const struct file_operations proc_modules_operations = { -- cgit v1.2.3