From 05484e0984487d42e97c417cbb0697fa9d16e7e9 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Fri, 20 Jul 2018 14:32:31 +0100 Subject: sched/topology: Add SD_ASYM_CPUCAPACITY flag detection The SD_ASYM_CPUCAPACITY sched_domain flag is supposed to mark the sched_domain in the hierarchy where all CPU capacities are visible for any CPU's point of view on asymmetric CPU capacity systems. The scheduler can then take to take capacity asymmetry into account when balancing at this level. It also serves as an indicator for how wide task placement heuristics have to search to consider all available CPU capacities as asymmetric systems might often appear symmetric at smallest level(s) of the sched_domain hierarchy. The flag has been around for while but so far only been set by out-of-tree code in Android kernels. One solution is to let each architecture provide the flag through a custom sched_domain topology array and associated mask and flag functions. However, SD_ASYM_CPUCAPACITY is special in the sense that it depends on the capacity and presence of all CPUs in the system, i.e. when hotplugging all CPUs out except those with one particular CPU capacity the flag should disappear even if the sched_domains don't collapse. Similarly, the flag is affected by cpusets where load-balancing is turned off. Detecting when the flags should be set therefore depends not only on topology information but also the cpuset configuration and hotplug state. The arch code doesn't have easy access to the cpuset configuration. Instead, this patch implements the flag detection in generic code where cpusets and hotplug state is already taken care of. All the arch is responsible for is to implement arch_scale_cpu_capacity() and force a full rebuild of the sched_domain hierarchy if capacities are updated, e.g. later in the boot process when cpufreq has initialized. Signed-off-by: Morten Rasmussen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dietmar.eggemann@arm.com Cc: valentin.schneider@arm.com Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/1532093554-30504-2-git-send-email-morten.rasmussen@arm.com [ Fixed 'CPU' capitalization. ] Signed-off-by: Ingo Molnar --- include/linux/sched/topology.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 26347741ba50..6b9976180c1e 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -23,10 +23,10 @@ #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ -#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ +#define SD_ASYM_CPUCAPACITY 0x0040 /* Domain members have different CPU capacities */ +#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share CPU capacity */ #define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ -#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ +#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share CPU pkg resources */ #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ #define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ -- cgit v1.2.3 From bb1fbdd3c3fd12b612c7d8cdf13bd6bfeebdefa3 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Fri, 20 Jul 2018 14:32:32 +0100 Subject: sched/topology, drivers/base/arch_topology: Rebuild the sched_domain hierarchy when capacities change The setting of SD_ASYM_CPUCAPACITY depends on the per-CPU capacities. These might not have their final values when the hierarchy is initially built as the values depend on cpufreq to be initialized or the values being set through sysfs. To ensure that the flags are set correctly we need to rebuild the sched_domain hierarchy whenever the reported per-CPU capacity (arch_scale_cpu_capacity()) changes. This patch ensure that a full sched_domain rebuild happens when CPU capacity changes occur. Signed-off-by: Morten Rasmussen Signed-off-by: Peter Zijlstra (Intel) Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dietmar.eggemann@arm.com Cc: valentin.schneider@arm.com Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/1532093554-30504-3-git-send-email-morten.rasmussen@arm.com Signed-off-by: Ingo Molnar --- drivers/base/arch_topology.c | 26 ++++++++++++++++++++++++++ include/linux/arch_topology.h | 1 + 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index e7cb0c6ade81..edfcf8d982e4 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -15,6 +15,7 @@ #include #include #include +#include DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; @@ -47,6 +48,9 @@ static ssize_t cpu_capacity_show(struct device *dev, return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id)); } +static void update_topology_flags_workfn(struct work_struct *work); +static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn); + static ssize_t cpu_capacity_store(struct device *dev, struct device_attribute *attr, const char *buf, @@ -72,6 +76,8 @@ static ssize_t cpu_capacity_store(struct device *dev, topology_set_cpu_scale(i, new_capacity); mutex_unlock(&cpu_scale_mutex); + schedule_work(&update_topology_flags_work); + return count; } @@ -96,6 +102,25 @@ static int register_cpu_capacity_sysctl(void) } subsys_initcall(register_cpu_capacity_sysctl); +static int update_topology; + +int topology_update_cpu_topology(void) +{ + return update_topology; +} + +/* + * Updating the sched_domains can't be done directly from cpufreq callbacks + * due to locking, so queue the work for later. + */ +static void update_topology_flags_workfn(struct work_struct *work) +{ + update_topology = 1; + rebuild_sched_domains(); + pr_debug("sched_domain hierarchy rebuilt, flags updated\n"); + update_topology = 0; +} + static u32 capacity_scale; static u32 *raw_capacity; @@ -201,6 +226,7 @@ init_cpu_capacity_callback(struct notifier_block *nb, if (cpumask_empty(cpus_to_visit)) { topology_normalize_cpu_scale(); + schedule_work(&update_topology_flags_work); free_raw_capacity(); pr_debug("cpu_capacity: parsing done\n"); schedule_work(&parsing_done_work); diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 2b709416de05..d9bdc1a7f4e7 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -9,6 +9,7 @@ #include void topology_normalize_cpu_scale(void); +int topology_update_cpu_topology(void); struct device_node; bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); -- cgit v1.2.3 From ff28915fd31ccafc0d38e6f84b66df280ed9e86a Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 5 Sep 2018 11:36:36 +0200 Subject: sched/debug: Use symbolic names for task state constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit include/trace/events/sched.h includes (via ) and so knows about the TASK_* constants used to interpret .prev_state. So instead of duplicating the magic numbers make use of the defined macros to ease understanding the mapping from state bits to letters which isn't completely intuitive for an outsider. Signed-off-by: Uwe Kleine-König Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: kernel@pengutronix.de Link: http://lkml.kernel.org/r/20180905093636.24068-1-u.kleine-koenig@pengutronix.de Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 0be866c91f62..f07b270d4fc4 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -159,9 +159,14 @@ TRACE_EVENT(sched_switch, (__entry->prev_state & (TASK_REPORT_MAX - 1)) ? __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|", - { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" }, - { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" }, - { 0x40, "P" }, { 0x80, "I" }) : + { TASK_INTERRUPTIBLE, "S" }, + { TASK_UNINTERRUPTIBLE, "D" }, + { __TASK_STOPPED, "T" }, + { __TASK_TRACED, "t" }, + { EXIT_DEAD, "X" }, + { EXIT_ZOMBIE, "Z" }, + { TASK_PARKED, "P" }, + { TASK_DEAD, "I" }) : "R", __entry->prev_state & TASK_REPORT_MAX ? "+" : "", -- cgit v1.2.3