summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--kernel/sched/fair.c63
-rw-r--r--kernel/sched/sched.h19
2 files changed, 79 insertions, 3 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0f32acb05055..4c4ea474fdc2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8824,16 +8824,73 @@ static unsigned long scale_rt_capacity(int cpu)
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
{
+ unsigned long capacity_orig = arch_scale_cpu_capacity(cpu);
unsigned long capacity = scale_rt_capacity(cpu);
struct sched_group *sdg = sd->groups;
+ struct rq *rq = cpu_rq(cpu);
- cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu);
+ rq->cpu_capacity_orig = capacity_orig;
if (!capacity)
capacity = 1;
- cpu_rq(cpu)->cpu_capacity = capacity;
- trace_sched_cpu_capacity_tp(cpu_rq(cpu));
+ rq->cpu_capacity = capacity;
+
+ /*
+ * Detect if the performance domain is in capacity inversion state.
+ *
+ * Capacity inversion happens when another perf domain with equal or
+ * lower capacity_orig_of() ends up having higher capacity than this
+ * domain after subtracting thermal pressure.
+ *
+ * We only take into account thermal pressure in this detection as it's
+ * the only metric that actually results in *real* reduction of
+ * capacity due to performance points (OPPs) being dropped/become
+ * unreachable due to thermal throttling.
+ *
+ * We assume:
+ * * That all cpus in a perf domain have the same capacity_orig
+ * (same uArch).
+ * * Thermal pressure will impact all cpus in this perf domain
+ * equally.
+ */
+ if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+ unsigned long inv_cap = capacity_orig - thermal_load_avg(rq);
+ struct perf_domain *pd = rcu_dereference(rq->rd->pd);
+
+ rq->cpu_capacity_inverted = 0;
+
+ for (; pd; pd = pd->next) {
+ struct cpumask *pd_span = perf_domain_span(pd);
+ unsigned long pd_cap_orig, pd_cap;
+
+ cpu = cpumask_any(pd_span);
+ pd_cap_orig = arch_scale_cpu_capacity(cpu);
+
+ if (capacity_orig < pd_cap_orig)
+ continue;
+
+ /*
+ * handle the case of multiple perf domains have the
+ * same capacity_orig but one of them is under higher
+ * thermal pressure. We record it as capacity
+ * inversion.
+ */
+ if (capacity_orig == pd_cap_orig) {
+ pd_cap = pd_cap_orig - thermal_load_avg(cpu_rq(cpu));
+
+ if (pd_cap > inv_cap) {
+ rq->cpu_capacity_inverted = inv_cap;
+ break;
+ }
+ } else if (pd_cap_orig > inv_cap) {
+ rq->cpu_capacity_inverted = inv_cap;
+ break;
+ }
+ }
+ }
+
+ trace_sched_cpu_capacity_tp(rq);
sdg->sgc->capacity = capacity;
sdg->sgc->min_capacity = capacity;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d6d488e8eb55..5f18460f62f0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1041,6 +1041,7 @@ struct rq {
unsigned long cpu_capacity;
unsigned long cpu_capacity_orig;
+ unsigned long cpu_capacity_inverted;
struct balance_callback *balance_callback;
@@ -2878,6 +2879,24 @@ static inline unsigned long capacity_orig_of(int cpu)
return cpu_rq(cpu)->cpu_capacity_orig;
}
+/*
+ * Returns inverted capacity if the CPU is in capacity inversion state.
+ * 0 otherwise.
+ *
+ * Capacity inversion detection only considers thermal impact where actual
+ * performance points (OPPs) gets dropped.
+ *
+ * Capacity inversion state happens when another performance domain that has
+ * equal or lower capacity_orig_of() becomes effectively larger than the perf
+ * domain this CPU belongs to due to thermal pressure throttling it hard.
+ *
+ * See comment in update_cpu_capacity().
+ */
+static inline unsigned long cpu_in_capacity_inversion(int cpu)
+{
+ return cpu_rq(cpu)->cpu_capacity_inverted;
+}
+
/**
* enum cpu_util_type - CPU utilization type
* @FREQUENCY_UTIL: Utilization used to select frequency