diff options
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- | kernel/sched/fair.c | 117 |
1 files changed, 79 insertions, 38 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 79f574dba096..e497c05aab7f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1139,6 +1139,47 @@ static unsigned int task_scan_max(struct task_struct *p) return max(smin, smax); } +void init_numa_balancing(unsigned long clone_flags, struct task_struct *p) +{ + int mm_users = 0; + struct mm_struct *mm = p->mm; + + if (mm) { + mm_users = atomic_read(&mm->mm_users); + if (mm_users == 1) { + mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay); + mm->numa_scan_seq = 0; + } + } + p->node_stamp = 0; + p->numa_scan_seq = mm ? mm->numa_scan_seq : 0; + p->numa_scan_period = sysctl_numa_balancing_scan_delay; + p->numa_work.next = &p->numa_work; + p->numa_faults = NULL; + p->numa_group = NULL; + p->last_task_numa_placement = 0; + p->last_sum_exec_runtime = 0; + + /* New address space, reset the preferred nid */ + if (!(clone_flags & CLONE_VM)) { + p->numa_preferred_nid = -1; + return; + } + + /* + * New thread, keep existing numa_preferred_nid which should be copied + * already by arch_dup_task_struct but stagger when scans start. + */ + if (mm) { + unsigned int delay; + + delay = min_t(unsigned int, task_scan_max(current), + current->numa_scan_period * mm_users * NSEC_PER_MSEC); + delay += 2 * TICK_NSEC; + p->node_stamp = delay; + } +} + static void account_numa_enqueue(struct rq *rq, struct task_struct *p) { rq->nr_numa_running += (p->numa_preferred_nid != -1); @@ -5345,6 +5386,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct sched_entity *se = &p->se; /* + * The code below (indirectly) updates schedutil which looks at + * the cfs_rq utilization to select a frequency. + * Let's add the task's estimated utilization to the cfs_rq's + * estimated utilization, before we update schedutil. + */ + util_est_enqueue(&rq->cfs, p); + + /* * If in_iowait is set, the code below may not trigger any cpufreq * utilization updates, so do it here explicitly with the IOWAIT flag * passed. @@ -5385,7 +5434,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (!se) add_nr_running(rq, 1); - util_est_enqueue(&rq->cfs, p); hrtick_update(rq); } @@ -5858,8 +5906,8 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync) * a cpufreq perspective, it's better to have higher utilisation * on one CPU. */ - if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu)) - return idle_cpu(prev_cpu) ? prev_cpu : this_cpu; + if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu)) + return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu; if (sync && cpu_rq(this_cpu)->nr_running == 1) return this_cpu; @@ -6102,7 +6150,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this /* Traverse only the allowed CPUs */ for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) { - if (idle_cpu(i)) { + if (available_idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); if (idle && idle->exit_latency < min_exit_latency) { @@ -6144,6 +6192,13 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed)) return prev_cpu; + /* + * We need task's util for capacity_spare_wake, sync it up to prev_cpu's + * last_update_time. + */ + if (!(sd_flag & SD_BALANCE_FORK)) + sync_entity_load_avg(&p->se); + while (sd) { struct sched_group *group; struct sched_domain *tmp; @@ -6224,7 +6279,7 @@ void __update_idle_core(struct rq *rq) if (cpu == core) continue; - if (!idle_cpu(cpu)) + if (!available_idle_cpu(cpu)) goto unlock; } @@ -6256,7 +6311,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int for_each_cpu(cpu, cpu_smt_mask(core)) { cpumask_clear_cpu(cpu, cpus); - if (!idle_cpu(cpu)) + if (!available_idle_cpu(cpu)) idle = false; } @@ -6285,7 +6340,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t for_each_cpu(cpu, cpu_smt_mask(target)) { if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; - if (idle_cpu(cpu)) + if (available_idle_cpu(cpu)) return cpu; } @@ -6348,7 +6403,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t return -1; if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; - if (idle_cpu(cpu)) + if (available_idle_cpu(cpu)) break; } @@ -6368,13 +6423,13 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) struct sched_domain *sd; int i, recent_used_cpu; - if (idle_cpu(target)) + if (available_idle_cpu(target)) return target; /* * If the previous CPU is cache affine and idle, don't be stupid: */ - if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)) + if (prev != target && cpus_share_cache(prev, target) && available_idle_cpu(prev)) return prev; /* Check a recently used CPU as a potential idle candidate: */ @@ -6382,7 +6437,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if (recent_used_cpu != prev && recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && - idle_cpu(recent_used_cpu) && + available_idle_cpu(recent_used_cpu) && cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) { /* * Replace recent_used_cpu with prev as it is a potential @@ -6558,7 +6613,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) static int select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags) { - struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; + struct sched_domain *tmp, *sd = NULL; int cpu = smp_processor_id(); int new_cpu = prev_cpu; int want_affine = 0; @@ -6581,7 +6636,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f */ if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { - affine_sd = tmp; + if (cpu != prev_cpu) + new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync); + + sd = NULL; /* Prefer wake_affine over balance flags */ break; } @@ -6591,33 +6649,16 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f break; } - if (affine_sd) { - sd = NULL; /* Prefer wake_affine over balance flags */ - if (cpu == prev_cpu) - goto pick_cpu; - - new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync); - } - - if (sd && !(sd_flag & SD_BALANCE_FORK)) { - /* - * We're going to need the task's util for capacity_spare_wake - * in find_idlest_group. Sync it up to prev_cpu's - * last_update_time. - */ - sync_entity_load_avg(&p->se); - } + if (unlikely(sd)) { + /* Slow path */ + new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag); + } else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */ + /* Fast path */ - if (!sd) { -pick_cpu: - if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */ - new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); + new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); - if (want_affine) - current->recent_used_cpu = cpu; - } - } else { - new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag); + if (want_affine) + current->recent_used_cpu = cpu; } rcu_read_unlock(); |