diff options
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/fair.c | 100 | ||||
-rw-r--r-- | kernel/sched/sched.h | 2 |
2 files changed, 81 insertions, 21 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5e602e6ba0c3..74dc29ba1ad1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -259,7 +259,8 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) return grp->my_q; } -static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq); +static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, + int force_update); static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) { @@ -281,7 +282,7 @@ static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->on_list = 1; /* We should have no load, but we need to update last_decay. */ - update_cfs_rq_blocked_load(cfs_rq); + update_cfs_rq_blocked_load(cfs_rq, 0); } } @@ -1086,17 +1087,19 @@ static __always_inline int __update_entity_runnable_avg(u64 now, } /* Synchronize an entity's decay with its parenting cfs_rq.*/ -static inline void __synchronize_entity_decay(struct sched_entity *se) +static inline u64 __synchronize_entity_decay(struct sched_entity *se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); u64 decays = atomic64_read(&cfs_rq->decay_counter); decays -= se->avg.decay_count; if (!decays) - return; + return 0; se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays); se->avg.decay_count = 0; + + return decays; } /* Compute the current contribution to load_avg by se, return any delta */ @@ -1149,20 +1152,26 @@ static inline void update_entity_load_avg(struct sched_entity *se, * Decay the load contributed by all blocked children and account this so that * their contribution may appropriately discounted when they wake up. */ -static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq) +static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) { u64 now = rq_of(cfs_rq)->clock_task >> 20; u64 decays; decays = now - cfs_rq->last_decay; - if (!decays) + if (!decays && !force_update) return; - cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg, - decays); - atomic64_add(decays, &cfs_rq->decay_counter); + if (atomic64_read(&cfs_rq->removed_load)) { + u64 removed_load = atomic64_xchg(&cfs_rq->removed_load, 0); + subtract_blocked_load_contrib(cfs_rq, removed_load); + } - cfs_rq->last_decay = now; + if (decays) { + cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg, + decays); + atomic64_add(decays, &cfs_rq->decay_counter); + cfs_rq->last_decay = now; + } } static inline void update_rq_runnable_avg(struct rq *rq, int runnable) @@ -1175,20 +1184,42 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) { - /* we track migrations using entity decay_count == 0 */ - if (unlikely(!se->avg.decay_count)) { + /* + * We track migrations using entity decay_count <= 0, on a wake-up + * migration we use a negative decay count to track the remote decays + * accumulated while sleeping. + */ + if (unlikely(se->avg.decay_count <= 0)) { se->avg.last_runnable_update = rq_of(cfs_rq)->clock_task; + if (se->avg.decay_count) { + /* + * In a wake-up migration we have to approximate the + * time sleeping. This is because we can't synchronize + * clock_task between the two cpus, and it is not + * guaranteed to be read-safe. Instead, we can + * approximate this using our carried decays, which are + * explicitly atomically readable. + */ + se->avg.last_runnable_update -= (-se->avg.decay_count) + << 20; + update_entity_load_avg(se, 0); + /* Indicate that we're now synchronized and on-rq */ + se->avg.decay_count = 0; + } wakeup = 0; } else { __synchronize_entity_decay(se); } - if (wakeup) + /* migrated tasks did not contribute to our blocked load */ + if (wakeup) { subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib); + update_entity_load_avg(se, 0); + } - update_entity_load_avg(se, 0); cfs_rq->runnable_load_avg += se->avg.load_avg_contrib; - update_cfs_rq_blocked_load(cfs_rq); + /* we force update consideration on load-balancer moves */ + update_cfs_rq_blocked_load(cfs_rq, !wakeup); } /* @@ -1201,6 +1232,8 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, int sleep) { update_entity_load_avg(se, 1); + /* we force update consideration on load-balancer moves */ + update_cfs_rq_blocked_load(cfs_rq, !sleep); cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib; if (sleep) { @@ -1218,7 +1251,8 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) {} -static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq) {} +static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, + int force_update) {} #endif static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -1610,7 +1644,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) * Ensure that runnable average is periodically updated. */ update_entity_load_avg(curr, 1); - update_cfs_rq_blocked_load(cfs_rq); + update_cfs_rq_blocked_load(cfs_rq, 1); /* * Update share accounting for long-running entities. @@ -3057,6 +3091,19 @@ unlock: static void migrate_task_rq_fair(struct task_struct *p, int next_cpu) { + struct sched_entity *se = &p->se; + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + /* + * Load tracking: accumulate removed load so that it can be processed + * when we next update owning cfs_rq under rq->lock. Tasks contribute + * to blocked load iff they have a positive decay-count. It can never + * be negative here since on-rq tasks have decay-count == 0. + */ + if (se->avg.decay_count) { + se->avg.decay_count = -__synchronize_entity_decay(se); + atomic64_add(se->avg.load_avg_contrib, &cfs_rq->removed_load); + } } #endif /* CONFIG_SMP */ @@ -3593,7 +3640,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu) update_rq_clock(rq); update_cfs_load(cfs_rq, 1); - update_cfs_rq_blocked_load(cfs_rq); + update_cfs_rq_blocked_load(cfs_rq, 1); /* * We need to update shares after updating tg->load_weight in @@ -5390,12 +5437,14 @@ void init_cfs_rq(struct cfs_rq *cfs_rq) #endif #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) atomic64_set(&cfs_rq->decay_counter, 1); + atomic64_set(&cfs_rq->removed_load, 0); #endif } #ifdef CONFIG_FAIR_GROUP_SCHED static void task_move_group_fair(struct task_struct *p, int on_rq) { + struct cfs_rq *cfs_rq; /* * If the task was not on the rq at the time of this cgroup movement * it must have been asleep, sleeping tasks keep their ->vruntime @@ -5427,8 +5476,19 @@ static void task_move_group_fair(struct task_struct *p, int on_rq) if (!on_rq) p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime; set_task_rq(p, task_cpu(p)); - if (!on_rq) - p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime; + if (!on_rq) { + cfs_rq = cfs_rq_of(&p->se); + p->se.vruntime += cfs_rq->min_vruntime; +#ifdef CONFIG_SMP + /* + * migrate_task_rq_fair() will have removed our previous + * contribution, but we must synchronize for ongoing future + * decay. + */ + p->se.avg.decay_count = atomic64_read(&cfs_rq->decay_counter); + cfs_rq->blocked_load_avg += p->se.avg.load_avg_contrib; +#endif + } } void free_fair_sched_group(struct task_group *tg) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 664ff39195f7..30236ab4edc0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -230,7 +230,7 @@ struct cfs_rq { * the FAIR_GROUP_SCHED case). */ u64 runnable_load_avg, blocked_load_avg; - atomic64_t decay_counter; + atomic64_t decay_counter, removed_load; u64 last_decay; #endif #ifdef CONFIG_FAIR_GROUP_SCHED |