diff options
-rw-r--r-- | include/linux/sched.h | 24 | ||||
-rw-r--r-- | include/linux/topology.h | 1 | ||||
-rw-r--r-- | kernel/sched.c | 193 | ||||
-rw-r--r-- | kernel/sched_debug.c | 22 | ||||
-rw-r--r-- | kernel/sched_fair.c | 21 | ||||
-rw-r--r-- | kernel/sched_rt.c | 14 | ||||
-rw-r--r-- | kernel/sched_stats.h | 2 |
7 files changed, 134 insertions, 143 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 2e490271acf6..17249fae5014 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -734,7 +734,6 @@ struct sched_domain { unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ unsigned int imbalance_pct; /* No balance until over watermark */ - unsigned long long cache_hot_time; /* Task considered cache hot (ns) */ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ unsigned int busy_idx; unsigned int idle_idx; @@ -875,7 +874,7 @@ struct sched_class { void (*set_curr_task) (struct rq *rq); void (*task_tick) (struct rq *rq, struct task_struct *p); - void (*task_new) (struct rq *rq, struct task_struct *p); + void (*task_new) (struct rq *rq, struct task_struct *p, u64 now); }; struct load_weight { @@ -905,23 +904,28 @@ struct sched_entity { struct rb_node run_node; unsigned int on_rq; + u64 exec_start; + u64 sum_exec_runtime; u64 wait_start_fair; + u64 sleep_start_fair; + +#ifdef CONFIG_SCHEDSTATS u64 wait_start; - u64 exec_start; + u64 wait_max; + s64 sum_wait_runtime; + u64 sleep_start; - u64 sleep_start_fair; - u64 block_start; u64 sleep_max; + s64 sum_sleep_runtime; + + u64 block_start; u64 block_max; u64 exec_max; - u64 wait_max; - u64 last_ran; - u64 sum_exec_runtime; - s64 sum_wait_runtime; - s64 sum_sleep_runtime; unsigned long wait_runtime_overruns; unsigned long wait_runtime_underruns; +#endif + #ifdef CONFIG_FAIR_GROUP_SCHED struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ diff --git a/include/linux/topology.h b/include/linux/topology.h index d0890a7e5bab..525d437b1253 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -185,7 +185,6 @@ .max_interval = 64*num_online_cpus(), \ .busy_factor = 128, \ .imbalance_pct = 133, \ - .cache_hot_time = (10*1000000), \ .cache_nice_tries = 1, \ .busy_idx = 3, \ .idle_idx = 3, \ diff --git a/kernel/sched.c b/kernel/sched.c index 238a76957e86..72bb9483d949 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -637,7 +637,7 @@ static u64 div64_likely32(u64 divident, unsigned long divisor) #define WMULT_SHIFT 32 -static inline unsigned long +static unsigned long calc_delta_mine(unsigned long delta_exec, unsigned long weight, struct load_weight *lw) { @@ -657,7 +657,7 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT; } - return (unsigned long)min(tmp, (u64)sysctl_sched_runtime_limit); + return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); } static inline unsigned long @@ -678,46 +678,6 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec) lw->inv_weight = 0; } -static void __update_curr_load(struct rq *rq, struct load_stat *ls) -{ - if (rq->curr != rq->idle && ls->load.weight) { - ls->delta_exec += ls->delta_stat; - ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load); - ls->delta_stat = 0; - } -} - -/* - * Update delta_exec, delta_fair fields for rq. - * - * delta_fair clock advances at a rate inversely proportional to - * total load (rq->ls.load.weight) on the runqueue, while - * delta_exec advances at the same rate as wall-clock (provided - * cpu is not idle). - * - * delta_exec / delta_fair is a measure of the (smoothened) load on this - * runqueue over any given interval. This (smoothened) load is used - * during load balance. - * - * This function is called /before/ updating rq->ls.load - * and when switching tasks. - */ -static void update_curr_load(struct rq *rq, u64 now) -{ - struct load_stat *ls = &rq->ls; - u64 start; - - start = ls->load_update_start; - ls->load_update_start = now; - ls->delta_stat += now - start; - /* - * Stagger updates to ls->delta_fair. Very frequent updates - * can be expensive. - */ - if (ls->delta_stat >= sysctl_sched_stat_granularity) - __update_curr_load(rq, ls); -} - /* * To aid in avoiding the subversion of "niceness" due to uneven distribution * of tasks with abnormal "nice" values across CPUs the contribution that @@ -727,19 +687,6 @@ static void update_curr_load(struct rq *rq, u64 now) * slice expiry etc. */ -/* - * Assume: static_prio_timeslice(NICE_TO_PRIO(0)) == DEF_TIMESLICE - * If static_prio_timeslice() is ever changed to break this assumption then - * this code will need modification - */ -#define TIME_SLICE_NICE_ZERO DEF_TIMESLICE -#define load_weight(lp) \ - (((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO) -#define PRIO_TO_LOAD_WEIGHT(prio) \ - load_weight(static_prio_timeslice(prio)) -#define RTPRIO_TO_LOAD_WEIGHT(rp) \ - (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + load_weight(rp)) - #define WEIGHT_IDLEPRIO 2 #define WMULT_IDLEPRIO (1 << 31) @@ -781,32 +728,6 @@ static const u32 prio_to_wmult[40] = { /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, }; -static inline void -inc_load(struct rq *rq, const struct task_struct *p, u64 now) -{ - update_curr_load(rq, now); - update_load_add(&rq->ls.load, p->se.load.weight); -} - -static inline void -dec_load(struct rq *rq, const struct task_struct *p, u64 now) -{ - update_curr_load(rq, now); - update_load_sub(&rq->ls.load, p->se.load.weight); -} - -static inline void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now) -{ - rq->nr_running++; - inc_load(rq, p, now); -} - -static inline void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now) -{ - rq->nr_running--; - dec_load(rq, p, now); -} - static void activate_task(struct rq *rq, struct task_struct *p, int wakeup); /* @@ -837,6 +758,72 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, #define sched_class_highest (&rt_sched_class) +static void __update_curr_load(struct rq *rq, struct load_stat *ls) +{ + if (rq->curr != rq->idle && ls->load.weight) { + ls->delta_exec += ls->delta_stat; + ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load); + ls->delta_stat = 0; + } +} + +/* + * Update delta_exec, delta_fair fields for rq. + * + * delta_fair clock advances at a rate inversely proportional to + * total load (rq->ls.load.weight) on the runqueue, while + * delta_exec advances at the same rate as wall-clock (provided + * cpu is not idle). + * + * delta_exec / delta_fair is a measure of the (smoothened) load on this + * runqueue over any given interval. This (smoothened) load is used + * during load balance. + * + * This function is called /before/ updating rq->ls.load + * and when switching tasks. + */ +static void update_curr_load(struct rq *rq, u64 now) +{ + struct load_stat *ls = &rq->ls; + u64 start; + + start = ls->load_update_start; + ls->load_update_start = now; + ls->delta_stat += now - start; + /* + * Stagger updates to ls->delta_fair. Very frequent updates + * can be expensive. + */ + if (ls->delta_stat >= sysctl_sched_stat_granularity) + __update_curr_load(rq, ls); +} + +static inline void +inc_load(struct rq *rq, const struct task_struct *p, u64 now) +{ + update_curr_load(rq, now); + update_load_add(&rq->ls.load, p->se.load.weight); +} + +static inline void +dec_load(struct rq *rq, const struct task_struct *p, u64 now) +{ + update_curr_load(rq, now); + update_load_sub(&rq->ls.load, p->se.load.weight); +} + +static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now) +{ + rq->nr_running++; + inc_load(rq, p, now); +} + +static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now) +{ + rq->nr_running--; + dec_load(rq, p, now); +} + static void set_load_weight(struct task_struct *p) { task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime; @@ -996,18 +983,21 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) u64 clock_offset, fair_clock_offset; clock_offset = old_rq->clock - new_rq->clock; - fair_clock_offset = old_rq->cfs.fair_clock - - new_rq->cfs.fair_clock; - if (p->se.wait_start) - p->se.wait_start -= clock_offset; + fair_clock_offset = old_rq->cfs.fair_clock - new_rq->cfs.fair_clock; + if (p->se.wait_start_fair) p->se.wait_start_fair -= fair_clock_offset; + if (p->se.sleep_start_fair) + p->se.sleep_start_fair -= fair_clock_offset; + +#ifdef CONFIG_SCHEDSTATS + if (p->se.wait_start) + p->se.wait_start -= clock_offset; if (p->se.sleep_start) p->se.sleep_start -= clock_offset; if (p->se.block_start) p->se.block_start -= clock_offset; - if (p->se.sleep_start_fair) - p->se.sleep_start_fair -= fair_clock_offset; +#endif __set_task_cpu(p, new_cpu); } @@ -1568,17 +1558,19 @@ int fastcall wake_up_state(struct task_struct *p, unsigned int state) static void __sched_fork(struct task_struct *p) { p->se.wait_start_fair = 0; - p->se.wait_start = 0; p->se.exec_start = 0; p->se.sum_exec_runtime = 0; p->se.delta_exec = 0; p->se.delta_fair_run = 0; p->se.delta_fair_sleep = 0; p->se.wait_runtime = 0; + p->se.sleep_start_fair = 0; + +#ifdef CONFIG_SCHEDSTATS + p->se.wait_start = 0; p->se.sum_wait_runtime = 0; p->se.sum_sleep_runtime = 0; p->se.sleep_start = 0; - p->se.sleep_start_fair = 0; p->se.block_start = 0; p->se.sleep_max = 0; p->se.block_max = 0; @@ -1586,6 +1578,7 @@ static void __sched_fork(struct task_struct *p) p->se.wait_max = 0; p->se.wait_runtime_overruns = 0; p->se.wait_runtime_underruns = 0; +#endif INIT_LIST_HEAD(&p->run_list); p->se.on_rq = 0; @@ -1654,22 +1647,27 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) unsigned long flags; struct rq *rq; int this_cpu; + u64 now; rq = task_rq_lock(p, &flags); BUG_ON(p->state != TASK_RUNNING); this_cpu = smp_processor_id(); /* parent's CPU */ + now = rq_clock(rq); p->prio = effective_prio(p); - if (!sysctl_sched_child_runs_first || (clone_flags & CLONE_VM) || - task_cpu(p) != this_cpu || !current->se.on_rq) { + if (!p->sched_class->task_new || !sysctl_sched_child_runs_first || + (clone_flags & CLONE_VM) || task_cpu(p) != this_cpu || + !current->se.on_rq) { + activate_task(rq, p, 0); } else { /* * Let the scheduling class do new task startup * management (if any): */ - p->sched_class->task_new(rq, p); + p->sched_class->task_new(rq, p, now); + inc_nr_running(p, rq, now); } check_preempt_curr(rq, p); task_rq_unlock(rq, &flags); @@ -2908,8 +2906,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) schedstat_inc(sd, alb_cnt); if (move_tasks(target_rq, target_cpu, busiest_rq, 1, - RTPRIO_TO_LOAD_WEIGHT(100), sd, CPU_IDLE, - NULL)) + ULONG_MAX, sd, CPU_IDLE, NULL)) schedstat_inc(sd, alb_pushed); else schedstat_inc(sd, alb_failed); @@ -5269,8 +5266,6 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) sizeof(int), 0644, proc_dointvec_minmax); set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[9], 10, "cache_hot_time", &sd->cache_hot_time, - sizeof(long long), 0644, proc_doulongvec_minmax); set_table_entry(&table[10], 11, "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); @@ -6590,12 +6585,14 @@ void normalize_rt_tasks(void) do_each_thread(g, p) { p->se.fair_key = 0; p->se.wait_runtime = 0; + p->se.exec_start = 0; p->se.wait_start_fair = 0; + p->se.sleep_start_fair = 0; +#ifdef CONFIG_SCHEDSTATS p->se.wait_start = 0; - p->se.exec_start = 0; p->se.sleep_start = 0; - p->se.sleep_start_fair = 0; p->se.block_start = 0; +#endif task_rq(p)->cfs.fair_clock = 0; task_rq(p)->clock = 0; diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 0eca442b7792..1c61e5315ad2 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -44,11 +44,16 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now) (long long)p->se.wait_runtime, (long long)(p->nvcsw + p->nivcsw), p->prio, +#ifdef CONFIG_SCHEDSTATS (long long)p->se.sum_exec_runtime, (long long)p->se.sum_wait_runtime, (long long)p->se.sum_sleep_runtime, (long long)p->se.wait_runtime_overruns, - (long long)p->se.wait_runtime_underruns); + (long long)p->se.wait_runtime_underruns +#else + 0LL, 0LL, 0LL, 0LL, 0LL +#endif + ); } static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now) @@ -171,7 +176,7 @@ static int sched_debug_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Sched Debug Version: v0.05, %s %.*s\n", + SEQ_printf(m, "Sched Debug Version: v0.05-v20, %s %.*s\n", init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); @@ -235,21 +240,24 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) #define P(F) \ SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F) - P(se.wait_start); + P(se.wait_runtime); P(se.wait_start_fair); P(se.exec_start); - P(se.sleep_start); P(se.sleep_start_fair); + P(se.sum_exec_runtime); + +#ifdef CONFIG_SCHEDSTATS + P(se.wait_start); + P(se.sleep_start); P(se.block_start); P(se.sleep_max); P(se.block_max); P(se.exec_max); P(se.wait_max); - P(se.wait_runtime); P(se.wait_runtime_overruns); P(se.wait_runtime_underruns); P(se.sum_wait_runtime); - P(se.sum_exec_runtime); +#endif SEQ_printf(m, "%-25s:%20Ld\n", "nr_switches", (long long)(p->nvcsw + p->nivcsw)); P(se.load.weight); @@ -269,7 +277,9 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) void proc_sched_set_task(struct task_struct *p) { +#ifdef CONFIG_SCHEDSTATS p->se.sleep_max = p->se.block_max = p->se.exec_max = p->se.wait_max = 0; p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0; +#endif p->se.sum_exec_runtime = 0; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 6971db0a7160..6f579ff5a9bc 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -292,10 +292,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now) return; delta_exec = curr->delta_exec; -#ifdef CONFIG_SCHEDSTATS - if (unlikely(delta_exec > curr->exec_max)) - curr->exec_max = delta_exec; -#endif + schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max)); curr->sum_exec_runtime += delta_exec; cfs_rq->exec_clock += delta_exec; @@ -352,7 +349,7 @@ static inline void update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { se->wait_start_fair = cfs_rq->fair_clock; - se->wait_start = now; + schedstat_set(se->wait_start, now); } /* @@ -425,13 +422,7 @@ __update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { unsigned long delta_fair = se->delta_fair_run; -#ifdef CONFIG_SCHEDSTATS - { - s64 delta_wait = now - se->wait_start; - if (unlikely(delta_wait > se->wait_max)) - se->wait_max = delta_wait; - } -#endif + schedstat_set(se->wait_max, max(se->wait_max, now - se->wait_start)); if (unlikely(se->load.weight != NICE_0_LOAD)) delta_fair = calc_weighted(delta_fair, se->load.weight, @@ -456,7 +447,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) } se->wait_start_fair = 0; - se->wait_start = 0; + schedstat_set(se->wait_start, 0); } static inline void @@ -1041,11 +1032,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr) * monopolize the CPU. Note: the parent runqueue is locked, * the child is not running yet. */ -static void task_new_fair(struct rq *rq, struct task_struct *p) +static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now) { struct cfs_rq *cfs_rq = task_cfs_rq(p); struct sched_entity *se = &p->se; - u64 now = rq_clock(rq); sched_info_queued(p); @@ -1072,7 +1062,6 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) p->se.wait_runtime = -(sysctl_sched_granularity / 2); __enqueue_entity(cfs_rq, se); - inc_nr_running(p, rq, now); } #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 1192a2741b99..002fcf8d3f64 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -18,8 +18,8 @@ static inline void update_curr_rt(struct rq *rq, u64 now) delta_exec = now - curr->se.exec_start; if (unlikely((s64)delta_exec < 0)) delta_exec = 0; - if (unlikely(delta_exec > curr->se.exec_max)) - curr->se.exec_max = delta_exec; + + schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); curr->se.sum_exec_runtime += delta_exec; curr->se.exec_start = now; @@ -229,15 +229,6 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p) requeue_task_rt(rq, p); } -/* - * No parent/child timeslice management necessary for RT tasks, - * just activate them: - */ -static void task_new_rt(struct rq *rq, struct task_struct *p) -{ - activate_task(rq, p, 1); -} - static struct sched_class rt_sched_class __read_mostly = { .enqueue_task = enqueue_task_rt, .dequeue_task = dequeue_task_rt, @@ -251,5 +242,4 @@ static struct sched_class rt_sched_class __read_mostly = { .load_balance = load_balance_rt, .task_tick = task_tick_rt, - .task_new = task_new_rt, }; diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index c63c38f6fa6e..c20a94dda61e 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -116,6 +116,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) } # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) +# define schedstat_set(var, val) do { var = (val); } while (0) #else /* !CONFIG_SCHEDSTATS */ static inline void rq_sched_info_arrive(struct rq *rq, unsigned long long delta) @@ -125,6 +126,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) {} # define schedstat_inc(rq, field) do { } while (0) # define schedstat_add(rq, field, amt) do { } while (0) +# define schedstat_set(var, val) do { } while (0) #endif #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |