diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-08 16:39:53 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-08 16:39:53 -0700 |
commit | dad1c12ed831a7a89cc01e5582cd0b81a4be7f19 (patch) | |
tree | 7a84799d3108bd9d3f1d4b530afd3ff9300db982 /include | |
parent | 090bc5a2a91499c1fd64b78d125daa6ca5531d38 (diff) | |
parent | af24bde8df2029f067dc46aff0393c8f18ff6e2f (diff) | |
download | linux-dad1c12ed831a7a89cc01e5582cd0b81a4be7f19.tar.bz2 |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
- Remove the unused per rq load array and all its infrastructure, by
Dietmar Eggemann.
- Add utilization clamping support by Patrick Bellasi. This is a
refinement of the energy aware scheduling framework with support for
boosting of interactive and capping of background workloads: to make
sure critical GUI threads get maximum frequency ASAP, and to make
sure background processing doesn't unnecessarily move to cpufreq
governor to higher frequencies and less energy efficient CPU modes.
- Add the bare minimum of tracepoints required for LISA EAS regression
testing, by Qais Yousef - which allows automated testing of various
power management features, including energy aware scheduling.
- Restructure the former tsk_nr_cpus_allowed() facility that the -rt
kernel used to modify the scheduler's CPU affinity logic such as
migrate_disable() - introduce the task->cpus_ptr value instead of
taking the address of &task->cpus_allowed directly - by Sebastian
Andrzej Siewior.
- Misc optimizations, fixes, cleanups and small enhancements - see the
Git log for details.
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
sched/uclamp: Add uclamp support to energy_compute()
sched/uclamp: Add uclamp_util_with()
sched/cpufreq, sched/uclamp: Add clamps for FAIR and RT tasks
sched/uclamp: Set default clamps for RT tasks
sched/uclamp: Reset uclamp values on RESET_ON_FORK
sched/uclamp: Extend sched_setattr() to support utilization clamping
sched/core: Allow sched_setattr() to use the current policy
sched/uclamp: Add system default clamps
sched/uclamp: Enforce last task's UCLAMP_MAX
sched/uclamp: Add bucket local max tracking
sched/uclamp: Add CPU's clamp buckets refcounting
sched/fair: Rename weighted_cpuload() to cpu_runnable_load()
sched/debug: Export the newly added tracepoints
sched/debug: Add sched_overutilized tracepoint
sched/debug: Add new tracepoint to track PELT at se level
sched/debug: Add new tracepoints to track PELT at rq level
sched/debug: Add a new sched_trace_*() helper functions
sched/autogroup: Make autogroup_path() always available
sched/wait: Deduplicate code with do-while
sched/topology: Remove unused 'sd' parameter from arch_scale_cpu_capacity()
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/arch_topology.h | 2 | ||||
-rw-r--r-- | include/linux/energy_model.h | 2 | ||||
-rw-r--r-- | include/linux/log2.h | 34 | ||||
-rw-r--r-- | include/linux/sched.h | 79 | ||||
-rw-r--r-- | include/linux/sched/nohz.h | 8 | ||||
-rw-r--r-- | include/linux/sched/sysctl.h | 11 | ||||
-rw-r--r-- | include/linux/sched/topology.h | 25 | ||||
-rw-r--r-- | include/trace/events/sched.h | 31 | ||||
-rw-r--r-- | include/uapi/linux/sched.h | 14 | ||||
-rw-r--r-- | include/uapi/linux/sched/types.h | 66 |
10 files changed, 227 insertions, 45 deletions
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index d9bdc1a7f4e7..1cfe05ea1d89 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -18,7 +18,7 @@ DECLARE_PER_CPU(unsigned long, cpu_scale); struct sched_domain; static inline -unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu) +unsigned long topology_get_cpu_scale(int cpu) { return per_cpu(cpu_scale, cpu); } diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index aa027f7bcb3e..73f8c3cb9588 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -89,7 +89,7 @@ static inline unsigned long em_pd_energy(struct em_perf_domain *pd, * like schedutil. */ cpu = cpumask_first(to_cpumask(pd->cpus)); - scale_cpu = arch_scale_cpu_capacity(NULL, cpu); + scale_cpu = arch_scale_cpu_capacity(cpu); cs = &pd->table[pd->nr_cap_states - 1]; freq = map_util_freq(max_util, cs->frequency, scale_cpu); diff --git a/include/linux/log2.h b/include/linux/log2.h index 1aec01365ed4..83a4a3ca3e8a 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -220,4 +220,38 @@ int __order_base_2(unsigned long n) ilog2((n) - 1) + 1) : \ __order_base_2(n) \ ) + +static inline __attribute__((const)) +int __bits_per(unsigned long n) +{ + if (n < 2) + return 1; + if (is_power_of_2(n)) + return order_base_2(n) + 1; + return order_base_2(n); +} + +/** + * bits_per - calculate the number of bits required for the argument + * @n: parameter + * + * This is constant-capable and can be used for compile time + * initializations, e.g bitfields. + * + * The first few values calculated by this routine: + * bf(0) = 1 + * bf(1) = 1 + * bf(2) = 2 + * bf(3) = 2 + * bf(4) = 3 + * ... and so on. + */ +#define bits_per(n) \ +( \ + __builtin_constant_p(n) ? ( \ + ((n) == 0 || (n) == 1) \ + ? 1 : ilog2(n) + 1 \ + ) : \ + __bits_per(n) \ +) #endif /* _LINUX_LOG2_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 76adce49b5ad..459d95e4a574 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -35,6 +35,7 @@ struct audit_context; struct backing_dev_info; struct bio_list; struct blk_plug; +struct capture_control; struct cfs_rq; struct fs_struct; struct futex_pi_state; @@ -47,8 +48,9 @@ struct pid_namespace; struct pipe_inode_info; struct rcu_node; struct reclaim_state; -struct capture_control; struct robust_list_head; +struct root_domain; +struct rq; struct sched_attr; struct sched_param; struct seq_file; @@ -281,6 +283,18 @@ struct vtime { u64 gtime; }; +/* + * Utilization clamp constraints. + * @UCLAMP_MIN: Minimum utilization + * @UCLAMP_MAX: Maximum utilization + * @UCLAMP_CNT: Utilization clamp constraints count + */ +enum uclamp_id { + UCLAMP_MIN = 0, + UCLAMP_MAX, + UCLAMP_CNT +}; + struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ @@ -312,6 +326,10 @@ struct sched_info { # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) +/* Increase resolution of cpu_capacity calculations */ +# define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT +# define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) + struct load_weight { unsigned long weight; u32 inv_weight; @@ -560,6 +578,41 @@ struct sched_dl_entity { struct hrtimer inactive_timer; }; +#ifdef CONFIG_UCLAMP_TASK +/* Number of utilization clamp buckets (shorter alias) */ +#define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT + +/* + * Utilization clamp for a scheduling entity + * @value: clamp value "assigned" to a se + * @bucket_id: bucket index corresponding to the "assigned" value + * @active: the se is currently refcounted in a rq's bucket + * @user_defined: the requested clamp value comes from user-space + * + * The bucket_id is the index of the clamp bucket matching the clamp value + * which is pre-computed and stored to avoid expensive integer divisions from + * the fast path. + * + * The active bit is set whenever a task has got an "effective" value assigned, + * which can be different from the clamp value "requested" from user-space. + * This allows to know a task is refcounted in the rq's bucket corresponding + * to the "effective" bucket_id. + * + * The user_defined bit is set whenever a task has got a task-specific clamp + * value requested from userspace, i.e. the system defaults apply to this task + * just as a restriction. This allows to relax default clamps when a less + * restrictive task-specific value has been requested, thus allowing to + * implement a "nice" semantic. For example, a task running with a 20% + * default boost can still drop its own boosting to 0%. + */ +struct uclamp_se { + unsigned int value : bits_per(SCHED_CAPACITY_SCALE); + unsigned int bucket_id : bits_per(UCLAMP_BUCKETS); + unsigned int active : 1; + unsigned int user_defined : 1; +}; +#endif /* CONFIG_UCLAMP_TASK */ + union rcu_special { struct { u8 blocked; @@ -640,6 +693,13 @@ struct task_struct { #endif struct sched_dl_entity dl; +#ifdef CONFIG_UCLAMP_TASK + /* Clamp values requested for a scheduling entity */ + struct uclamp_se uclamp_req[UCLAMP_CNT]; + /* Effective clamp values used for a scheduling entity */ + struct uclamp_se uclamp[UCLAMP_CNT]; +#endif + #ifdef CONFIG_PREEMPT_NOTIFIERS /* List of struct preempt_notifier: */ struct hlist_head preempt_notifiers; @@ -651,7 +711,8 @@ struct task_struct { unsigned int policy; int nr_cpus_allowed; - cpumask_t cpus_allowed; + const cpumask_t *cpus_ptr; + cpumask_t cpus_mask; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; @@ -1399,7 +1460,7 @@ extern struct pid *cad_pid; #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */ #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ -#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ @@ -1915,4 +1976,16 @@ static inline void rseq_syscall(struct pt_regs *regs) #endif +const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq); +char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len); +int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq); + +const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq); +const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq); +const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq); + +int sched_trace_rq_cpu(struct rq *rq); + +const struct cpumask *sched_trace_rd_span(struct root_domain *rd); + #endif diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index b36f4cf38111..1abe91ff6e4a 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -7,14 +7,6 @@ */ #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void cpu_load_update_nohz_start(void); -extern void cpu_load_update_nohz_stop(void); -#else -static inline void cpu_load_update_nohz_start(void) { } -static inline void cpu_load_update_nohz_stop(void) { } -#endif - -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern int get_nohz_timer_target(void); #else diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 99ce6d728df7..d4f6215ee03f 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -56,6 +56,11 @@ int sched_proc_update_handler(struct ctl_table *table, int write, extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; +#ifdef CONFIG_UCLAMP_TASK +extern unsigned int sysctl_sched_uclamp_util_min; +extern unsigned int sysctl_sched_uclamp_util_max; +#endif + #ifdef CONFIG_CFS_BANDWIDTH extern unsigned int sysctl_sched_cfs_bandwidth_slice; #endif @@ -75,6 +80,12 @@ extern int sched_rt_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#ifdef CONFIG_UCLAMP_TASK +extern int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +#endif + extern int sysctl_numa_balancing(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index cfc0a89a7159..7863bb62d2ab 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -7,12 +7,6 @@ #include <linux/sched/idle.h> /* - * Increase resolution of cpu_capacity calculations - */ -#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT -#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) - -/* * sched-domains (multiprocessor balancing) declarations: */ #ifdef CONFIG_SMP @@ -84,11 +78,6 @@ struct sched_domain { unsigned int busy_factor; /* less balancing by factor if busy */ unsigned int imbalance_pct; /* No balance until over watermark */ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ - unsigned int busy_idx; - unsigned int idle_idx; - unsigned int newidle_idx; - unsigned int wake_idx; - unsigned int forkexec_idx; int nohz_idle; /* NOHZ IDLE status */ int flags; /* See SD_* */ @@ -201,14 +190,6 @@ extern void set_sched_topology(struct sched_domain_topology_level *tl); # define SD_INIT_NAME(type) #endif -#ifndef arch_scale_cpu_capacity -static __always_inline -unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) -{ - return SCHED_CAPACITY_SCALE; -} -#endif - #else /* CONFIG_SMP */ struct sched_domain_attr; @@ -224,16 +205,16 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) return true; } +#endif /* !CONFIG_SMP */ + #ifndef arch_scale_cpu_capacity static __always_inline -unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu) +unsigned long arch_scale_cpu_capacity(int cpu) { return SCHED_CAPACITY_SCALE; } #endif -#endif /* !CONFIG_SMP */ - static inline int task_node(const struct task_struct *p) { return cpu_to_node(task_cpu(p)); diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index c8c7c7efb487..420e80e56e55 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -594,6 +594,37 @@ TRACE_EVENT(sched_wake_idle_without_ipi, TP_printk("cpu=%d", __entry->cpu) ); + +/* + * Following tracepoints are not exported in tracefs and provide hooking + * mechanisms only for testing and debugging purposes. + * + * Postfixed with _tp to make them easily identifiable in the code. + */ +DECLARE_TRACE(pelt_cfs_tp, + TP_PROTO(struct cfs_rq *cfs_rq), + TP_ARGS(cfs_rq)); + +DECLARE_TRACE(pelt_rt_tp, + TP_PROTO(struct rq *rq), + TP_ARGS(rq)); + +DECLARE_TRACE(pelt_dl_tp, + TP_PROTO(struct rq *rq), + TP_ARGS(rq)); + +DECLARE_TRACE(pelt_irq_tp, + TP_PROTO(struct rq *rq), + TP_ARGS(rq)); + +DECLARE_TRACE(pelt_se_tp, + TP_PROTO(struct sched_entity *se), + TP_ARGS(se)); + +DECLARE_TRACE(sched_overutilized_tp, + TP_PROTO(struct root_domain *rd, bool overutilized), + TP_ARGS(rd, overutilized)); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index ed4ee170bee2..617bb59aa8ba 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -51,9 +51,21 @@ #define SCHED_FLAG_RESET_ON_FORK 0x01 #define SCHED_FLAG_RECLAIM 0x02 #define SCHED_FLAG_DL_OVERRUN 0x04 +#define SCHED_FLAG_KEEP_POLICY 0x08 +#define SCHED_FLAG_KEEP_PARAMS 0x10 +#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20 +#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40 + +#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \ + SCHED_FLAG_KEEP_PARAMS) + +#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \ + SCHED_FLAG_UTIL_CLAMP_MAX) #define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \ SCHED_FLAG_RECLAIM | \ - SCHED_FLAG_DL_OVERRUN) + SCHED_FLAG_DL_OVERRUN | \ + SCHED_FLAG_KEEP_ALL | \ + SCHED_FLAG_UTIL_CLAMP) #endif /* _UAPI_LINUX_SCHED_H */ diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h index 10fbb8031930..c852153ddb0d 100644 --- a/include/uapi/linux/sched/types.h +++ b/include/uapi/linux/sched/types.h @@ -9,6 +9,7 @@ struct sched_param { }; #define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ +#define SCHED_ATTR_SIZE_VER1 56 /* add: util_{min,max} */ /* * Extended scheduling parameters data structure. @@ -21,8 +22,33 @@ struct sched_param { * the tasks may be useful for a wide variety of application fields, e.g., * multimedia, streaming, automation and control, and many others. * - * This variant (sched_attr) is meant at describing a so-called - * sporadic time-constrained task. In such model a task is specified by: + * This variant (sched_attr) allows to define additional attributes to + * improve the scheduler knowledge about task requirements. + * + * Scheduling Class Attributes + * =========================== + * + * A subset of sched_attr attributes specifies the + * scheduling policy and relative POSIX attributes: + * + * @size size of the structure, for fwd/bwd compat. + * + * @sched_policy task's scheduling policy + * @sched_nice task's nice value (SCHED_NORMAL/BATCH) + * @sched_priority task's static priority (SCHED_FIFO/RR) + * + * Certain more advanced scheduling features can be controlled by a + * predefined set of flags via the attribute: + * + * @sched_flags for customizing the scheduler behaviour + * + * Sporadic Time-Constrained Task Attributes + * ========================================= + * + * A subset of sched_attr attributes allows to describe a so-called + * sporadic time-constrained task. + * + * In such a model a task is specified by: * - the activation period or minimum instance inter-arrival time; * - the maximum (or average, depending on the actual scheduling * discipline) computation time of all instances, a.k.a. runtime; @@ -34,14 +60,8 @@ struct sched_param { * than the runtime and must be completed by time instant t equal to * the instance activation time + the deadline. * - * This is reflected by the actual fields of the sched_attr structure: + * This is reflected by the following fields of the sched_attr structure: * - * @size size of the structure, for fwd/bwd compat. - * - * @sched_policy task's scheduling policy - * @sched_flags for customizing the scheduler behaviour - * @sched_nice task's nice value (SCHED_NORMAL/BATCH) - * @sched_priority task's static priority (SCHED_FIFO/RR) * @sched_deadline representative of the task's deadline * @sched_runtime representative of the task's runtime * @sched_period representative of the task's period @@ -53,6 +73,29 @@ struct sched_param { * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the * only user of this new interface. More information about the algorithm * available in the scheduling class file or in Documentation/. + * + * Task Utilization Attributes + * =========================== + * + * A subset of sched_attr attributes allows to specify the utilization + * expected for a task. These attributes allow to inform the scheduler about + * the utilization boundaries within which it should schedule the task. These + * boundaries are valuable hints to support scheduler decisions on both task + * placement and frequency selection. + * + * @sched_util_min represents the minimum utilization + * @sched_util_max represents the maximum utilization + * + * Utilization is a value in the range [0..SCHED_CAPACITY_SCALE]. It + * represents the percentage of CPU time used by a task when running at the + * maximum frequency on the highest capacity CPU of the system. For example, a + * 20% utilization task is a task running for 2ms every 10ms at maximum + * frequency. + * + * A task with a min utilization value bigger than 0 is more likely scheduled + * on a CPU with a capacity big enough to fit the specified value. + * A task with a max utilization value smaller than 1024 is more likely + * scheduled on a CPU with no more capacity than the specified value. */ struct sched_attr { __u32 size; @@ -70,6 +113,11 @@ struct sched_attr { __u64 sched_runtime; __u64 sched_deadline; __u64 sched_period; + + /* Utilization hints */ + __u32 sched_util_min; + __u32 sched_util_max; + }; #endif /* _UAPI_LINUX_SCHED_TYPES_H */ |