Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar: "The main changes in this cycle were: - tickless load average calculation enhancements (Byungchul Park) - vtime handling enhancements (Frederic Weisbecker) - scalability improvement via properly aligning a key structure field (Jiri Olsa) - various stop_machine() fixes (Oleg Nesterov) - sched/numa enhancement (Rik van Riel) - various fixes and improvements (Andi Kleen, Dietmar Eggemann, Geliang Tang, Hiroshi Shimamoto, Joonwoo Park, Peter Zijlstra, Waiman Long, Wanpeng Li, Yuyang Du)" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (32 commits) sched/fair: Fix new task's load avg removed from source CPU in wake_up_new_task() sched/core: Move sched_entity::avg into separate cache line x86/fpu: Properly align size in CHECK_MEMBER_AT_END_OF() macro sched/deadline: Fix the earliest_dl.next logic sched/fair: Disable the task group load_avg update for the root_task_group sched/fair: Move the cache-hot 'load_avg' variable into its own cacheline sched/fair: Avoid redundant idle_cpu() call in update_sg_lb_stats() sched/core: Move the sched_to_prio[] arrays out of line sched/cputime: Convert vtime_seqlock to seqcount sched/cputime: Introduce vtime accounting check for readers sched/cputime: Rename vtime_accounting_enabled() to vtime_accounting_cpu_enabled() sched/cputime: Correctly handle task guest time on housekeepers sched/cputime: Clarify vtime symbols and document them sched/cputime: Remove extra cost in task_cputime() sched/fair: Make it possible to account fair load avg consistently sched/fair: Modify the comment about lock assumptions in migrate_task_rq_fair() stop_machine: Clean up the usage of the preemption counter in cpu_stopper_thread() stop_machine: Shift the 'done != NULL' check from cpu_stop_signal_done() to callers stop_machine: Kill cpu_stop_done->executed stop_machine: Change __stop_cpus() to rely on cpu_stop_queue_work() ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-01-11 15:13:38 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-01-11 15:13:38 -0800
commit: af345201ea948d0976d775958d8aa22fe5e5ba58 (patch)
tree: 2badae3f02ff9415c86a2188b0b5d565dc257a6c /include
parent: 4bd20db2c027eab7490e3c0466734738bef2dd24 (diff)
parent: 0905f04eb21fc1c2e690bed5d0418a061d56c225 (diff)
download: linux-af345201ea948d0976d775958d8aa22fe5e5ba58.tar.bz2
6 files changed, 70 insertions, 18 deletions
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 68b575afe5f5..d259274238db 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -86,7 +86,7 @@ static inline void context_tracking_init(void) { }
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 static inline void guest_enter(void)
 {
-	if (vtime_accounting_enabled())
+	if (vtime_accounting_cpu_enabled())
 		vtime_guest_enter(current);
 	else
 		current->flags |= PF_VCPU;
@@ -100,7 +100,7 @@ static inline void guest_exit(void)
 	if (context_tracking_is_enabled())
 		__context_tracking_exit(CONTEXT_GUEST);
 
-	if (vtime_accounting_enabled())
+	if (vtime_accounting_cpu_enabled())
 		vtime_guest_exit(current);
 	else
 		current->flags &= ~PF_VCPU;
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 1c1ff7e4faa4..f2cb8d45513d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -150,7 +150,7 @@ extern struct task_group root_task_group;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 # define INIT_VTIME(tsk)						\
-	.vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock),	\
+	.vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount),	\
 	.vtime_snap = 0,				\
 	.vtime_snap_whence = VTIME_SYS,
 #else
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fa39434e3fdd..0c0e78102850 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -177,9 +177,9 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 extern void calc_global_load(unsigned long ticks);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void update_cpu_load_nohz(void);
+extern void update_cpu_load_nohz(int active);
 #else
-static inline void update_cpu_load_nohz(void) { }
+static inline void update_cpu_load_nohz(int active) { }
 #endif
 
 extern unsigned long get_parent_ip(unsigned long addr);
@@ -1268,8 +1268,13 @@ struct sched_entity {
 #endif
 
 #ifdef CONFIG_SMP
-	/* Per entity load average tracking */
-	struct sched_avg	avg;
+	/*
+	 * Per entity load average tracking.
+	 *
+	 * Put into separate cache line so it does not
+	 * collide with read-mostly values above.
+	 */
+	struct sched_avg	avg ____cacheline_aligned_in_smp;
 #endif
 };
 
@@ -1520,11 +1525,14 @@ struct task_struct {
 	cputime_t gtime;
 	struct prev_cputime prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-	seqlock_t vtime_seqlock;
+	seqcount_t vtime_seqcount;
 	unsigned long long vtime_snap;
 	enum {
-		VTIME_SLEEPING = 0,
+		/* Task is sleeping or running in a CPU with VTIME inactive */
+		VTIME_INACTIVE = 0,
+		/* Task runs in userspace in a CPU with VTIME active */
 		VTIME_USER,
+		/* Task runs in kernelspace in a CPU with VTIME active */
 		VTIME_SYS,
 	} vtime_snap_whence;
 #endif
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 0e1b1540597a..3cc9632dcc2a 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -29,7 +29,7 @@ struct cpu_stop_work {
 
 int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
 int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg);
-void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 			 struct cpu_stop_work *work_buf);
 int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
 int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
@@ -65,7 +65,7 @@ static void stop_one_cpu_nowait_workfn(struct work_struct *work)
 	preempt_enable();
 }
 
-static inline void stop_one_cpu_nowait(unsigned int cpu,
+static inline bool stop_one_cpu_nowait(unsigned int cpu,
 				       cpu_stop_fn_t fn, void *arg,
 				       struct cpu_stop_work *work_buf)
 {
@@ -74,7 +74,10 @@ static inline void stop_one_cpu_nowait(unsigned int cpu,
 		work_buf->fn = fn;
 		work_buf->arg = arg;
 		schedule_work(&work_buf->work);
+		return true;
 	}
+
+	return false;
 }
 
 static inline int stop_cpus(const struct cpumask *cpumask,
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index c5165fd256f9..fa2196990f84 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -10,16 +10,27 @@
 struct task_struct;
 
 /*
- * vtime_accounting_enabled() definitions/declarations
+ * vtime_accounting_cpu_enabled() definitions/declarations
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-static inline bool vtime_accounting_enabled(void) { return true; }
+static inline bool vtime_accounting_cpu_enabled(void) { return true; }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+/*
+ * Checks if vtime is enabled on some CPU. Cputime readers want to be careful
+ * in that case and compute the tickless cputime.
+ * For now vtime state is tied to context tracking. We might want to decouple
+ * those later if necessary.
+ */
 static inline bool vtime_accounting_enabled(void)
 {
-	if (context_tracking_is_enabled()) {
+	return context_tracking_is_enabled();
+}
+
+static inline bool vtime_accounting_cpu_enabled(void)
+{
+	if (vtime_accounting_enabled()) {
 		if (context_tracking_cpu_is_enabled())
 			return true;
 	}
@@ -29,7 +40,7 @@ static inline bool vtime_accounting_enabled(void)
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-static inline bool vtime_accounting_enabled(void) { return false; }
+static inline bool vtime_accounting_cpu_enabled(void) { return false; }
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 
@@ -44,7 +55,7 @@ extern void vtime_task_switch(struct task_struct *prev);
 extern void vtime_common_task_switch(struct task_struct *prev);
 static inline void vtime_task_switch(struct task_struct *prev)
 {
-	if (vtime_accounting_enabled())
+	if (vtime_accounting_cpu_enabled())
 		vtime_common_task_switch(prev);
 }
 #endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
@@ -59,7 +70,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk);
 extern void vtime_common_account_irq_enter(struct task_struct *tsk);
 static inline void vtime_account_irq_enter(struct task_struct *tsk)
 {
-	if (vtime_accounting_enabled())
+	if (vtime_accounting_cpu_enabled())
 		vtime_common_account_irq_enter(tsk);
 }
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -78,7 +89,7 @@ extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
 
 static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
-	if (vtime_accounting_enabled())
+	if (vtime_accounting_cpu_enabled())
 		vtime_gen_account_irq_exit(tsk);
 }
 
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 513b36f04dfd..d2f4ec7dba7c 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -102,6 +102,36 @@ init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
 	q->func		= func;
 }
 
+/**
+ * waitqueue_active -- locklessly test for waiters on the queue
+ * @q: the waitqueue to test for waiters
+ *
+ * returns true if the wait list is not empty
+ *
+ * NOTE: this function is lockless and requires care, incorrect usage _will_
+ * lead to sporadic and non-obvious failure.
+ *
+ * Use either while holding wait_queue_head_t::lock or when used for wakeups
+ * with an extra smp_mb() like:
+ *
+ *      CPU0 - waker                    CPU1 - waiter
+ *
+ *                                      for (;;) {
+ *      @cond = true;                     prepare_to_wait(&wq, &wait, state);
+ *      smp_mb();                         // smp_mb() from set_current_state()
+ *      if (waitqueue_active(wq))         if (@cond)
+ *        wake_up(wq);                      break;
+ *                                        schedule();
+ *                                      }
+ *                                      finish_wait(&wq, &wait);
+ *
+ * Because without the explicit smp_mb() it's possible for the
+ * waitqueue_active() load to get hoisted over the @cond store such that we'll
+ * observe an empty wait list while the waiter might not observe @cond.
+ *
+ * Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
+ * which (when the lock is uncontended) are of roughly equal cost.
+ */
 static inline int waitqueue_active(wait_queue_head_t *q)
 {
 	return !list_empty(&q->task_list);
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-11 15:13:38 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-11 15:13:38 -0800
commit	af345201ea948d0976d775958d8aa22fe5e5ba58 (patch)
tree	2badae3f02ff9415c86a2188b0b5d565dc257a6c /include
parent	4bd20db2c027eab7490e3c0466734738bef2dd24 (diff)
parent	0905f04eb21fc1c2e690bed5d0418a061d56c225 (diff)
download	linux-af345201ea948d0976d775958d8aa22fe5e5ba58.tar.bz2