summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched.c8
-rw-r--r--kernel/sched_rt.c225
2 files changed, 231 insertions, 2 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 6185fa080ec8..97cab609fc31 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1952,6 +1952,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
prev_state = prev->state;
finish_arch_switch(prev);
finish_lock_switch(rq, prev);
+ schedule_tail_balance_rt(rq);
+
fire_sched_in_preempt_notifiers(current);
if (mm)
mmdrop(mm);
@@ -2185,11 +2187,13 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
/*
* double_lock_balance - lock the busiest runqueue, this_rq is locked already.
*/
-static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
__releases(this_rq->lock)
__acquires(busiest->lock)
__acquires(this_rq->lock)
{
+ int ret = 0;
+
if (unlikely(!irqs_disabled())) {
/* printk() doesn't work good under rq->lock */
spin_unlock(&this_rq->lock);
@@ -2200,9 +2204,11 @@ static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
spin_unlock(&this_rq->lock);
spin_lock(&busiest->lock);
spin_lock(&this_rq->lock);
+ ret = 1;
} else
spin_lock(&busiest->lock);
}
+ return ret;
}
/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 136c2857a049..7815e90b1147 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -137,6 +137,227 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
}
#ifdef CONFIG_SMP
+/* Only try algorithms three times */
+#define RT_MAX_TRIES 3
+
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
+static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
+
+/* Return the second highest RT task, NULL otherwise */
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
+{
+ struct rt_prio_array *array = &rq->rt.active;
+ struct task_struct *next;
+ struct list_head *queue;
+ int idx;
+
+ assert_spin_locked(&rq->lock);
+
+ if (likely(rq->rt.rt_nr_running < 2))
+ return NULL;
+
+ idx = sched_find_first_bit(array->bitmap);
+ if (unlikely(idx >= MAX_RT_PRIO)) {
+ WARN_ON(1); /* rt_nr_running is bad */
+ return NULL;
+ }
+
+ queue = array->queue + idx;
+ next = list_entry(queue->next, struct task_struct, run_list);
+ if (unlikely(next != rq->curr))
+ return next;
+
+ if (queue->next->next != queue) {
+ /* same prio task */
+ next = list_entry(queue->next->next, struct task_struct, run_list);
+ return next;
+ }
+
+ /* slower, but more flexible */
+ idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
+ if (unlikely(idx >= MAX_RT_PRIO)) {
+ WARN_ON(1); /* rt_nr_running was 2 and above! */
+ return NULL;
+ }
+
+ queue = array->queue + idx;
+ next = list_entry(queue->next, struct task_struct, run_list);
+
+ return next;
+}
+
+static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
+
+/* Will lock the rq it finds */
+static struct rq *find_lock_lowest_rq(struct task_struct *task,
+ struct rq *this_rq)
+{
+ struct rq *lowest_rq = NULL;
+ int cpu;
+ int tries;
+ cpumask_t *cpu_mask = &__get_cpu_var(local_cpu_mask);
+
+ cpus_and(*cpu_mask, cpu_online_map, task->cpus_allowed);
+
+ for (tries = 0; tries < RT_MAX_TRIES; tries++) {
+ /*
+ * Scan each rq for the lowest prio.
+ */
+ for_each_cpu_mask(cpu, *cpu_mask) {
+ struct rq *rq = &per_cpu(runqueues, cpu);
+
+ if (cpu == this_rq->cpu)
+ continue;
+
+ /* We look for lowest RT prio or non-rt CPU */
+ if (rq->rt.highest_prio >= MAX_RT_PRIO) {
+ lowest_rq = rq;
+ break;
+ }
+
+ /* no locking for now */
+ if (rq->rt.highest_prio > task->prio &&
+ (!lowest_rq || rq->rt.highest_prio > lowest_rq->rt.highest_prio)) {
+ lowest_rq = rq;
+ }
+ }
+
+ if (!lowest_rq)
+ break;
+
+ /* if the prio of this runqueue changed, try again */
+ if (double_lock_balance(this_rq, lowest_rq)) {
+ /*
+ * We had to unlock the run queue. In
+ * the mean time, task could have
+ * migrated already or had its affinity changed.
+ * Also make sure that it wasn't scheduled on its rq.
+ */
+ if (unlikely(task_rq(task) != this_rq ||
+ !cpu_isset(lowest_rq->cpu, task->cpus_allowed) ||
+ task_running(this_rq, task) ||
+ !task->se.on_rq)) {
+ spin_unlock(&lowest_rq->lock);
+ lowest_rq = NULL;
+ break;
+ }
+ }
+
+ /* If this rq is still suitable use it. */
+ if (lowest_rq->rt.highest_prio > task->prio)
+ break;
+
+ /* try again */
+ spin_unlock(&lowest_rq->lock);
+ lowest_rq = NULL;
+ }
+
+ return lowest_rq;
+}
+
+/*
+ * If the current CPU has more than one RT task, see if the non
+ * running task can migrate over to a CPU that is running a task
+ * of lesser priority.
+ */
+static int push_rt_task(struct rq *this_rq)
+{
+ struct task_struct *next_task;
+ struct rq *lowest_rq;
+ int ret = 0;
+ int paranoid = RT_MAX_TRIES;
+
+ assert_spin_locked(&this_rq->lock);
+
+ next_task = pick_next_highest_task_rt(this_rq);
+ if (!next_task)
+ return 0;
+
+ retry:
+ if (unlikely(next_task == this_rq->curr))
+ return 0;
+
+ /*
+ * It's possible that the next_task slipped in of
+ * higher priority than current. If that's the case
+ * just reschedule current.
+ */
+ if (unlikely(next_task->prio < this_rq->curr->prio)) {
+ resched_task(this_rq->curr);
+ return 0;
+ }
+
+ /* We might release this_rq lock */
+ get_task_struct(next_task);
+
+ /* find_lock_lowest_rq locks the rq if found */
+ lowest_rq = find_lock_lowest_rq(next_task, this_rq);
+ if (!lowest_rq) {
+ struct task_struct *task;
+ /*
+ * find lock_lowest_rq releases this_rq->lock
+ * so it is possible that next_task has changed.
+ * If it has, then try again.
+ */
+ task = pick_next_highest_task_rt(this_rq);
+ if (unlikely(task != next_task) && task && paranoid--) {
+ put_task_struct(next_task);
+ next_task = task;
+ goto retry;
+ }
+ goto out;
+ }
+
+ assert_spin_locked(&lowest_rq->lock);
+
+ deactivate_task(this_rq, next_task, 0);
+ set_task_cpu(next_task, lowest_rq->cpu);
+ activate_task(lowest_rq, next_task, 0);
+
+ resched_task(lowest_rq->curr);
+
+ spin_unlock(&lowest_rq->lock);
+
+ ret = 1;
+out:
+ put_task_struct(next_task);
+
+ return ret;
+}
+
+/*
+ * TODO: Currently we just use the second highest prio task on
+ * the queue, and stop when it can't migrate (or there's
+ * no more RT tasks). There may be a case where a lower
+ * priority RT task has a different affinity than the
+ * higher RT task. In this case the lower RT task could
+ * possibly be able to migrate where as the higher priority
+ * RT task could not. We currently ignore this issue.
+ * Enhancements are welcome!
+ */
+static void push_rt_tasks(struct rq *rq)
+{
+ /* push_rt_task will return true if it moved an RT */
+ while (push_rt_task(rq))
+ ;
+}
+
+static void schedule_tail_balance_rt(struct rq *rq)
+{
+ /*
+ * If we have more than one rt_task queued, then
+ * see if we can push the other rt_tasks off to other CPUS.
+ * Note we may release the rq lock, and since
+ * the lock was owned by prev, we need to release it
+ * first via finish_lock_switch and then reaquire it here.
+ */
+ if (unlikely(rq->rt.rt_nr_running > 1)) {
+ spin_lock_irq(&rq->lock);
+ push_rt_tasks(rq);
+ spin_unlock_irq(&rq->lock);
+ }
+}
+
/*
* Load-balancing iterator. Note: while the runqueue stays locked
* during the whole iteration, the current task might be
@@ -241,7 +462,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
&rt_rq_iterator);
}
-#endif
+#else /* CONFIG_SMP */
+# define schedule_tail_balance_rt(rq) do { } while (0)
+#endif /* CONFIG_SMP */
static void task_tick_rt(struct rq *rq, struct task_struct *p)
{