summary | refs | log | tree | commit | diff | stats
path: root/kernel/sched/psi.c
diff options
context:
space:
mode:
author: Jakub Kicinski <kuba@kernel.org> 2021-05-27 09:22:11 -0700
committer: Jakub Kicinski <kuba@kernel.org> 2021-05-27 09:55:10 -0700
commit: 5ada57a9a6b0be0e6dfcbd4afa519b0347fd5649 (patch)
tree: f82f466df9272bb0c385320b25adc51c45309f84 /kernel/sched/psi.c
parent: 59c56342459a483d5e563ed8b5fdb77ab7622a73 (diff)
parent: d7c5303fbc8ac874ae3e597a5a0d3707dc0230b4 (diff)
download: linux-5ada57a9a6b0be0e6dfcbd4afa519b0347fd5649.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
cdc-wdm: s/kill_urbs/poison_urbs/ to fix build
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel/sched/psi.c')
-rw-r--r-- kernel/sched/psi.c | 36
1 files changed, 26 insertions, 10 deletions
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index db27b69fa92a..cc25a3cff41f 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -972,7 +972,7 @@ void psi_cgroup_free(struct cgroup *cgroup)
*/
void cgroup_move_task(struct task_struct *task, struct css_set *to)
{
- unsigned int task_flags = 0;
+ unsigned int task_flags;
struct rq_flags rf;
struct rq *rq;
@@ -987,15 +987,31 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to)
rq = task_rq_lock(task, &rf);
- if (task_on_rq_queued(task)) {
- task_flags = TSK_RUNNING;
- if (task_current(rq, task))
- task_flags |= TSK_ONCPU;
- } else if (task->in_iowait)
- task_flags = TSK_IOWAIT;
-
- if (task->in_memstall)
- task_flags |= TSK_MEMSTALL;
+ /*
+ * We may race with schedule() dropping the rq lock between
+ * deactivating prev and switching to next. Because the psi
+ * updates from the deactivation are deferred to the switch
+ * callback to save cgroup tree updates, the task's scheduling
+ * state here is not coherent with its psi state:
+ *
+ * schedule()                   cgroup_move_task()
+ *   rq_lock()
+ *   deactivate_task()
+ *     p->on_rq = 0
+ *     psi_dequeue() // defers TSK_RUNNING & TSK_IOWAIT updates
+ *   pick_next_task()
+ *     rq_unlock()
+ *                                rq_lock()
+ *                                psi_task_change() // old cgroup
+ *                                task->cgroups = to
+ *                                psi_task_change() // new cgroup
+ *                                rq_unlock()
+ *     rq_lock()
+ *   psi_sched_switch() // does deferred updates in new cgroup
+ *
+ * Don't rely on the scheduling state. Use psi_flags instead.
+ */
+ task_flags = task->psi_flags;
if (task_flags)
psi_task_change(task, task_flags, 0);