summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 11:12:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 11:12:25 -0700
commitadf4bfc4a9ab86be0b72fa9cadc9e7ab6ad15dfe (patch)
treebfdc5c20da3406cb75aa320c89460cc1b7aa6919 /tools
parent8adc0486f3c85e3c1e40c1ce6884317a17c380d3 (diff)
parent8619e94d3be376bb5e8f20988c0e6e3309d2b09a (diff)
downloadlinux-adf4bfc4a9ab86be0b72fa9cadc9e7ab6ad15dfe.tar.bz2
Merge tag 'cgroup-for-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: - cpuset now support isolated cpus.partition type, which will enable dynamic CPU isolation - pids.peak added to remember the max number of pids used - holes in cgroup namespace plugged - internal cleanups * tag 'cgroup-for-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (25 commits) cgroup: use strscpy() is more robust and safer iocost_monitor: reorder BlkgIterator cgroup: simplify code in cgroup_apply_control cgroup: Make cgroup_get_from_id() prettier cgroup/cpuset: remove unreachable code cgroup: Remove CFTYPE_PRESSURE cgroup: Improve cftype add/rm error handling kselftest/cgroup: Add cpuset v2 partition root state test cgroup/cpuset: Update description of cpuset.cpus.partition in cgroup-v2.rst cgroup/cpuset: Make partition invalid if cpumask change violates exclusivity rule cgroup/cpuset: Relocate a code block in validate_change() cgroup/cpuset: Show invalid partition reason string cgroup/cpuset: Add a new isolated cpus.partition type cgroup/cpuset: Relax constraints to partition & cpus changes cgroup/cpuset: Allow no-task partition to have empty cpuset.cpus.effective cgroup/cpuset: Miscellaneous cleanups & add helper functions cgroup/cpuset: Enable update_tasks_cpumask() on top_cpuset cgroup: add pids.peak interface for pids controller cgroup: Remove data-race around cgrp_dfl_visible cgroup: Fix build failure when CONFIG_SHRINKER_DEBUG ...
Diffstat (limited to 'tools')
-rw-r--r--tools/cgroup/iocost_monitor.py10
-rw-r--r--tools/perf/util/bpf_skel/bperf_cgroup.bpf.c2
-rw-r--r--tools/testing/selftests/cgroup/.gitignore1
-rw-r--r--tools/testing/selftests/cgroup/Makefile5
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh674
-rw-r--r--tools/testing/selftests/cgroup/wait_inotify.c87
6 files changed, 771 insertions, 8 deletions
diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py
index c4ff907c078b..0dbbc67400fc 100644
--- a/tools/cgroup/iocost_monitor.py
+++ b/tools/cgroup/iocost_monitor.py
@@ -61,6 +61,11 @@ autop_names = {
}
class BlkgIterator:
+ def __init__(self, root_blkcg, q_id, include_dying=False):
+ self.include_dying = include_dying
+ self.blkgs = []
+ self.walk(root_blkcg, q_id, '')
+
def blkcg_name(blkcg):
return blkcg.css.cgroup.kn.name.string_().decode('utf-8')
@@ -82,11 +87,6 @@ class BlkgIterator:
blkcg.css.children.address_of_(), 'css.sibling'):
self.walk(c, q_id, path)
- def __init__(self, root_blkcg, q_id, include_dying=False):
- self.include_dying = include_dying
- self.blkgs = []
- self.walk(root_blkcg, q_id, '')
-
def __iter__(self):
return iter(self.blkgs)
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
index 9aa8cdd93de4..435a87556688 100644
--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -77,7 +77,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
break;
// convert cgroup-id to a map index
- cgrp_id = BPF_CORE_READ(cgrp, ancestor_ids[i]);
+ cgrp_id = BPF_CORE_READ(cgrp, ancestors[i], kn, id);
elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
if (!elem)
continue;
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index 306ee1b01e72..c4a57e69f749 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -5,3 +5,4 @@ test_freezer
test_kmem
test_kill
test_cpu
+wait_inotify
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 478217cc1371..3d263747d2ad 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -1,10 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -Wall -pthread
-all:
+all: ${HELPER_PROGS}
TEST_FILES := with_stress.sh
-TEST_PROGS := test_stress.sh
+TEST_PROGS := test_stress.sh test_cpuset_prs.sh
+TEST_GEN_FILES := wait_inotify
TEST_GEN_PROGS = test_memcontrol
TEST_GEN_PROGS += test_kmem
TEST_GEN_PROGS += test_core
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
new file mode 100755
index 000000000000..526d2c42d870
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -0,0 +1,674 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test for cpuset v2 partition root state (PRS)
+#
+# The sched verbose flag is set, if available, so that the console log
+# can be examined for the correct setting of scheduling domain.
+#
+
+skip_test() {
+ echo "$1"
+ echo "Test SKIPPED"
+ exit 0
+}
+
+[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
+
+# Set sched verbose flag, if available
+[[ -d /sys/kernel/debug/sched ]] && echo Y > /sys/kernel/debug/sched/verbose
+
+# Get wait_inotify location
+WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify
+
+# Find cgroup v2 mount point
+CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
+[[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!"
+
+CPUS=$(lscpu | grep "^CPU(s)" | sed -e "s/.*:[[:space:]]*//")
+[[ $CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!"
+
+# Set verbose flag and delay factor
+PROG=$1
+VERBOSE=
+DELAY_FACTOR=1
+while [[ "$1" = -* ]]
+do
+ case "$1" in
+ -v) VERBOSE=1
+ break
+ ;;
+ -d) DELAY_FACTOR=$2
+ shift
+ break
+ ;;
+ *) echo "Usage: $PROG [-v] [-d <delay-factor>"
+ exit
+ ;;
+ esac
+ shift
+done
+
+cd $CGROUP2
+echo +cpuset > cgroup.subtree_control
+[[ -d test ]] || mkdir test
+cd test
+
+# Pause in ms
+pause()
+{
+ DELAY=$1
+ LOOP=0
+ while [[ $LOOP -lt $DELAY_FACTOR ]]
+ do
+ sleep $DELAY
+ ((LOOP++))
+ done
+ return 0
+}
+
+console_msg()
+{
+ MSG=$1
+ echo "$MSG"
+ echo "" > /dev/console
+ echo "$MSG" > /dev/console
+ pause 0.01
+}
+
+test_partition()
+{
+ EXPECTED_VAL=$1
+ echo $EXPECTED_VAL > cpuset.cpus.partition
+ [[ $? -eq 0 ]] || exit 1
+ ACTUAL_VAL=$(cat cpuset.cpus.partition)
+ [[ $ACTUAL_VAL != $EXPECTED_VAL ]] && {
+ echo "cpuset.cpus.partition: expect $EXPECTED_VAL, found $EXPECTED_VAL"
+ echo "Test FAILED"
+ exit 1
+ }
+}
+
+test_effective_cpus()
+{
+ EXPECTED_VAL=$1
+ ACTUAL_VAL=$(cat cpuset.cpus.effective)
+ [[ "$ACTUAL_VAL" != "$EXPECTED_VAL" ]] && {
+ echo "cpuset.cpus.effective: expect '$EXPECTED_VAL', found '$EXPECTED_VAL'"
+ echo "Test FAILED"
+ exit 1
+ }
+}
+
+# Adding current process to cgroup.procs as a test
+test_add_proc()
+{
+ OUTSTR="$1"
+ ERRMSG=$((echo $$ > cgroup.procs) |& cat)
+ echo $ERRMSG | grep -q "$OUTSTR"
+ [[ $? -ne 0 ]] && {
+ echo "cgroup.procs: expect '$OUTSTR', got '$ERRMSG'"
+ echo "Test FAILED"
+ exit 1
+ }
+ echo $$ > $CGROUP2/cgroup.procs # Move out the task
+}
+
+#
+# Testing the new "isolated" partition root type
+#
+test_isolated()
+{
+ echo 2-3 > cpuset.cpus
+ TYPE=$(cat cpuset.cpus.partition)
+ [[ $TYPE = member ]] || echo member > cpuset.cpus.partition
+
+ console_msg "Change from member to root"
+ test_partition root
+
+ console_msg "Change from root to isolated"
+ test_partition isolated
+
+ console_msg "Change from isolated to member"
+ test_partition member
+
+ console_msg "Change from member to isolated"
+ test_partition isolated
+
+ console_msg "Change from isolated to root"
+ test_partition root
+
+ console_msg "Change from root to member"
+ test_partition member
+
+ #
+ # Testing partition root with no cpu
+ #
+ console_msg "Distribute all cpus to child partition"
+ echo +cpuset > cgroup.subtree_control
+ test_partition root
+
+ mkdir A1
+ cd A1
+ echo 2-3 > cpuset.cpus
+ test_partition root
+ test_effective_cpus 2-3
+ cd ..
+ test_effective_cpus ""
+
+ console_msg "Moving task to partition test"
+ test_add_proc "No space left"
+ cd A1
+ test_add_proc ""
+ cd ..
+
+ console_msg "Shrink and expand child partition"
+ cd A1
+ echo 2 > cpuset.cpus
+ cd ..
+ test_effective_cpus 3
+ cd A1
+ echo 2-3 > cpuset.cpus
+ cd ..
+ test_effective_cpus ""
+
+ # Cleaning up
+ console_msg "Cleaning up"
+ echo $$ > $CGROUP2/cgroup.procs
+ [[ -d A1 ]] && rmdir A1
+}
+
+#
+# Cpuset controller state transition test matrix.
+#
+# Cgroup test hierarchy
+#
+# test -- A1 -- A2 -- A3
+# \- B1
+#
+# P<v> = set cpus.partition (0:member, 1:root, 2:isolated, -1:root invalid)
+# C<l> = add cpu-list
+# S<p> = use prefix in subtree_control
+# T = put a task into cgroup
+# O<c>-<v> = Write <v> to CPU online file of <c>
+#
+SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1"
+TEST_MATRIX=(
+ # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
+ # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
+ " S+ C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1"
+ " S+ C0-1 . . C2-3 P1 . . . 0 "
+ " S+ C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 "
+ " S+ C0-1 . . C2-3 P1:S+ C1:P1 . . 0 "
+ " S+ C0-1:S+ . . C2-3 . . . P1 0 "
+ " S+ C0-1:P1 . . C2-3 S+ C1 . . 0 "
+ " S+ C0-1:P1 . . C2-3 S+ C1:P1 . . 0 "
+ " S+ C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 "
+ " S+ C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5"
+ " S+ C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5"
+ " S+ C0-1 . . C2-3:P1 . . . C2 0 "
+ " S+ C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5"
+ " S+ C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3"
+ " S+ C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3"
+ " S+ C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1"
+ " S+ C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0"
+ " S+ C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2"
+ " S+ C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1"
+ " S+ $SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+
+ # CPU offlining cases:
+ " S+ C0-1 . . C2-3 S+ C4-5 . O2-0 0 A1:0-1,B1:3"
+ " S+ C0-3:P1:S+ C2-3:P1 . . O2-0 . . . 0 A1:0-1,A2:3"
+ " S+ C0-3:P1:S+ C2-3:P1 . . O2-0 O2-1 . . 0 A1:0-1,A2:2-3"
+ " S+ C0-3:P1:S+ C2-3:P1 . . O1-0 . . . 0 A1:0,A2:2-3"
+ " S+ C0-3:P1:S+ C2-3:P1 . . O1-0 O1-1 . . 0 A1:0-1,A2:2-3"
+ " S+ C2-3:P1:S+ C3:P1 . . O3-0 O3-1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+ " S+ C2-3:P1:S+ C3:P2 . . O3-0 O3-1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
+ " S+ C2-3:P1:S+ C3:P1 . . O2-0 O2-1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+ " S+ C2-3:P1:S+ C3:P2 . . O2-0 O2-1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
+ " S+ C2-3:P1:S+ C3:P1 . . O2-0 . . . 0 A1:,A2:3 A1:P1,A2:P1"
+ " S+ C2-3:P1:S+ C3:P1 . . O3-0 . . . 0 A1:2,A2: A1:P1,A2:P1"
+ " S+ C2-3:P1:S+ C3:P1 . . T:O2-0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1"
+ " S+ C2-3:P1:S+ C3:P1 . . . T:O3-0 . . 0 A1:2,A2:2 A1:P1,A2:P-1"
+ " S+ $SETUP_A123_PARTITIONS . O1-0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ " S+ $SETUP_A123_PARTITIONS . O2-0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
+ " S+ $SETUP_A123_PARTITIONS . O3-0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1"
+ " S+ $SETUP_A123_PARTITIONS . T:O1-0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+ " S+ $SETUP_A123_PARTITIONS . . T:O2-0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1"
+ " S+ $SETUP_A123_PARTITIONS . . . T:O3-0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1"
+ " S+ $SETUP_A123_PARTITIONS . T:O1-0 O1-1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ " S+ $SETUP_A123_PARTITIONS . . T:O2-0 O2-1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ " S+ $SETUP_A123_PARTITIONS . . . T:O3-0 O3-1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ " S+ $SETUP_A123_PARTITIONS . T:O1-0 O2-0 O1-1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
+ " S+ $SETUP_A123_PARTITIONS . T:O1-0 O2-0 O2-1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+
+ # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
+ # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
+ #
+ # Incorrect change to cpuset.cpus invalidates partition root
+ #
+ # Adding CPUs to partition root that are not in parent's
+ # cpuset.cpus is allowed, but those extra CPUs are ignored.
+ " S+ C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1"
+
+ # Taking away all CPUs from parent or itself if there are tasks
+ # will make the partition invalid.
+ " S+ C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1"
+ " S+ $SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+ " S+ $SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+
+ # Changing a partition root to member makes child partitions invalid
+ " S+ C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1"
+ " S+ $SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1"
+
+ # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long
+ # as they overlap.
+ " S+ C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+
+ # Deletion of CPUs distributed to child cgroup is allowed.
+ " S+ C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5"
+
+ # To become a valid partition root, cpuset.cpus must overlap parent's
+ # cpuset.cpus.
+ " S+ C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1"
+
+ # Enabling partition with child cpusets is allowed
+ " S+ C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1"
+
+ # A partition root with non-partition root parent is invalid, but it
+ # can be made valid if its parent becomes a partition root too.
+ " S+ C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2"
+ " S+ C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2"
+
+ # A non-exclusive cpuset.cpus change will invalidate partition and its siblings
+ " S+ C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0"
+ " S+ C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1"
+ " S+ C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1"
+
+ # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
+ # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
+ # Failure cases:
+
+ # A task cannot be added to a partition with no cpu
+ " S+ C2-3:P1:S+ C3:P1 . . O2-0:T . . . 1 A1:,A2:3 A1:P1,A2:P1"
+)
+
+#
+# Write to the cpu online file
+# $1 - <c>-<v> where <c> = cpu number, <v> value to be written
+#
+write_cpu_online()
+{
+ CPU=${1%-*}
+ VAL=${1#*-}
+ CPUFILE=//sys/devices/system/cpu/cpu${CPU}/online
+ if [[ $VAL -eq 0 ]]
+ then
+ OFFLINE_CPUS="$OFFLINE_CPUS $CPU"
+ else
+ [[ -n "$OFFLINE_CPUS" ]] && {
+ OFFLINE_CPUS=$(echo $CPU $CPU $OFFLINE_CPUS | fmt -1 |\
+ sort | uniq -u)
+ }
+ fi
+ echo $VAL > $CPUFILE
+ pause 0.01
+}
+
+#
+# Set controller state
+# $1 - cgroup directory
+# $2 - state
+# $3 - showerr
+#
+# The presence of ":" in state means transition from one to the next.
+#
+set_ctrl_state()
+{
+ TMPMSG=/tmp/.msg_$$
+ CGRP=$1
+ STATE=$2
+ SHOWERR=${3}${VERBOSE}
+ CTRL=${CTRL:=$CONTROLLER}
+ HASERR=0
+ REDIRECT="2> $TMPMSG"
+ [[ -z "$STATE" || "$STATE" = '.' ]] && return 0
+
+ rm -f $TMPMSG
+ for CMD in $(echo $STATE | sed -e "s/:/ /g")
+ do
+ TFILE=$CGRP/cgroup.procs
+ SFILE=$CGRP/cgroup.subtree_control
+ PFILE=$CGRP/cpuset.cpus.partition
+ CFILE=$CGRP/cpuset.cpus
+ S=$(expr substr $CMD 1 1)
+ if [[ $S = S ]]
+ then
+ PREFIX=${CMD#?}
+ COMM="echo ${PREFIX}${CTRL} > $SFILE"
+ eval $COMM $REDIRECT
+ elif [[ $S = C ]]
+ then
+ CPUS=${CMD#?}
+ COMM="echo $CPUS > $CFILE"
+ eval $COMM $REDIRECT
+ elif [[ $S = P ]]
+ then
+ VAL=${CMD#?}
+ case $VAL in
+ 0) VAL=member
+ ;;
+ 1) VAL=root
+ ;;
+ 2) VAL=isolated
+ ;;
+ *)
+ echo "Invalid partition state - $VAL"
+ exit 1
+ ;;
+ esac
+ COMM="echo $VAL > $PFILE"
+ eval $COMM $REDIRECT
+ elif [[ $S = O ]]
+ then
+ VAL=${CMD#?}
+ write_cpu_online $VAL
+ elif [[ $S = T ]]
+ then
+ COMM="echo 0 > $TFILE"
+ eval $COMM $REDIRECT
+ fi
+ RET=$?
+ [[ $RET -ne 0 ]] && {
+ [[ -n "$SHOWERR" ]] && {
+ echo "$COMM"
+ cat $TMPMSG
+ }
+ HASERR=1
+ }
+ pause 0.01
+ rm -f $TMPMSG
+ done
+ return $HASERR
+}
+
+set_ctrl_state_noerr()
+{
+ CGRP=$1
+ STATE=$2
+ [[ -d $CGRP ]] || mkdir $CGRP
+ set_ctrl_state $CGRP $STATE 1
+ [[ $? -ne 0 ]] && {
+ echo "ERROR: Failed to set $2 to cgroup $1!"
+ exit 1
+ }
+}
+
+online_cpus()
+{
+ [[ -n "OFFLINE_CPUS" ]] && {
+ for C in $OFFLINE_CPUS
+ do
+ write_cpu_online ${C}-1
+ done
+ }
+}
+
+#
+# Return 1 if the list of effective cpus isn't the same as the initial list.
+#
+reset_cgroup_states()
+{
+ echo 0 > $CGROUP2/cgroup.procs
+ online_cpus
+ rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
+ set_ctrl_state . S-
+ pause 0.01
+}
+
+dump_states()
+{
+ for DIR in A1 A1/A2 A1/A2/A3 B1
+ do
+ ECPUS=$DIR/cpuset.cpus.effective
+ PRS=$DIR/cpuset.cpus.partition
+ [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)"
+ [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)"
+ done
+}
+
+#
+# Check effective cpus
+# $1 - check string, format: <cgroup>:<cpu-list>[,<cgroup>:<cpu-list>]*
+#
+check_effective_cpus()
+{
+ CHK_STR=$1
+ for CHK in $(echo $CHK_STR | sed -e "s/,/ /g")
+ do
+ set -- $(echo $CHK | sed -e "s/:/ /g")
+ CGRP=$1
+ CPUS=$2
+ [[ $CGRP = A2 ]] && CGRP=A1/A2
+ [[ $CGRP = A3 ]] && CGRP=A1/A2/A3
+ FILE=$CGRP/cpuset.cpus.effective
+ [[ -e $FILE ]] || return 1
+ [[ $CPUS = $(cat $FILE) ]] || return 1
+ done
+}
+
+#
+# Check cgroup states
+# $1 - check string, format: <cgroup>:<state>[,<cgroup>:<state>]*
+#
+check_cgroup_states()
+{
+ CHK_STR=$1
+ for CHK in $(echo $CHK_STR | sed -e "s/,/ /g")
+ do
+ set -- $(echo $CHK | sed -e "s/:/ /g")
+ CGRP=$1
+ STATE=$2
+ FILE=
+ EVAL=$(expr substr $STATE 2 2)
+ [[ $CGRP = A2 ]] && CGRP=A1/A2
+ [[ $CGRP = A3 ]] && CGRP=A1/A2/A3
+
+ case $STATE in
+ P*) FILE=$CGRP/cpuset.cpus.partition
+ ;;
+ *) echo "Unknown state: $STATE!"
+ exit 1
+ ;;
+ esac
+ VAL=$(cat $FILE)
+
+ case "$VAL" in
+ member) VAL=0
+ ;;
+ root) VAL=1
+ ;;
+ isolated)
+ VAL=2
+ ;;
+ "root invalid"*)
+ VAL=-1
+ ;;
+ "isolated invalid"*)
+ VAL=-2
+ ;;
+ esac
+ [[ $EVAL != $VAL ]] && return 1
+ done
+ return 0
+}
+
+#
+# Run cpuset state transition test
+# $1 - test matrix name
+#
+# This test is somewhat fragile as delays (sleep x) are added in various
+# places to make sure state changes are fully propagated before the next
+# action. These delays may need to be adjusted if running in a slower machine.
+#
+run_state_test()
+{
+ TEST=$1
+ CONTROLLER=cpuset
+ CPULIST=0-6
+ I=0
+ eval CNT="\${#$TEST[@]}"
+
+ reset_cgroup_states
+ echo $CPULIST > cpuset.cpus
+ echo root > cpuset.cpus.partition
+ console_msg "Running state transition test ..."
+
+ while [[ $I -lt $CNT ]]
+ do
+ echo "Running test $I ..." > /dev/console
+ eval set -- "\${$TEST[$I]}"
+ ROOT=$1
+ OLD_A1=$2
+ OLD_A2=$3
+ OLD_A3=$4
+ OLD_B1=$5
+ NEW_A1=$6
+ NEW_A2=$7
+ NEW_A3=$8
+ NEW_B1=$9
+ RESULT=${10}
+ ECPUS=${11}
+ STATES=${12}
+
+ set_ctrl_state_noerr . $ROOT
+ set_ctrl_state_noerr A1 $OLD_A1
+ set_ctrl_state_noerr A1/A2 $OLD_A2
+ set_ctrl_state_noerr A1/A2/A3 $OLD_A3
+ set_ctrl_state_noerr B1 $OLD_B1
+ RETVAL=0
+ set_ctrl_state A1 $NEW_A1; ((RETVAL += $?))
+ set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?))
+ set_ctrl_state A1/A2/A3 $NEW_A3; ((RETVAL += $?))
+ set_ctrl_state B1 $NEW_B1; ((RETVAL += $?))
+
+ [[ $RETVAL -ne $RESULT ]] && {
+ echo "Test $TEST[$I] failed result check!"
+ eval echo \"\${$TEST[$I]}\"
+ dump_states
+ online_cpus
+ exit 1
+ }
+
+ [[ -n "$ECPUS" && "$ECPUS" != . ]] && {
+ check_effective_cpus $ECPUS
+ [[ $? -ne 0 ]] && {
+ echo "Test $TEST[$I] failed effective CPU check!"
+ eval echo \"\${$TEST[$I]}\"
+ echo
+ dump_states
+ online_cpus
+ exit 1
+ }
+ }
+
+ [[ -n "$STATES" ]] && {
+ check_cgroup_states $STATES
+ [[ $? -ne 0 ]] && {
+ echo "FAILED: Test $TEST[$I] failed states check!"
+ eval echo \"\${$TEST[$I]}\"
+ echo
+ dump_states
+ online_cpus
+ exit 1
+ }
+ }
+
+ reset_cgroup_states
+ #
+ # Check to see if effective cpu list changes
+ #
+ pause 0.05
+ NEWLIST=$(cat cpuset.cpus.effective)
+ [[ $NEWLIST != $CPULIST ]] && {
+ echo "Effective cpus changed to $NEWLIST after test $I!"
+ exit 1
+ }
+ [[ -n "$VERBOSE" ]] && echo "Test $I done."
+ ((I++))
+ done
+ echo "All $I tests of $TEST PASSED."
+
+ echo member > cpuset.cpus.partition
+}
+
+#
+# Wait for inotify event for the given file and read it
+# $1: cgroup file to wait for
+# $2: file to store the read result
+#
+wait_inotify()
+{
+ CGROUP_FILE=$1
+ OUTPUT_FILE=$2
+
+ $WAIT_INOTIFY $CGROUP_FILE
+ cat $CGROUP_FILE > $OUTPUT_FILE
+}
+
+#
+# Test if inotify events are properly generated when going into and out of
+# invalid partition state.
+#
+test_inotify()
+{
+ ERR=0
+ PRS=/tmp/.prs_$$
+ [[ -f $WAIT_INOTIFY ]] || {
+ echo "wait_inotify not found, inotify test SKIPPED."
+ return
+ }
+
+ pause 0.01
+ echo 1 > cpuset.cpus
+ echo 0 > cgroup.procs
+ echo root > cpuset.cpus.partition
+ pause 0.01
+ rm -f $PRS
+ wait_inotify $PWD/cpuset.cpus.partition $PRS &
+ pause 0.01
+ set_ctrl_state . "O1-0"
+ pause 0.01
+ check_cgroup_states ".:P-1"
+ if [[ $? -ne 0 ]]
+ then
+ echo "FAILED: Inotify test - partition not invalid"
+ ERR=1
+ elif [[ ! -f $PRS ]]
+ then
+ echo "FAILED: Inotify test - event not generated"
+ ERR=1
+ kill %1
+ elif [[ $(cat $PRS) != "root invalid"* ]]
+ then
+ echo "FAILED: Inotify test - incorrect state"
+ cat $PRS
+ ERR=1
+ fi
+ online_cpus
+ echo member > cpuset.cpus.partition
+ echo 0 > ../cgroup.procs
+ if [[ $ERR -ne 0 ]]
+ then
+ exit 1
+ else
+ echo "Inotify test PASSED"
+ fi
+}
+
+run_state_test TEST_MATRIX
+test_isolated
+test_inotify
+echo "All tests PASSED."
+cd ..
+rmdir test
diff --git a/tools/testing/selftests/cgroup/wait_inotify.c b/tools/testing/selftests/cgroup/wait_inotify.c
new file mode 100644
index 000000000000..e11b431e1b62
--- /dev/null
+++ b/tools/testing/selftests/cgroup/wait_inotify.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wait until an inotify event on the given cgroup file.
+ */
+#include <linux/limits.h>
+#include <sys/inotify.h>
+#include <sys/mman.h>
+#include <sys/ptrace.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static const char usage[] = "Usage: %s [-v] <cgroup_file>\n";
+static char *file;
+static int verbose;
+
+static inline void fail_message(char *msg)
+{
+ fprintf(stderr, msg, file);
+ exit(1);
+}
+
+int main(int argc, char *argv[])
+{
+ char *cmd = argv[0];
+ int c, fd;
+ struct pollfd fds = { .events = POLLIN, };
+
+ while ((c = getopt(argc, argv, "v")) != -1) {
+ switch (c) {
+ case 'v':
+ verbose++;
+ break;
+ }
+ argv++, argc--;
+ }
+
+ if (argc != 2) {
+ fprintf(stderr, usage, cmd);
+ return -1;
+ }
+ file = argv[1];
+ fd = open(file, O_RDONLY);
+ if (fd < 0)
+ fail_message("Cgroup file %s not found!\n");
+ close(fd);
+
+ fd = inotify_init();
+ if (fd < 0)
+ fail_message("inotify_init() fails on %s!\n");
+ if (inotify_add_watch(fd, file, IN_MODIFY) < 0)
+ fail_message("inotify_add_watch() fails on %s!\n");
+ fds.fd = fd;
+
+ /*
+ * poll waiting loop
+ */
+ for (;;) {
+ int ret = poll(&fds, 1, 10000);
+
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ perror("poll");
+ exit(1);
+ }
+ if ((ret > 0) && (fds.revents & POLLIN))
+ break;
+ }
+ if (verbose) {
+ struct inotify_event events[10];
+ long len;
+
+ usleep(1000);
+ len = read(fd, events, sizeof(events));
+ printf("Number of events read = %ld\n",
+ len/sizeof(struct inotify_event));
+ }
+ close(fd);
+ return 0;
+}