diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-10 11:12:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-10 11:12:25 -0700 |
commit | adf4bfc4a9ab86be0b72fa9cadc9e7ab6ad15dfe (patch) | |
tree | bfdc5c20da3406cb75aa320c89460cc1b7aa6919 /tools | |
parent | 8adc0486f3c85e3c1e40c1ce6884317a17c380d3 (diff) | |
parent | 8619e94d3be376bb5e8f20988c0e6e3309d2b09a (diff) | |
download | linux-adf4bfc4a9ab86be0b72fa9cadc9e7ab6ad15dfe.tar.bz2 |
Merge tag 'cgroup-for-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
- cpuset now support isolated cpus.partition type, which will enable
dynamic CPU isolation
- pids.peak added to remember the max number of pids used
- holes in cgroup namespace plugged
- internal cleanups
* tag 'cgroup-for-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (25 commits)
cgroup: use strscpy() is more robust and safer
iocost_monitor: reorder BlkgIterator
cgroup: simplify code in cgroup_apply_control
cgroup: Make cgroup_get_from_id() prettier
cgroup/cpuset: remove unreachable code
cgroup: Remove CFTYPE_PRESSURE
cgroup: Improve cftype add/rm error handling
kselftest/cgroup: Add cpuset v2 partition root state test
cgroup/cpuset: Update description of cpuset.cpus.partition in cgroup-v2.rst
cgroup/cpuset: Make partition invalid if cpumask change violates exclusivity rule
cgroup/cpuset: Relocate a code block in validate_change()
cgroup/cpuset: Show invalid partition reason string
cgroup/cpuset: Add a new isolated cpus.partition type
cgroup/cpuset: Relax constraints to partition & cpus changes
cgroup/cpuset: Allow no-task partition to have empty cpuset.cpus.effective
cgroup/cpuset: Miscellaneous cleanups & add helper functions
cgroup/cpuset: Enable update_tasks_cpumask() on top_cpuset
cgroup: add pids.peak interface for pids controller
cgroup: Remove data-race around cgrp_dfl_visible
cgroup: Fix build failure when CONFIG_SHRINKER_DEBUG
...
Diffstat (limited to 'tools')
-rw-r--r-- | tools/cgroup/iocost_monitor.py | 10 | ||||
-rw-r--r-- | tools/perf/util/bpf_skel/bperf_cgroup.bpf.c | 2 | ||||
-rw-r--r-- | tools/testing/selftests/cgroup/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/cgroup/Makefile | 5 | ||||
-rwxr-xr-x | tools/testing/selftests/cgroup/test_cpuset_prs.sh | 674 | ||||
-rw-r--r-- | tools/testing/selftests/cgroup/wait_inotify.c | 87 |
6 files changed, 771 insertions, 8 deletions
diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py index c4ff907c078b..0dbbc67400fc 100644 --- a/tools/cgroup/iocost_monitor.py +++ b/tools/cgroup/iocost_monitor.py @@ -61,6 +61,11 @@ autop_names = { } class BlkgIterator: + def __init__(self, root_blkcg, q_id, include_dying=False): + self.include_dying = include_dying + self.blkgs = [] + self.walk(root_blkcg, q_id, '') + def blkcg_name(blkcg): return blkcg.css.cgroup.kn.name.string_().decode('utf-8') @@ -82,11 +87,6 @@ class BlkgIterator: blkcg.css.children.address_of_(), 'css.sibling'): self.walk(c, q_id, path) - def __init__(self, root_blkcg, q_id, include_dying=False): - self.include_dying = include_dying - self.blkgs = [] - self.walk(root_blkcg, q_id, '') - def __iter__(self): return iter(self.blkgs) diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c index 9aa8cdd93de4..435a87556688 100644 --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c @@ -77,7 +77,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) break; // convert cgroup-id to a map index - cgrp_id = BPF_CORE_READ(cgrp, ancestor_ids[i]); + cgrp_id = BPF_CORE_READ(cgrp, ancestors[i], kn, id); elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id); if (!elem) continue; diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index 306ee1b01e72..c4a57e69f749 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -5,3 +5,4 @@ test_freezer test_kmem test_kill test_cpu +wait_inotify diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 478217cc1371..3d263747d2ad 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -1,10 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -Wall -pthread -all: +all: ${HELPER_PROGS} TEST_FILES := with_stress.sh -TEST_PROGS := test_stress.sh +TEST_PROGS := test_stress.sh test_cpuset_prs.sh +TEST_GEN_FILES := wait_inotify TEST_GEN_PROGS = test_memcontrol TEST_GEN_PROGS += test_kmem TEST_GEN_PROGS += test_core diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh new file mode 100755 index 000000000000..526d2c42d870 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -0,0 +1,674 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test for cpuset v2 partition root state (PRS) +# +# The sched verbose flag is set, if available, so that the console log +# can be examined for the correct setting of scheduling domain. +# + +skip_test() { + echo "$1" + echo "Test SKIPPED" + exit 0 +} + +[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!" + +# Set sched verbose flag, if available +[[ -d /sys/kernel/debug/sched ]] && echo Y > /sys/kernel/debug/sched/verbose + +# Get wait_inotify location +WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify + +# Find cgroup v2 mount point +CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') +[[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!" + +CPUS=$(lscpu | grep "^CPU(s)" | sed -e "s/.*:[[:space:]]*//") +[[ $CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!" + +# Set verbose flag and delay factor +PROG=$1 +VERBOSE= +DELAY_FACTOR=1 +while [[ "$1" = -* ]] +do + case "$1" in + -v) VERBOSE=1 + break + ;; + -d) DELAY_FACTOR=$2 + shift + break + ;; + *) echo "Usage: $PROG [-v] [-d <delay-factor>" + exit + ;; + esac + shift +done + +cd $CGROUP2 +echo +cpuset > cgroup.subtree_control +[[ -d test ]] || mkdir test +cd test + +# Pause in ms +pause() +{ + DELAY=$1 + LOOP=0 + while [[ $LOOP -lt $DELAY_FACTOR ]] + do + sleep $DELAY + ((LOOP++)) + done + return 0 +} + +console_msg() +{ + MSG=$1 + echo "$MSG" + echo "" > /dev/console + echo "$MSG" > /dev/console + pause 0.01 +} + +test_partition() +{ + EXPECTED_VAL=$1 + echo $EXPECTED_VAL > cpuset.cpus.partition + [[ $? -eq 0 ]] || exit 1 + ACTUAL_VAL=$(cat cpuset.cpus.partition) + [[ $ACTUAL_VAL != $EXPECTED_VAL ]] && { + echo "cpuset.cpus.partition: expect $EXPECTED_VAL, found $EXPECTED_VAL" + echo "Test FAILED" + exit 1 + } +} + +test_effective_cpus() +{ + EXPECTED_VAL=$1 + ACTUAL_VAL=$(cat cpuset.cpus.effective) + [[ "$ACTUAL_VAL" != "$EXPECTED_VAL" ]] && { + echo "cpuset.cpus.effective: expect '$EXPECTED_VAL', found '$EXPECTED_VAL'" + echo "Test FAILED" + exit 1 + } +} + +# Adding current process to cgroup.procs as a test +test_add_proc() +{ + OUTSTR="$1" + ERRMSG=$((echo $$ > cgroup.procs) |& cat) + echo $ERRMSG | grep -q "$OUTSTR" + [[ $? -ne 0 ]] && { + echo "cgroup.procs: expect '$OUTSTR', got '$ERRMSG'" + echo "Test FAILED" + exit 1 + } + echo $$ > $CGROUP2/cgroup.procs # Move out the task +} + +# +# Testing the new "isolated" partition root type +# +test_isolated() +{ + echo 2-3 > cpuset.cpus + TYPE=$(cat cpuset.cpus.partition) + [[ $TYPE = member ]] || echo member > cpuset.cpus.partition + + console_msg "Change from member to root" + test_partition root + + console_msg "Change from root to isolated" + test_partition isolated + + console_msg "Change from isolated to member" + test_partition member + + console_msg "Change from member to isolated" + test_partition isolated + + console_msg "Change from isolated to root" + test_partition root + + console_msg "Change from root to member" + test_partition member + + # + # Testing partition root with no cpu + # + console_msg "Distribute all cpus to child partition" + echo +cpuset > cgroup.subtree_control + test_partition root + + mkdir A1 + cd A1 + echo 2-3 > cpuset.cpus + test_partition root + test_effective_cpus 2-3 + cd .. + test_effective_cpus "" + + console_msg "Moving task to partition test" + test_add_proc "No space left" + cd A1 + test_add_proc "" + cd .. + + console_msg "Shrink and expand child partition" + cd A1 + echo 2 > cpuset.cpus + cd .. + test_effective_cpus 3 + cd A1 + echo 2-3 > cpuset.cpus + cd .. + test_effective_cpus "" + + # Cleaning up + console_msg "Cleaning up" + echo $$ > $CGROUP2/cgroup.procs + [[ -d A1 ]] && rmdir A1 +} + +# +# Cpuset controller state transition test matrix. +# +# Cgroup test hierarchy +# +# test -- A1 -- A2 -- A3 +# \- B1 +# +# P<v> = set cpus.partition (0:member, 1:root, 2:isolated, -1:root invalid) +# C<l> = add cpu-list +# S<p> = use prefix in subtree_control +# T = put a task into cgroup +# O<c>-<v> = Write <v> to CPU online file of <c> +# +SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1" +TEST_MATRIX=( + # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate + # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ + " S+ C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1" + " S+ C0-1 . . C2-3 P1 . . . 0 " + " S+ C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 " + " S+ C0-1 . . C2-3 P1:S+ C1:P1 . . 0 " + " S+ C0-1:S+ . . C2-3 . . . P1 0 " + " S+ C0-1:P1 . . C2-3 S+ C1 . . 0 " + " S+ C0-1:P1 . . C2-3 S+ C1:P1 . . 0 " + " S+ C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 " + " S+ C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5" + " S+ C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" + " S+ C0-1 . . C2-3:P1 . . . C2 0 " + " S+ C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" + " S+ C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3" + " S+ C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3" + " S+ C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1" + " S+ C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0" + " S+ C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2" + " S+ C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1" + " S+ $SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + + # CPU offlining cases: + " S+ C0-1 . . C2-3 S+ C4-5 . O2-0 0 A1:0-1,B1:3" + " S+ C0-3:P1:S+ C2-3:P1 . . O2-0 . . . 0 A1:0-1,A2:3" + " S+ C0-3:P1:S+ C2-3:P1 . . O2-0 O2-1 . . 0 A1:0-1,A2:2-3" + " S+ C0-3:P1:S+ C2-3:P1 . . O1-0 . . . 0 A1:0,A2:2-3" + " S+ C0-3:P1:S+ C2-3:P1 . . O1-0 O1-1 . . 0 A1:0-1,A2:2-3" + " S+ C2-3:P1:S+ C3:P1 . . O3-0 O3-1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + " S+ C2-3:P1:S+ C3:P2 . . O3-0 O3-1 . . 0 A1:2,A2:3 A1:P1,A2:P2" + " S+ C2-3:P1:S+ C3:P1 . . O2-0 O2-1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + " S+ C2-3:P1:S+ C3:P2 . . O2-0 O2-1 . . 0 A1:2,A2:3 A1:P1,A2:P2" + " S+ C2-3:P1:S+ C3:P1 . . O2-0 . . . 0 A1:,A2:3 A1:P1,A2:P1" + " S+ C2-3:P1:S+ C3:P1 . . O3-0 . . . 0 A1:2,A2: A1:P1,A2:P1" + " S+ C2-3:P1:S+ C3:P1 . . T:O2-0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1" + " S+ C2-3:P1:S+ C3:P1 . . . T:O3-0 . . 0 A1:2,A2:2 A1:P1,A2:P-1" + " S+ $SETUP_A123_PARTITIONS . O1-0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + " S+ $SETUP_A123_PARTITIONS . O2-0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" + " S+ $SETUP_A123_PARTITIONS . O3-0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1" + " S+ $SETUP_A123_PARTITIONS . T:O1-0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + " S+ $SETUP_A123_PARTITIONS . . T:O2-0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1" + " S+ $SETUP_A123_PARTITIONS . . . T:O3-0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1" + " S+ $SETUP_A123_PARTITIONS . T:O1-0 O1-1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + " S+ $SETUP_A123_PARTITIONS . . T:O2-0 O2-1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + " S+ $SETUP_A123_PARTITIONS . . . T:O3-0 O3-1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + " S+ $SETUP_A123_PARTITIONS . T:O1-0 O2-0 O1-1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" + " S+ $SETUP_A123_PARTITIONS . T:O1-0 O2-0 O2-1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + + # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate + # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ + # + # Incorrect change to cpuset.cpus invalidates partition root + # + # Adding CPUs to partition root that are not in parent's + # cpuset.cpus is allowed, but those extra CPUs are ignored. + " S+ C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1" + + # Taking away all CPUs from parent or itself if there are tasks + # will make the partition invalid. + " S+ C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1" + " S+ $SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + " S+ $SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + + # Changing a partition root to member makes child partitions invalid + " S+ C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1" + " S+ $SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1" + + # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long + # as they overlap. + " S+ C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + + # Deletion of CPUs distributed to child cgroup is allowed. + " S+ C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5" + + # To become a valid partition root, cpuset.cpus must overlap parent's + # cpuset.cpus. + " S+ C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1" + + # Enabling partition with child cpusets is allowed + " S+ C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1" + + # A partition root with non-partition root parent is invalid, but it + # can be made valid if its parent becomes a partition root too. + " S+ C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2" + " S+ C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2" + + # A non-exclusive cpuset.cpus change will invalidate partition and its siblings + " S+ C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0" + " S+ C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1" + " S+ C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1" + + # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate + # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ + # Failure cases: + + # A task cannot be added to a partition with no cpu + " S+ C2-3:P1:S+ C3:P1 . . O2-0:T . . . 1 A1:,A2:3 A1:P1,A2:P1" +) + +# +# Write to the cpu online file +# $1 - <c>-<v> where <c> = cpu number, <v> value to be written +# +write_cpu_online() +{ + CPU=${1%-*} + VAL=${1#*-} + CPUFILE=//sys/devices/system/cpu/cpu${CPU}/online + if [[ $VAL -eq 0 ]] + then + OFFLINE_CPUS="$OFFLINE_CPUS $CPU" + else + [[ -n "$OFFLINE_CPUS" ]] && { + OFFLINE_CPUS=$(echo $CPU $CPU $OFFLINE_CPUS | fmt -1 |\ + sort | uniq -u) + } + fi + echo $VAL > $CPUFILE + pause 0.01 +} + +# +# Set controller state +# $1 - cgroup directory +# $2 - state +# $3 - showerr +# +# The presence of ":" in state means transition from one to the next. +# +set_ctrl_state() +{ + TMPMSG=/tmp/.msg_$$ + CGRP=$1 + STATE=$2 + SHOWERR=${3}${VERBOSE} + CTRL=${CTRL:=$CONTROLLER} + HASERR=0 + REDIRECT="2> $TMPMSG" + [[ -z "$STATE" || "$STATE" = '.' ]] && return 0 + + rm -f $TMPMSG + for CMD in $(echo $STATE | sed -e "s/:/ /g") + do + TFILE=$CGRP/cgroup.procs + SFILE=$CGRP/cgroup.subtree_control + PFILE=$CGRP/cpuset.cpus.partition + CFILE=$CGRP/cpuset.cpus + S=$(expr substr $CMD 1 1) + if [[ $S = S ]] + then + PREFIX=${CMD#?} + COMM="echo ${PREFIX}${CTRL} > $SFILE" + eval $COMM $REDIRECT + elif [[ $S = C ]] + then + CPUS=${CMD#?} + COMM="echo $CPUS > $CFILE" + eval $COMM $REDIRECT + elif [[ $S = P ]] + then + VAL=${CMD#?} + case $VAL in + 0) VAL=member + ;; + 1) VAL=root + ;; + 2) VAL=isolated + ;; + *) + echo "Invalid partition state - $VAL" + exit 1 + ;; + esac + COMM="echo $VAL > $PFILE" + eval $COMM $REDIRECT + elif [[ $S = O ]] + then + VAL=${CMD#?} + write_cpu_online $VAL + elif [[ $S = T ]] + then + COMM="echo 0 > $TFILE" + eval $COMM $REDIRECT + fi + RET=$? + [[ $RET -ne 0 ]] && { + [[ -n "$SHOWERR" ]] && { + echo "$COMM" + cat $TMPMSG + } + HASERR=1 + } + pause 0.01 + rm -f $TMPMSG + done + return $HASERR +} + +set_ctrl_state_noerr() +{ + CGRP=$1 + STATE=$2 + [[ -d $CGRP ]] || mkdir $CGRP + set_ctrl_state $CGRP $STATE 1 + [[ $? -ne 0 ]] && { + echo "ERROR: Failed to set $2 to cgroup $1!" + exit 1 + } +} + +online_cpus() +{ + [[ -n "OFFLINE_CPUS" ]] && { + for C in $OFFLINE_CPUS + do + write_cpu_online ${C}-1 + done + } +} + +# +# Return 1 if the list of effective cpus isn't the same as the initial list. +# +reset_cgroup_states() +{ + echo 0 > $CGROUP2/cgroup.procs + online_cpus + rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 + set_ctrl_state . S- + pause 0.01 +} + +dump_states() +{ + for DIR in A1 A1/A2 A1/A2/A3 B1 + do + ECPUS=$DIR/cpuset.cpus.effective + PRS=$DIR/cpuset.cpus.partition + [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)" + [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)" + done +} + +# +# Check effective cpus +# $1 - check string, format: <cgroup>:<cpu-list>[,<cgroup>:<cpu-list>]* +# +check_effective_cpus() +{ + CHK_STR=$1 + for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + do + set -- $(echo $CHK | sed -e "s/:/ /g") + CGRP=$1 + CPUS=$2 + [[ $CGRP = A2 ]] && CGRP=A1/A2 + [[ $CGRP = A3 ]] && CGRP=A1/A2/A3 + FILE=$CGRP/cpuset.cpus.effective + [[ -e $FILE ]] || return 1 + [[ $CPUS = $(cat $FILE) ]] || return 1 + done +} + +# +# Check cgroup states +# $1 - check string, format: <cgroup>:<state>[,<cgroup>:<state>]* +# +check_cgroup_states() +{ + CHK_STR=$1 + for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + do + set -- $(echo $CHK | sed -e "s/:/ /g") + CGRP=$1 + STATE=$2 + FILE= + EVAL=$(expr substr $STATE 2 2) + [[ $CGRP = A2 ]] && CGRP=A1/A2 + [[ $CGRP = A3 ]] && CGRP=A1/A2/A3 + + case $STATE in + P*) FILE=$CGRP/cpuset.cpus.partition + ;; + *) echo "Unknown state: $STATE!" + exit 1 + ;; + esac + VAL=$(cat $FILE) + + case "$VAL" in + member) VAL=0 + ;; + root) VAL=1 + ;; + isolated) + VAL=2 + ;; + "root invalid"*) + VAL=-1 + ;; + "isolated invalid"*) + VAL=-2 + ;; + esac + [[ $EVAL != $VAL ]] && return 1 + done + return 0 +} + +# +# Run cpuset state transition test +# $1 - test matrix name +# +# This test is somewhat fragile as delays (sleep x) are added in various +# places to make sure state changes are fully propagated before the next +# action. These delays may need to be adjusted if running in a slower machine. +# +run_state_test() +{ + TEST=$1 + CONTROLLER=cpuset + CPULIST=0-6 + I=0 + eval CNT="\${#$TEST[@]}" + + reset_cgroup_states + echo $CPULIST > cpuset.cpus + echo root > cpuset.cpus.partition + console_msg "Running state transition test ..." + + while [[ $I -lt $CNT ]] + do + echo "Running test $I ..." > /dev/console + eval set -- "\${$TEST[$I]}" + ROOT=$1 + OLD_A1=$2 + OLD_A2=$3 + OLD_A3=$4 + OLD_B1=$5 + NEW_A1=$6 + NEW_A2=$7 + NEW_A3=$8 + NEW_B1=$9 + RESULT=${10} + ECPUS=${11} + STATES=${12} + + set_ctrl_state_noerr . $ROOT + set_ctrl_state_noerr A1 $OLD_A1 + set_ctrl_state_noerr A1/A2 $OLD_A2 + set_ctrl_state_noerr A1/A2/A3 $OLD_A3 + set_ctrl_state_noerr B1 $OLD_B1 + RETVAL=0 + set_ctrl_state A1 $NEW_A1; ((RETVAL += $?)) + set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?)) + set_ctrl_state A1/A2/A3 $NEW_A3; ((RETVAL += $?)) + set_ctrl_state B1 $NEW_B1; ((RETVAL += $?)) + + [[ $RETVAL -ne $RESULT ]] && { + echo "Test $TEST[$I] failed result check!" + eval echo \"\${$TEST[$I]}\" + dump_states + online_cpus + exit 1 + } + + [[ -n "$ECPUS" && "$ECPUS" != . ]] && { + check_effective_cpus $ECPUS + [[ $? -ne 0 ]] && { + echo "Test $TEST[$I] failed effective CPU check!" + eval echo \"\${$TEST[$I]}\" + echo + dump_states + online_cpus + exit 1 + } + } + + [[ -n "$STATES" ]] && { + check_cgroup_states $STATES + [[ $? -ne 0 ]] && { + echo "FAILED: Test $TEST[$I] failed states check!" + eval echo \"\${$TEST[$I]}\" + echo + dump_states + online_cpus + exit 1 + } + } + + reset_cgroup_states + # + # Check to see if effective cpu list changes + # + pause 0.05 + NEWLIST=$(cat cpuset.cpus.effective) + [[ $NEWLIST != $CPULIST ]] && { + echo "Effective cpus changed to $NEWLIST after test $I!" + exit 1 + } + [[ -n "$VERBOSE" ]] && echo "Test $I done." + ((I++)) + done + echo "All $I tests of $TEST PASSED." + + echo member > cpuset.cpus.partition +} + +# +# Wait for inotify event for the given file and read it +# $1: cgroup file to wait for +# $2: file to store the read result +# +wait_inotify() +{ + CGROUP_FILE=$1 + OUTPUT_FILE=$2 + + $WAIT_INOTIFY $CGROUP_FILE + cat $CGROUP_FILE > $OUTPUT_FILE +} + +# +# Test if inotify events are properly generated when going into and out of +# invalid partition state. +# +test_inotify() +{ + ERR=0 + PRS=/tmp/.prs_$$ + [[ -f $WAIT_INOTIFY ]] || { + echo "wait_inotify not found, inotify test SKIPPED." + return + } + + pause 0.01 + echo 1 > cpuset.cpus + echo 0 > cgroup.procs + echo root > cpuset.cpus.partition + pause 0.01 + rm -f $PRS + wait_inotify $PWD/cpuset.cpus.partition $PRS & + pause 0.01 + set_ctrl_state . "O1-0" + pause 0.01 + check_cgroup_states ".:P-1" + if [[ $? -ne 0 ]] + then + echo "FAILED: Inotify test - partition not invalid" + ERR=1 + elif [[ ! -f $PRS ]] + then + echo "FAILED: Inotify test - event not generated" + ERR=1 + kill %1 + elif [[ $(cat $PRS) != "root invalid"* ]] + then + echo "FAILED: Inotify test - incorrect state" + cat $PRS + ERR=1 + fi + online_cpus + echo member > cpuset.cpus.partition + echo 0 > ../cgroup.procs + if [[ $ERR -ne 0 ]] + then + exit 1 + else + echo "Inotify test PASSED" + fi +} + +run_state_test TEST_MATRIX +test_isolated +test_inotify +echo "All tests PASSED." +cd .. +rmdir test diff --git a/tools/testing/selftests/cgroup/wait_inotify.c b/tools/testing/selftests/cgroup/wait_inotify.c new file mode 100644 index 000000000000..e11b431e1b62 --- /dev/null +++ b/tools/testing/selftests/cgroup/wait_inotify.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Wait until an inotify event on the given cgroup file. + */ +#include <linux/limits.h> +#include <sys/inotify.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +static const char usage[] = "Usage: %s [-v] <cgroup_file>\n"; +static char *file; +static int verbose; + +static inline void fail_message(char *msg) +{ + fprintf(stderr, msg, file); + exit(1); +} + +int main(int argc, char *argv[]) +{ + char *cmd = argv[0]; + int c, fd; + struct pollfd fds = { .events = POLLIN, }; + + while ((c = getopt(argc, argv, "v")) != -1) { + switch (c) { + case 'v': + verbose++; + break; + } + argv++, argc--; + } + + if (argc != 2) { + fprintf(stderr, usage, cmd); + return -1; + } + file = argv[1]; + fd = open(file, O_RDONLY); + if (fd < 0) + fail_message("Cgroup file %s not found!\n"); + close(fd); + + fd = inotify_init(); + if (fd < 0) + fail_message("inotify_init() fails on %s!\n"); + if (inotify_add_watch(fd, file, IN_MODIFY) < 0) + fail_message("inotify_add_watch() fails on %s!\n"); + fds.fd = fd; + + /* + * poll waiting loop + */ + for (;;) { + int ret = poll(&fds, 1, 10000); + + if (ret < 0) { + if (errno == EINTR) + continue; + perror("poll"); + exit(1); + } + if ((ret > 0) && (fds.revents & POLLIN)) + break; + } + if (verbose) { + struct inotify_event events[10]; + long len; + + usleep(1000); + len = read(fd, events, sizeof(events)); + printf("Number of events read = %ld\n", + len/sizeof(struct inotify_event)); + } + close(fd); + return 0; +} |