From 940442deea98b3280061095dd811e6136f1b41f6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Apr 2020 09:12:55 -0300 Subject: tools include UAPI: Sync linux/vhost.h with the kernel sources To get the changes in: b04d910af330b55e ("vdpa: support exposing the count of vqs to userspace") a61280ddddaa45f9 ("vdpa: support exposing the config size to userspace") Silencing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h' diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h $ diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h --- tools/include/uapi/linux/vhost.h 2021-07-15 16:17:01.840818309 -0300 +++ include/uapi/linux/vhost.h 2022-04-02 18:55:05.702522387 -0300 @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > before $ cp include/uapi/linux/vhost.h tools/include/uapi/linux/vhost.h $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > after $ diff -u before after --- before 2022-04-04 14:52:25.036375145 -0300 +++ after 2022-04-04 14:52:31.906549976 -0300 @@ -38,4 +38,6 @@ [0x73] = "VDPA_GET_CONFIG", [0x76] = "VDPA_GET_VRING_NUM", [0x78] = "VDPA_GET_IOVA_RANGE", + [0x79] = "VDPA_GET_CONFIG_SIZE", + [0x80] = "VDPA_GET_VQS_COUNT", }; $ Cc: Longpeng Cc: Michael S. Tsirkin Link: https://lore.kernel.org/lkml/YksxoFcOARk%2Fldev@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/vhost.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index c998860d7bbc..5d99e7c242a2 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif -- cgit v1.2.3 From 541f695cbcb6932c22638b06e0cbe1d56177e2e9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Apr 2022 17:28:48 -0300 Subject: tools build: Use $(shell ) instead of `` to get embedded libperl's ccopts Just like its done for ldopts and for both in tools/perf/Makefile.config. Using `` to initialize PERL_EMBED_CCOPTS somehow precludes using: $(filter-out SOMETHING_TO_FILTER,$(PERL_EMBED_CCOPTS)) And we need to do it to allow for building with versions of clang where some gcc options selected by distros are not available. Tested-by: Sedat Dilek # Debian/Selfmade LLVM-14 (x86-64) Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Link: http://lore.kernel.org/lkml/YktYX2OnLtyobRYD@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 1480910c792e..90774b60d31b 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -217,7 +217,7 @@ strip-libs = $(filter-out -l%,$(1)) PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) -PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` +PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) $(OUTPUT)test-libperl.bin: -- cgit v1.2.3 From 41caff459a5b956b3e23ba9ca759dd0629ad3dda Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Apr 2022 10:33:21 -0300 Subject: tools build: Filter out options and warnings not supported by clang These make the feature check fail when using clang, so remove them just like is done in tools/perf/Makefile.config to build perf itself. Adding -Wno-compound-token-split-by-macro to tools/perf/Makefile.config when building with clang is also necessary to avoid these warnings turned into errors (-Werror): CC /tmp/build/perf/util/scripting-engines/trace-event-perl.o In file included from util/scripting-engines/trace-event-perl.c:35: In file included from /usr/lib64/perl5/CORE/perl.h:4085: In file included from /usr/lib64/perl5/CORE/hv.h:659: In file included from /usr/lib64/perl5/CORE/hv_func.h:34: In file included from /usr/lib64/perl5/CORE/sbox32_hash.h:4: /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: error: '(' and '{' tokens introducing statement expression appear in different macro expansion contexts [-Werror,-Wcompound-token-split-by-macro] ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:80:38: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' #define ZAPHOD32_SCRAMBLE32(v,prime) STMT_START { \ ^~~~~~~~~~ /usr/lib64/perl5/CORE/perl.h:737:29: note: expanded from macro 'STMT_START' # define STMT_START (void)( /* gcc supports "({ STATEMENTS; })" */ ^ /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: note: '{' token is here ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:80:49: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' #define ZAPHOD32_SCRAMBLE32(v,prime) STMT_START { \ ^ /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: error: '}' and ')' tokens terminating statement expression appear in different macro expansion contexts [-Werror,-Wcompound-token-split-by-macro] ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:87:41: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' v ^= (v>>23); \ ^ /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: note: ')' token is here ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:88:3: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' } STMT_END ^~~~~~~~ /usr/lib64/perl5/CORE/perl.h:738:21: note: expanded from macro 'STMT_END' # define STMT_END ) ^ Please refer to the discussion on the Link: tag below, where Nathan clarifies the situation: acme> And then get to the problems at the end of this message, which seem acme> similar to the problem described here: acme> acme> From Nathan Chancellor <> acme> Subject [PATCH] mwifiex: Remove unnecessary braces from HostCmd_SET_SEQ_NO_BSS_INFO acme> acme> https://lkml.org/lkml/2020/9/1/135 acme> acme> So perhaps in this case its better to disable that acme> -Werror,-Wcompound-token-split-by-macro when building with clang? Yes, I think that is probably the best solution. As far as I can tell, at least in this file and context, the warning appears harmless, as the "create a GNU C statement expression from two different macros" is very much intentional, based on the presence of PERL_USE_GCC_BRACE_GROUPS. The warning is fixed in upstream Perl by just avoiding creating GNU C statement expressions using STMT_START and STMT_END: https://github.com/Perl/perl5/issues/18780 https://github.com/Perl/perl5/pull/18984 If I am reading the source code correctly, an alternative to disabling the warning would be specifying -DPERL_GCC_BRACE_GROUPS_FORBIDDEN but it seems like that might end up impacting more than just this site, according to the issue discussion above. Based-on-a-patch-by: Sedat Dilek Tested-by: Sedat Dilek # Debian/Selfmade LLVM-14 (x86-64) Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Link: http://lore.kernel.org/lkml/YkxWcYzph5pC1EK8@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 7 +++++++ tools/perf/Makefile.config | 3 +++ 2 files changed, 10 insertions(+) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 90774b60d31b..de66e1cc0734 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -220,6 +220,13 @@ PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) +ifeq ($(CC_NO_CLANG), 0) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro +endif + $(OUTPUT)test-libperl.bin: $(BUILD) $(FLAGS_PERL_EMBED) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 96ad944ca6a8..5b5ba475a5c0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -790,6 +790,9 @@ else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) CFLAGS += -DHAVE_LIBPERL_SUPPORT + ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -Wno-compound-token-split-by-macro + endif $(call detected,CONFIG_LIBPERL) endif endif -- cgit v1.2.3 From dd6e1fe91cdd52774ca642d1da75b58a86356b56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2022 10:08:07 -0300 Subject: perf python: Fix probing for some clang command line options The clang compiler complains about some options even without a source file being available, while others require one, so use the simple tools/build/feature/test-hello.c file. Then check for the "is not supported" string in its output, in addition to the "unknown argument" already being looked for. This was noticed when building with clang-13 where -ffat-lto-objects isn't supported and since we were looking just for "unknown argument" and not providing a source code to clang, was mistakenly assumed as being available and not being filtered to set of command line options provided to clang, leading to a build failure. Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Sedat Dilek Link: http://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 483f05004e68..6156bb87ee3e 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,12 +1,14 @@ -from os import getenv +from os import getenv, path from subprocess import Popen, PIPE from re import sub cc = getenv("CC") cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline() +src_feature_tests = getenv('srctree') + '/tools/build/feature' def clang_has_option(option): - return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] + cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines() + return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ] if cc_is_clang: from distutils.sysconfig import get_config_vars -- cgit v1.2.3 From 3a8a0475861a443f02e3a9b57d044fe2a0a99291 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Apr 2022 11:04:20 -0300 Subject: perf build: Don't use -ffat-lto-objects in the python feature test when building with clang-13 Using -ffat-lto-objects in the python feature test when building with clang-13 results in: clang-13: error: optimization flag '-ffat-lto-objects' is not supported [-Werror,-Wignored-optimization-argument] error: command '/usr/sbin/clang' failed with exit code 1 cp: cannot stat '/tmp/build/perf/python_ext_build/lib/perf*.so': No such file or directory make[2]: *** [Makefile.perf:639: /tmp/build/perf/python/perf.so] Error 1 Noticed when building on a docker.io/library/archlinux:base container. Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Sedat Dilek Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 3 +++ tools/perf/util/setup.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 5b5ba475a5c0..f3bf9297bcc0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -272,6 +272,9 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) + ifeq ($(CC_NO_CLANG), 0) + PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS)) + endif endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 6156bb87ee3e..c255a2c90cd6 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -25,6 +25,8 @@ if cc_is_clang: vars[var] = sub("-fstack-protector-strong", "", vars[var]) if not clang_has_option("-fno-semantic-interposition"): vars[var] = sub("-fno-semantic-interposition", "", vars[var]) + if not clang_has_option("-ffat-lto-objects"): + vars[var] = sub("-ffat-lto-objects", "", vars[var]) from distutils.core import setup, Extension -- cgit v1.2.3 From 290fa68bdc4588637849adb8534301a1e62beee2 Mon Sep 17 00:00:00 2001 From: Chengdong Li Date: Fri, 8 Apr 2022 16:47:48 +0800 Subject: perf test tsc: Fix error message when not supported By default `perf test tsc` does not return the error message when the child process detected kernel does not support it. Instead, the child process prints an error message to stderr, unfortunately stderr is redirected to /dev/null when verbose <= 0. This patch does: - return TEST_SKIP to the parent process instead of TEST_OK when perf_read_tsc_conversion() is not supported. - Add a new subtest of testing if TSC is supported on current architecture by moving exist code to a separate function. It avoids two places in test__perf_time_to_tsc() that return TEST_SKIP by doing this. - Extend the test suite definition to contain above two subtests. Current test_suite and test_case structs do not support printing skip reason when the number of subtest less than 1. To print skip reason, it is necessary to extend current test suite definition. Reviewed-by: Adrian Hunter Signed-off-by: Chengdong Li Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: likexu@tencent.com Link: https://lore.kernel.org/r/20220408084748.43707-1-chengdongli@tencent.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/perf-time-to-tsc.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index d12d0ad81801..cc6df49a65a1 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -47,6 +47,17 @@ } \ } +static int test__tsc_is_supported(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + if (!TSC_IS_SUPPORTED) { + pr_debug("Test not supported on this architecture\n"); + return TEST_SKIP; + } + + return TEST_OK; +} + /** * test__perf_time_to_tsc - test converting perf time to TSC. * @@ -70,7 +81,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su struct perf_cpu_map *cpus = NULL; struct evlist *evlist = NULL; struct evsel *evsel = NULL; - int err = -1, ret, i; + int err = TEST_FAIL, ret, i; const char *comm1, *comm2; struct perf_tsc_conversion tc; struct perf_event_mmap_page *pc; @@ -79,10 +90,6 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su u64 test_time, comm1_time = 0, comm2_time = 0; struct mmap *md; - if (!TSC_IS_SUPPORTED) { - pr_debug("Test not supported on this architecture"); - return TEST_SKIP; - } threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); @@ -124,8 +131,8 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su ret = perf_read_tsc_conversion(pc, &tc); if (ret) { if (ret == -EOPNOTSUPP) { - fprintf(stderr, " (not supported)"); - return 0; + pr_debug("perf_read_tsc_conversion is not supported in current kernel\n"); + err = TEST_SKIP; } goto out_err; } @@ -191,7 +198,7 @@ next_event: test_tsc >= comm2_tsc) goto out_err; - err = 0; + err = TEST_OK; out_err: evlist__delete(evlist); @@ -200,4 +207,15 @@ out_err: return err; } -DEFINE_SUITE("Convert perf time to TSC", perf_time_to_tsc); +static struct test_case time_to_tsc_tests[] = { + TEST_CASE_REASON("TSC support", tsc_is_supported, + "This architecture does not support"), + TEST_CASE_REASON("Perf time to TSC", perf_time_to_tsc, + "perf_read_tsc_conversion is not supported"), + { .name = NULL, } +}; + +struct test_suite suite__perf_time_to_tsc = { + .desc = "Convert perf time to TSC", + .test_cases = time_to_tsc_tests, +}; -- cgit v1.2.3 From 278aaba2c555a54e62aec40e04defaa9fffcc1c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 9 Apr 2022 11:48:15 -0300 Subject: tools headers arm64: Sync arm64's cputype.h with the kernel sources To get the changes in: 83bea32ac7ed37bb ("arm64: Add part number for Arm Cortex-A78AE") That addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/arm64/include/asm/cputype.h' differs from latest version at 'arch/arm64/include/asm/cputype.h' diff -u tools/arch/arm64/include/asm/cputype.h arch/arm64/include/asm/cputype.h Cc: Ali Saidi Cc: Andrew Kilroy Cc: Chanho Park Cc: German Gomez Cc: James Clark Cc: John Garry Cc: Leo Yan Cc: Will Deacon Link: http://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 9afcc6467a09..e09d6908a21d 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 @@ -130,6 +131,7 @@ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) -- cgit v1.2.3 From fa7095c5c3240bb2ecbc77f8b69be9b1d9e2cf60 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 6 Apr 2022 15:56:51 +0100 Subject: perf unwind: Don't show unwind error messages when augmenting frame pointer stack Commit Fixes: b9f6fbb3b2c29736 ("perf arm64: Inject missing frames when using 'perf record --call-graph=fp'") intended to add a 'best effort' DWARF unwind that improved the frame pointer stack in most scenarios. It's expected that the unwind will fail sometimes, but this shouldn't be reported as an error. It only works when the return address can be determined from the contents of the link register alone. Fix the error shown when the unwinder requires extra registers by adding a new flag that suppresses error messages. This flag is not set in the normal --call-graph=dwarf unwind mode so that behavior is not changed. Fixes: b9f6fbb3b2c29736 ("perf arm64: Inject missing frames when using 'perf record --call-graph=fp'") Reported-by: John Garry Signed-off-by: James Clark Tested-by: John Garry Cc: Alexander Shishkin Cc: Alexandre Truong Cc: German Gomez Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20220406145651.1392529-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/dwarf-unwind.c | 2 +- tools/perf/util/arm64-frame-pointer-unwind-support.c | 2 +- tools/perf/util/machine.c | 2 +- tools/perf/util/unwind-libdw.c | 10 +++++++--- tools/perf/util/unwind-libdw.h | 1 + tools/perf/util/unwind-libunwind-local.c | 10 +++++++--- tools/perf/util/unwind-libunwind.c | 6 ++++-- tools/perf/util/unwind.h | 13 ++++++++++--- 8 files changed, 32 insertions(+), 14 deletions(-) diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2dab2d262060..afdca7f2959f 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -122,7 +122,7 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__thread(struct thread *thr } err = unwind__get_entries(unwind_entry, &cnt, thread, - &sample, MAX_STACK); + &sample, MAX_STACK, false); if (err) pr_debug("unwind failed\n"); else if (cnt != MAX_STACK) { diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c index 2242a885fbd7..4940be4a0569 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.c +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -53,7 +53,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0; } - ret = unwind__get_entries(add_entry, &entries, thread, sample, 2); + ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true); sample->user_regs = old_regs; if (ret || entries.length != 2) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b80048546451..95391236f5f6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2987,7 +2987,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread, return 0; return unwind__get_entries(unwind_entry, cursor, - thread, sample, max_stack); + thread, sample, max_stack, false); } int thread__resolve_callchain(struct thread *thread, diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index a74b517f7497..94aa40f6e348 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -200,7 +200,8 @@ frame_callback(Dwfl_Frame *state, void *arg) bool isactivation; if (!dwfl_frame_pc(state, &pc, NULL)) { - pr_err("%s", dwfl_errmsg(-1)); + if (!ui->best_effort) + pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } @@ -208,7 +209,8 @@ frame_callback(Dwfl_Frame *state, void *arg) report_module(pc, ui); if (!dwfl_frame_pc(state, &pc, &isactivation)) { - pr_err("%s", dwfl_errmsg(-1)); + if (!ui->best_effort) + pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } @@ -222,7 +224,8 @@ frame_callback(Dwfl_Frame *state, void *arg) int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, struct perf_sample *data, - int max_stack) + int max_stack, + bool best_effort) { struct unwind_info *ui, ui_buf = { .sample = data, @@ -231,6 +234,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, .cb = cb, .arg = arg, .max_stack = max_stack, + .best_effort = best_effort }; Dwarf_Word ip; int err = -EINVAL, i; diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h index 0cbd2650e280..8c88bc4f2304 100644 --- a/tools/perf/util/unwind-libdw.h +++ b/tools/perf/util/unwind-libdw.h @@ -20,6 +20,7 @@ struct unwind_info { void *arg; int max_stack; int idx; + bool best_effort; struct unwind_entry entries[]; }; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 71a353349181..41e29fc7648a 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -96,6 +96,7 @@ struct unwind_info { struct perf_sample *sample; struct machine *machine; struct thread *thread; + bool best_effort; }; #define dw_read(ptr, type, end) ({ \ @@ -553,7 +554,8 @@ static int access_reg(unw_addr_space_t __maybe_unused as, ret = perf_reg_value(&val, &ui->sample->user_regs, id); if (ret) { - pr_err("unwind: can't read reg %d\n", regnum); + if (!ui->best_effort) + pr_err("unwind: can't read reg %d\n", regnum); return ret; } @@ -666,7 +668,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, return -1; ret = unw_init_remote(&c, addr_space, ui); - if (ret) + if (ret && !ui->best_effort) display_error(ret); while (!ret && (unw_step(&c) > 0) && i < max_stack) { @@ -704,12 +706,14 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack) + struct perf_sample *data, int max_stack, + bool best_effort) { struct unwind_info ui = { .sample = data, .thread = thread, .machine = thread->maps->machine, + .best_effort = best_effort }; if (!data->user_regs.regs) diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index e89a5479b361..509c287ee762 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -80,9 +80,11 @@ void unwind__finish_access(struct maps *maps) int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack) + struct perf_sample *data, int max_stack, + bool best_effort) { if (thread->maps->unwind_libunwind_ops) - return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack); + return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, + max_stack, best_effort); return 0; } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index ab8ad469c8de..b2a03fa5289b 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -23,13 +23,19 @@ struct unwind_libunwind_ops { void (*finish_access)(struct maps *maps); int (*get_entries)(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack); + struct perf_sample *data, int max_stack, bool best_effort); }; #ifdef HAVE_DWARF_UNWIND_SUPPORT +/* + * When best_effort is set, don't report errors and fail silently. This could + * be expanded in the future to be more permissive about things other than + * error messages. + */ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack); + struct perf_sample *data, int max_stack, + bool best_effort); /* libunwind specific */ #ifdef HAVE_LIBUNWIND_SUPPORT #ifndef LIBUNWIND__ARCH_REG_ID @@ -65,7 +71,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, void *arg __maybe_unused, struct thread *thread __maybe_unused, struct perf_sample *data __maybe_unused, - int max_stack __maybe_unused) + int max_stack __maybe_unused, + bool best_effort __maybe_unused) { return 0; } -- cgit v1.2.3 From ffab487052054162b3b6c9c6005777ec6cfcea05 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 8 Apr 2022 15:40:56 +0100 Subject: perf: arm-spe: Fix perf report --mem-mode Since commit bb30acae4c4dacfa ("perf report: Bail out --mem-mode if mem info is not available") "perf mem report" and "perf report --mem-mode" don't allow opening the file unless one of the events has PERF_SAMPLE_DATA_SRC set. SPE doesn't have this set even though synthetic memory data is generated after it is decoded. Fix this issue by setting DATA_SRC on SPE events. This has no effect on the data collected because the SPE driver doesn't do anything with that flag and doesn't generate samples. Fixes: bb30acae4c4dacfa ("perf report: Bail out --mem-mode if mem info is not available") Signed-off-by: James Clark Tested-by: Leo Yan Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: German Gomez Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Mathieu Poirier Cc: Ravi Bangoria Cc: Will Deacon Link: https://lore.kernel.org/r/20220408144056.1955535-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/arm-spe.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 86e2e926aa0e..af4d63af8072 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -239,6 +239,12 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, arm_spe_set_timestamp(itr, arm_spe_evsel); } + /* + * Set this only so that perf report knows that SPE generates memory info. It has no effect + * on the opening of the event or the SPE data produced. + */ + evsel__set_sample_bit(arm_spe_evsel, DATA_SRC); + /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); if (err) -- cgit v1.2.3 From aeee9dc53ce405d2161f9915f553114e94e5b677 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 8 Apr 2022 16:26:25 +0300 Subject: perf tools: Fix perf's libperf_print callback eprintf() does not expect va_list as the type of the 4th parameter. Use veprintf() because it does. Signed-off-by: Adrian Hunter Fixes: 428dab813a56ce94 ("libperf: Merge libperf_set_print() into libperf_init()") Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220408132625.2451452-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 2f6b67189b42..6aae7b6c376b 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -434,7 +434,7 @@ void pthread__unblock_sigwinch(void) static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { - return eprintf(level, verbose, fmt, ap); + return veprintf(level, verbose, fmt, ap); } int main(int argc, const char **argv) -- cgit v1.2.3 From c9c2a427dd9fe0e73eceacafb42d54f3c4535693 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 6 Apr 2022 23:21:10 +0530 Subject: perf bench: Fix futex bench to correct usage of affinity for machines with #CPUs > 1K The 'perf bench futex' testcase fails on systems with more than 1K CPUs. Testcase: perf bench futex all Failure snippet: <<>>Running futex/hash benchmark... perf: pthread_create: No such file or directory <<>> All the futex benchmarks (ie hash, lock-api, requeue, wake, wake-parallel), pthread_create is invoked in respective bench_futex_* function. Though the logs shows direct failure from pthread_create, strace logs showed that actual failure is from "sched_setaffinity" returning EINVAL (invalid argument). This happens because the default mask size in glibc is 1024. To overcome this 1024 CPUs mask size limitation of cpu_set_t, change the mask size using the CPU_*_S macros. Patch addresses this by fixing all the futex benchmarks to use CPU_ALLOC to allocate cpumask, CPU_ALLOC_SIZE for size, and CPU_SET_S to set the mask. Reported-by: Disha Goel Reviewed-by: Srikar Dronamraju Signed-off-by: Athira Jajeev Tested-by: Disha Goel Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220406175113.87881-2-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 26 +++++++++++++++++++------- tools/perf/bench/futex-lock-pi.c | 21 ++++++++++++++++----- tools/perf/bench/futex-requeue.c | 21 ++++++++++++++++----- tools/perf/bench/futex-wake-parallel.c | 21 ++++++++++++++++----- tools/perf/bench/futex-wake.c | 22 ++++++++++++++++------ 5 files changed, 83 insertions(+), 28 deletions(-) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index dbcecec4eeda..f05db4cf983d 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -122,12 +122,14 @@ static void print_summary(void) int bench_futex_hash(int argc, const char **argv) { int ret = 0; - cpu_set_t cpuset; + cpu_set_t *cpuset; struct sigaction act; unsigned int i; pthread_attr_t thread_attr; struct worker *worker = NULL; struct perf_cpu_map *cpu; + int nrcpus; + size_t size; argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); if (argc) { @@ -170,25 +172,35 @@ int bench_futex_hash(int argc, const char **argv) threads_starting = params.nthreads; pthread_attr_init(&thread_attr); gettimeofday(&bench__start, NULL); + + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < params.nthreads; i++) { worker[i].tid = i; worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex)); if (!worker[i].futex) goto errmem; - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); - + } ret = pthread_create(&worker[i].thread, &thread_attr, workerfn, (void *)(struct worker *) &worker[i]); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); pthread_attr_destroy(&thread_attr); pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 6fc9a3d55c1f..0abb3f7ee24f 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -120,11 +120,17 @@ static void *workerfn(void *arg) static void create_threads(struct worker *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < params.nthreads; i++) { worker[i].tid = i; @@ -135,15 +141,20 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, } else worker[i].futex = &global_futex; - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) + if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } int bench_futex_lock_pi(int argc, const char **argv) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 2f59d5d1c509..b6faabfafb8e 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -123,22 +123,33 @@ static void *workerfn(void *arg __maybe_unused) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void toggle_done(int sig __maybe_unused, diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 861deb934745..e47f46a3a47e 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -144,22 +144,33 @@ static void *blocked_workerfn(void *arg __maybe_unused) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void print_run(struct thread_data *waking_worker, unsigned int run_num) diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index cfda48bef1d7..201a3555f09a 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -97,22 +97,32 @@ static void print_summary(void) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; - + size_t size; + int nrcpus = perf_cpu_map__nr(cpu); threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void toggle_done(int sig __maybe_unused, -- cgit v1.2.3 From 299687e18a06aa648c8d4ebb025b322ac83fe7dd Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 6 Apr 2022 23:21:11 +0530 Subject: perf bench: Fix epoll bench to correct usage of affinity for machines with #CPUs > 1K The 'perf bench epoll' testcase fails on systems with more than 1K CPUs. Testcase: perf bench epoll all Result snippet: <<>> Run summary [PID 106497]: 1399 threads monitoring on 64 file-descriptors for 8 secs. perf: pthread_create: No such file or directory <<>> In epoll benchmarks (ctl, wait) pthread_create is invoked in do_threads from respective bench_epoll_* function. Though the logs shows direct failure from pthread_create, the actual failure is from "sched_setaffinity" returning EINVAL (invalid argument). This happens because the default mask size in glibc is 1024. To overcome this 1024 CPUs mask size limitation of cpu_set_t, change the mask size using the CPU_*_S macros. Patch addresses this by fixing all the epoll benchmarks to use CPU_ALLOC to allocate cpumask, CPU_ALLOC_SIZE for size, and CPU_SET_S to set the mask. Reported-by: Disha Goel Signed-off-by: Athira Jajeev Tested-by: Disha Goel Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Srikar Dronamraju Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220406175113.87881-3-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/epoll-ctl.c | 25 +++++++++++++++++++------ tools/perf/bench/epoll-wait.c | 25 +++++++++++++++++++------ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 134612bde0cb..4256dc5d6236 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -222,13 +222,20 @@ static void init_fdmaps(struct worker *w, int pct) static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) { pthread_attr_t thread_attr, *attrp = NULL; - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i, j; int ret = 0; + int nrcpus; + size_t size; if (!noaffinity) pthread_attr_init(&thread_attr); + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < nthreads; i++) { struct worker *w = &worker[i]; @@ -252,22 +259,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) init_fdmaps(w, 50); if (!noaffinity) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, + size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } attrp = &thread_attr; } ret = pthread_create(&w->thread, attrp, workerfn, (void *)(struct worker *) w); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); if (!noaffinity) pthread_attr_destroy(&thread_attr); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 37de970c9743..2728b0140853 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -291,9 +291,11 @@ static void print_summary(void) static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) { pthread_attr_t thread_attr, *attrp = NULL; - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i, j; int ret = 0, events = EPOLLIN; + int nrcpus; + size_t size; if (oneshot) events |= EPOLLONESHOT; @@ -306,6 +308,11 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) pthread_attr_init(&thread_attr); + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < nthreads; i++) { struct worker *w = &worker[i]; @@ -341,22 +348,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) } if (!noaffinity) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, + size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } attrp = &thread_attr; } ret = pthread_create(&w->thread, attrp, workerfn, (void *)(struct worker *) w); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); if (!noaffinity) pthread_attr_destroy(&thread_attr); -- cgit v1.2.3 From bc21e74d4775f883ae1f542c1f1dc7205b15d925 Mon Sep 17 00:00:00 2001 From: Denis Nikitin Date: Tue, 29 Mar 2022 20:11:30 -0700 Subject: perf session: Remap buf if there is no space for event If a perf event doesn't fit into remaining buffer space return NULL to remap buf and fetch the event again. Keep the logic to error out on inadequate input from fuzzing. This fixes perf failing on ChromeOS (with 32b userspace): $ perf report -v -i perf.data ... prefetch_event: head=0x1fffff8 event->header_size=0x30, mmap_size=0x2000000: fuzzed or compressed perf.data? Error: failed to process sample Fixes: 57fc032ad643ffd0 ("perf session: Avoid infinite loop when seeing invalid header.size") Reviewed-by: James Clark Signed-off-by: Denis Nikitin Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Namhyung Kim Link: https://lore.kernel.org/r/20220330031130.2152327-1-denik@chromium.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3b8dfe603e50..45a30040ec8d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2095,6 +2095,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, bool needs_swap, union perf_event *error) { union perf_event *event; + u16 event_size; /* * Ensure we have enough space remaining to read @@ -2107,15 +2108,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, if (needs_swap) perf_event_header__bswap(&event->header); - if (head + event->header.size <= mmap_size) + event_size = event->header.size; + if (head + event_size <= mmap_size) return event; /* We're not fetching the event so swap back again */ if (needs_swap) perf_event_header__bswap(&event->header); - pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:" - " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size); + /* Check if the event fits into the next mmapped buf. */ + if (event_size <= mmap_size - head % page_size) { + /* Remap buf and fetch again. */ + return NULL; + } + + /* Invalid input. Event size should never exceed mmap_size. */ + pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:" + " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size); return error; } -- cgit v1.2.3 From 0ff26efe92844aa3910eff8951739d44a5ab6493 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 5 Apr 2022 00:15:40 +0200 Subject: perf docs: Add perf-iostat link to manpages Signed-off-by: Michael Petlan Acked-by: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220404221541.30312-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 9c330cdfa973..71ebdf8125de 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -83,7 +83,7 @@ linkperf:perf-buildid-list[1], linkperf:perf-c2c[1], linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1], linkperf:perf-evlist[1], linkperf:perf-ftrace[1], linkperf:perf-help[1], linkperf:perf-inject[1], -linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1], +linkperf:perf-intel-pt[1], linkperf:perf-iostat[1], linkperf:perf-kallsyms[1], linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1], linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1], linkperf:perf-script[1], linkperf:perf-test[1], -- cgit v1.2.3 From 3e6b43beb7b56ac6fd376c84f06d90ded73a2788 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 5 Apr 2022 00:15:41 +0200 Subject: perf tools: Add external commands to list-cmds The `perf --list-cmds` output prints only internal commands, although there is no reason for that from users' perspective. Adding the external commands to commands array with NULL function pointer allows printing all perf commands while not changing the logic of command handler selection. Signed-off-by: Michael Petlan Acked-by: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220404221541.30312-2-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 6aae7b6c376b..0170cb0819d6 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -55,6 +55,7 @@ struct cmd_struct { }; static struct cmd_struct commands[] = { + { "archive", NULL, 0 }, { "buildid-cache", cmd_buildid_cache, 0 }, { "buildid-list", cmd_buildid_list, 0 }, { "config", cmd_config, 0 }, @@ -62,6 +63,7 @@ static struct cmd_struct commands[] = { { "diff", cmd_diff, 0 }, { "evlist", cmd_evlist, 0 }, { "help", cmd_help, 0 }, + { "iostat", NULL, 0 }, { "kallsyms", cmd_kallsyms, 0 }, { "list", cmd_list, 0 }, { "record", cmd_record, 0 }, @@ -360,6 +362,8 @@ static void handle_internal_command(int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(commands); i++) { struct cmd_struct *p = commands+i; + if (p->fn == NULL) + continue; if (strcmp(p->cmd, cmd)) continue; exit(run_builtin(p, argc, argv)); -- cgit v1.2.3 From 940a445a904088eac715dd985c01847311a42459 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 7 Apr 2022 16:04:59 -0700 Subject: perf annotate: Drop objdump stderr to avoid getting stuck waiting for stdout output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If objdump writes to stderr it can block waiting for it to be read. As perf doesn't read stderr then progress stops with perf waiting for stdout output. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexandre Truong Cc: Dave Marchevsky Cc: Denis Nikitin Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Lexi Shao Cc: Li Huafei Cc: Mark Rutland Cc: Martin Liška Cc: Masami Hiramatsu Cc: Mathieu Poirier Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Remi Bernon Cc: Riccardo Mancini Cc: Song Liu Cc: Stephane Eranian Cc: Thomas Richter Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20220407230503.1265036-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e4c641b240df..82cc396ef516 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2047,6 +2047,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) objdump_process.argv = objdump_argv; objdump_process.out = -1; objdump_process.err = -1; + objdump_process.no_stderr = 1; if (start_command(&objdump_process)) { pr_err("Failure starting to run %s\n", command); err = -1; -- cgit v1.2.3