summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorArnaldo Carvalho de Melo <acme@redhat.com>2021-03-08 10:11:33 -0300
committerArnaldo Carvalho de Melo <acme@redhat.com>2021-03-08 10:11:33 -0300
commit009ef05f98129aa91c62c3baab859ba593a15bb2 (patch)
treef3414f08d636a597545b1e4f443b373b9d6d8f4b /tools
parent2777b81b379df772defd654bc4d3fa82dca17a4b (diff)
parent144c79ef33536b4ecb4951e07dbc1f2b7fa99d32 (diff)
downloadlinux-009ef05f98129aa91c62c3baab859ba593a15bb2.tar.bz2
Merge remote-tracking branch 'torvalds/master' into perf/core
To pick up the fixes sent for v5.12 and continue development based on v5.12-rc2, i.e. without the swap on file bug. This also gets a slightly newer and better tools/perf/arch/arm/util/cs-etm.c patch version, using the BIT() macro, that had already been slated to v5.13 but ended up going to v5.12-rc1 on an older version. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h19
-rw-r--r--tools/arch/x86/include/asm/insn.h45
-rw-r--r--tools/arch/x86/include/asm/orc_types.h10
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--tools/arch/x86/include/uapi/asm/vmx.h4
-rw-r--r--tools/arch/x86/lib/insn.c119
-rw-r--r--tools/build/Makefile8
-rw-r--r--tools/include/linux/export.h3
-rw-r--r--tools/include/linux/objtool.h13
-rw-r--r--tools/include/uapi/asm-generic/unistd.h4
-rw-r--r--tools/include/uapi/linux/mount.h16
-rw-r--r--tools/lib/perf/evlist.c13
-rw-r--r--tools/lib/perf/include/internal/evlist.h2
-rw-r--r--tools/objtool/.gitignore2
-rw-r--r--tools/objtool/Documentation/stack-validation.txt16
-rw-r--r--tools/objtool/Makefile5
-rw-r--r--tools/objtool/arch/x86/decode.c54
-rw-r--r--tools/objtool/arch/x86/include/arch/cfi_regs.h (renamed from tools/objtool/arch/x86/include/cfi_regs.h)0
-rw-r--r--tools/objtool/arch/x86/include/arch/elf.h (renamed from tools/objtool/arch/x86/include/arch_elf.h)0
-rw-r--r--tools/objtool/arch/x86/include/arch/endianness.h9
-rw-r--r--tools/objtool/arch/x86/include/arch/special.h (renamed from tools/objtool/arch/x86/include/arch_special.h)0
-rw-r--r--tools/objtool/arch/x86/special.c6
-rw-r--r--tools/objtool/builtin-check.c14
-rw-r--r--tools/objtool/builtin-orc.c10
-rw-r--r--tools/objtool/check.c515
-rw-r--r--tools/objtool/elf.c40
-rw-r--r--tools/objtool/include/objtool/arch.h (renamed from tools/objtool/arch.h)8
-rw-r--r--tools/objtool/include/objtool/builtin.h (renamed from tools/objtool/builtin.h)2
-rw-r--r--tools/objtool/include/objtool/cfi.h (renamed from tools/objtool/cfi.h)2
-rw-r--r--tools/objtool/include/objtool/check.h (renamed from tools/objtool/check.h)39
-rw-r--r--tools/objtool/include/objtool/elf.h (renamed from tools/objtool/elf.h)0
-rw-r--r--tools/objtool/include/objtool/endianness.h38
-rw-r--r--tools/objtool/include/objtool/objtool.h (renamed from tools/objtool/objtool.h)6
-rw-r--r--tools/objtool/include/objtool/special.h (renamed from tools/objtool/special.h)4
-rw-r--r--tools/objtool/include/objtool/warn.h (renamed from tools/objtool/warn.h)2
-rw-r--r--tools/objtool/objtool.c7
-rw-r--r--tools/objtool/orc_dump.c11
-rw-r--r--tools/objtool/orc_gen.c315
-rw-r--r--tools/objtool/special.c14
-rw-r--r--tools/objtool/weak.c9
-rw-r--r--tools/perf/Documentation/perf-evlist.txt2
-rw-r--r--tools/perf/Documentation/perf-ftrace.txt4
-rw-r--r--tools/perf/Documentation/perf-kallsyms.txt2
-rw-r--r--tools/perf/Documentation/perf-trace.txt4
-rw-r--r--tools/perf/Makefile.perf12
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c4
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/x86/Makefile11
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h1
-rw-r--r--tools/perf/arch/x86/tests/Build1
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c4
-rw-r--r--tools/perf/arch/x86/tests/insn-x86.c2
-rw-r--r--tools/perf/arch/x86/tests/sample-parsing.c121
-rw-r--r--tools/perf/arch/x86/util/archinsn.c2
-rw-r--r--tools/perf/bench/numa.c42
-rw-r--r--tools/perf/bench/sched-messaging.c4
-rw-r--r--tools/perf/bench/sched-pipe.c4
-rw-r--r--tools/perf/bench/syscall.c4
-rw-r--r--tools/perf/builtin-daemon.c6
-rw-r--r--tools/perf/builtin-diff.c3
-rw-r--r--tools/perf/builtin-trace.c5
-rw-r--r--tools/perf/perf-archive.sh3
-rw-r--r--tools/perf/tests/attr.c8
-rw-r--r--tools/perf/tests/code-reading.c10
-rw-r--r--tools/perf/tests/cpumap.c2
-rw-r--r--tools/perf/tests/keep-tracking.c5
-rw-r--r--tools/perf/tests/mmap-basic.c2
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c2
-rw-r--r--tools/perf/tests/sample-parsing.c4
-rwxr-xr-xtools/perf/tests/shell/daemon.sh32
-rw-r--r--tools/perf/tests/sw-clock.c12
-rw-r--r--tools/perf/tests/switch-tracking.c5
-rw-r--r--tools/perf/tests/task-exit.c10
-rw-r--r--tools/perf/tests/thread-map.c8
-rw-r--r--tools/perf/util/evlist.c1
-rw-r--r--tools/perf/util/evsel.c18
-rw-r--r--tools/perf/util/evsel.h4
-rw-r--r--tools/perf/util/header.c4
-rw-r--r--tools/perf/util/map.c7
-rw-r--r--tools/perf/util/parse-events.y6
-rw-r--r--tools/perf/util/python-ext-sources1
-rw-r--r--tools/perf/util/sort.c4
-rw-r--r--tools/perf/util/stat-display.c2
-rw-r--r--tools/perf/util/stat.c47
-rw-r--r--tools/perf/util/trace-event-read.c1
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/dma/dma_map_benchmark.c21
-rw-r--r--tools/testing/selftests/kvm/.gitignore1
-rw-r--r--tools/testing/selftests/kvm/Makefile1
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c165
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c6
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c159
-rw-r--r--tools/testing/selftests/mount_setattr/.gitignore1
-rw-r--r--tools/testing/selftests/mount_setattr/Makefile7
-rw-r--r--tools/testing/selftests/mount_setattr/config1
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c1424
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh15
-rw-r--r--tools/tracing/latency/latency-collector.c6
101 files changed, 3026 insertions, 609 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 84b887825f12..cc96e26d69f7 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 19 /* N 32-bit words worth of info */
+#define NCAPINTS 20 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -96,7 +96,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */
+/* FREE! ( 3*32+17) */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -201,7 +201,7 @@
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
+/* FREE! ( 7*32+10) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
@@ -211,7 +211,7 @@
#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
+/* FREE! ( 7*32+20) */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
@@ -236,8 +236,6 @@
#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
-#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
-#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -294,6 +292,7 @@
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
@@ -337,6 +336,7 @@
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
+#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
@@ -385,6 +385,13 @@
#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
+#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */
+#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
+#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
+#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+
/*
* BUG word(s)
*/
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index 52c6262e6bfd..cc777c185212 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -7,9 +7,12 @@
* Copyright (C) IBM Corporation, 2009
*/
+#include <asm/byteorder.h>
/* insn_attr_t is defined in inat.h */
#include "inat.h"
+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
+
struct insn_field {
union {
insn_value_t value;
@@ -20,6 +23,48 @@ struct insn_field {
unsigned char nbytes;
};
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+ p->value = v;
+ p->nbytes = n;
+}
+
+static inline void insn_set_byte(struct insn_field *p, unsigned char n,
+ insn_byte_t v)
+{
+ p->bytes[n] = v;
+}
+
+#else
+
+struct insn_field {
+ insn_value_t value;
+ union {
+ insn_value_t little;
+ insn_byte_t bytes[4];
+ };
+ /* !0 if we've run insn_get_xxx() for this field */
+ unsigned char got;
+ unsigned char nbytes;
+};
+
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+ p->value = v;
+ p->little = __cpu_to_le32(v);
+ p->nbytes = n;
+}
+
+static inline void insn_set_byte(struct insn_field *p, unsigned char n,
+ insn_byte_t v)
+{
+ p->bytes[n] = v;
+ p->value = __le32_to_cpu(p->little);
+}
+#endif
+
struct insn {
struct insn_field prefixes; /*
* Prefixes
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index fdbffec4cfde..5a2baf28a1dc 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -40,6 +40,8 @@
#define ORC_REG_MAX 15
#ifndef __ASSEMBLY__
+#include <asm/byteorder.h>
+
/*
* This struct is more or less a vastly simplified version of the DWARF Call
* Frame Information standard. It contains only the necessary parts of DWARF
@@ -51,10 +53,18 @@
struct orc_entry {
s16 sp_offset;
s16 bp_offset;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
unsigned sp_reg:4;
unsigned bp_reg:4;
unsigned type:2;
unsigned end:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ unsigned bp_reg:4;
+ unsigned sp_reg:4;
+ unsigned unused:5;
+ unsigned end:1;
+ unsigned type:2;
+#endif
} __packed;
#endif /* __ASSEMBLY__ */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 8e76d3701db3..5a3022c8af82 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -112,6 +112,7 @@ struct kvm_ioapic_state {
#define KVM_NR_IRQCHIPS 3
#define KVM_RUN_X86_SMM (1 << 0)
+#define KVM_RUN_X86_BUS_LOCK (1 << 1)
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index ada955c5ebb6..b8e650a985e3 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -89,6 +89,7 @@
#define EXIT_REASON_XRSTORS 64
#define EXIT_REASON_UMWAIT 67
#define EXIT_REASON_TPAUSE 68
+#define EXIT_REASON_BUS_LOCK 74
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -150,7 +151,8 @@
{ EXIT_REASON_XSAVES, "XSAVES" }, \
{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
- { EXIT_REASON_TPAUSE, "TPAUSE" }
+ { EXIT_REASON_TPAUSE, "TPAUSE" }, \
+ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }
#define VMX_EXIT_REASON_FLAGS \
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index 0151dfc6da61..3d9355ed1246 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -5,6 +5,7 @@
* Copyright (C) IBM Corporation, 2002, 2004, 2009
*/
+#include <linux/kernel.h>
#ifdef __KERNEL__
#include <linux/string.h>
#else
@@ -15,15 +16,28 @@
#include "../include/asm/emulate_prefix.h"
+#define leXX_to_cpu(t, r) \
+({ \
+ __typeof__(t) v; \
+ switch (sizeof(t)) { \
+ case 4: v = le32_to_cpu(r); break; \
+ case 2: v = le16_to_cpu(r); break; \
+ case 1: v = r; break; \
+ default: \
+ BUILD_BUG(); break; \
+ } \
+ v; \
+})
+
/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
#define __get_next(t, insn) \
- ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })
#define __peek_nbyte_next(t, insn, n) \
- ({ t r = *(t*)((insn)->next_byte + n); r; })
+ ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); })
#define get_next(t, insn) \
({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
@@ -147,9 +161,9 @@ found:
b = insn->prefixes.bytes[3];
for (i = 0; i < nb; i++)
if (prefixes->bytes[i] == lb)
- prefixes->bytes[i] = b;
+ insn_set_byte(prefixes, i, b);
}
- insn->prefixes.bytes[3] = lb;
+ insn_set_byte(&insn->prefixes, 3, lb);
}
/* Decode REX prefix */
@@ -157,8 +171,7 @@ found:
b = peek_next(insn_byte_t, insn);
attr = inat_get_opcode_attribute(b);
if (inat_is_rex_prefix(attr)) {
- insn->rex_prefix.value = b;
- insn->rex_prefix.nbytes = 1;
+ insn_field_set(&insn->rex_prefix, b, 1);
insn->next_byte++;
if (X86_REX_W(b))
/* REX.W overrides opnd_size */
@@ -181,13 +194,13 @@ found:
if (X86_MODRM_MOD(b2) != 3)
goto vex_end;
}
- insn->vex_prefix.bytes[0] = b;
- insn->vex_prefix.bytes[1] = b2;
+ insn_set_byte(&insn->vex_prefix, 0, b);
+ insn_set_byte(&insn->vex_prefix, 1, b2);
if (inat_is_evex_prefix(attr)) {
b2 = peek_nbyte_next(insn_byte_t, insn, 2);
- insn->vex_prefix.bytes[2] = b2;
+ insn_set_byte(&insn->vex_prefix, 2, b2);
b2 = peek_nbyte_next(insn_byte_t, insn, 3);
- insn->vex_prefix.bytes[3] = b2;
+ insn_set_byte(&insn->vex_prefix, 3, b2);
insn->vex_prefix.nbytes = 4;
insn->next_byte += 4;
if (insn->x86_64 && X86_VEX_W(b2))
@@ -195,7 +208,7 @@ found:
insn->opnd_bytes = 8;
} else if (inat_is_vex3_prefix(attr)) {
b2 = peek_nbyte_next(insn_byte_t, insn, 2);
- insn->vex_prefix.bytes[2] = b2;
+ insn_set_byte(&insn->vex_prefix, 2, b2);
insn->vex_prefix.nbytes = 3;
insn->next_byte += 3;
if (insn->x86_64 && X86_VEX_W(b2))
@@ -207,7 +220,7 @@ found:
* Makes it easier to decode vex.W, vex.vvvv,
* vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
*/
- insn->vex_prefix.bytes[2] = b2 & 0x7f;
+ insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f);
insn->vex_prefix.nbytes = 2;
insn->next_byte += 2;
}
@@ -243,7 +256,7 @@ void insn_get_opcode(struct insn *insn)
/* Get first opcode */
op = get_next(insn_byte_t, insn);
- opcode->bytes[0] = op;
+ insn_set_byte(opcode, 0, op);
opcode->nbytes = 1;
/* Check if there is VEX prefix or not */
@@ -295,8 +308,7 @@ void insn_get_modrm(struct insn *insn)
if (inat_has_modrm(insn->attr)) {
mod = get_next(insn_byte_t, insn);
- modrm->value = mod;
- modrm->nbytes = 1;
+ insn_field_set(modrm, mod, 1);
if (inat_is_group(insn->attr)) {
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_group_attribute(mod, pfx_id,
@@ -334,7 +346,7 @@ int insn_rip_relative(struct insn *insn)
* For rip-relative instructions, the mod field (top 2 bits)
* is zero and the r/m field (bottom 3 bits) is 0x5.
*/
- return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+ return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5);
}
/**
@@ -353,11 +365,11 @@ void insn_get_sib(struct insn *insn)
if (!insn->modrm.got)
insn_get_modrm(insn);
if (insn->modrm.nbytes) {
- modrm = (insn_byte_t)insn->modrm.value;
+ modrm = insn->modrm.bytes[0];
if (insn->addr_bytes != 2 &&
X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
- insn->sib.value = get_next(insn_byte_t, insn);
- insn->sib.nbytes = 1;
+ insn_field_set(&insn->sib,
+ get_next(insn_byte_t, insn), 1);
}
}
insn->sib.got = 1;
@@ -407,19 +419,18 @@ void insn_get_displacement(struct insn *insn)
if (mod == 3)
goto out;
if (mod == 1) {
- insn->displacement.value = get_next(signed char, insn);
- insn->displacement.nbytes = 1;
+ insn_field_set(&insn->displacement,
+ get_next(signed char, insn), 1);
} else if (insn->addr_bytes == 2) {
if ((mod == 0 && rm == 6) || mod == 2) {
- insn->displacement.value =
- get_next(short, insn);
- insn->displacement.nbytes = 2;
+ insn_field_set(&insn->displacement,
+ get_next(short, insn), 2);
}
} else {
if ((mod == 0 && rm == 5) || mod == 2 ||
(mod == 0 && base == 5)) {
- insn->displacement.value = get_next(int, insn);
- insn->displacement.nbytes = 4;
+ insn_field_set(&insn->displacement,
+ get_next(int, insn), 4);
}
}
}
@@ -435,18 +446,14 @@ static int __get_moffset(struct insn *insn)
{
switch (insn->addr_bytes) {
case 2:
- insn->moffset1.value = get_next(short, insn);
- insn->moffset1.nbytes = 2;
+ insn_field_set(&insn->moffset1, get_next(short, insn), 2);
break;
case 4:
- insn->moffset1.value = get_next(int, insn);
- insn->moffset1.nbytes = 4;
+ insn_field_set(&insn->moffset1, get_next(int, insn), 4);
break;
case 8:
- insn->moffset1.value = get_next(int, insn);
- insn->moffset1.nbytes = 4;
- insn->moffset2.value = get_next(int, insn);
- insn->moffset2.nbytes = 4;
+ insn_field_set(&insn->moffset1, get_next(int, insn), 4);
+ insn_field_set(&insn->moffset2, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -464,13 +471,11 @@ static int __get_immv32(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate.value = get_next(short, insn);
- insn->immediate.nbytes = 2;
+ insn_field_set(&insn->immediate, get_next(short, insn), 2);
break;
case 4:
case 8:
- insn->immediate.value = get_next(int, insn);
- insn->immediate.nbytes = 4;
+ insn_field_set(&insn->immediate, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -487,18 +492,15 @@ static int __get_immv(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate1.value = get_next(short, insn);
- insn->immediate1.nbytes = 2;
+ insn_field_set(&insn->immediate1, get_next(short, insn), 2);
break;
case 4:
- insn->immediate1.value = get_next(int, insn);
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
insn->immediate1.nbytes = 4;
break;
case 8:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
- insn->immediate2.value = get_next(int, insn);
- insn->immediate2.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+ insn_field_set(&insn->immediate2, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -515,12 +517,10 @@ static int __get_immptr(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate1.value = get_next(short, insn);
- insn->immediate1.nbytes = 2;
+ insn_field_set(&insn->immediate1, get_next(short, insn), 2);
break;
case 4:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
break;
case 8:
/* ptr16:64 is not exist (no segment) */
@@ -528,8 +528,7 @@ static int __get_immptr(struct insn *insn)
default: /* opnd_bytes must be modified manually */
goto err_out;
}
- insn->immediate2.value = get_next(unsigned short, insn);
- insn->immediate2.nbytes = 2;
+ insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2);
insn->immediate1.got = insn->immediate2.got = 1;
return 1;
@@ -565,22 +564,17 @@ void insn_get_immediate(struct insn *insn)
switch (inat_immediate_size(insn->attr)) {
case INAT_IMM_BYTE:
- insn->immediate.value = get_next(signed char, insn);
- insn->immediate.nbytes = 1;
+ insn_field_set(&insn->immediate, get_next(signed char, insn), 1);
break;
case INAT_IMM_WORD:
- insn->immediate.value = get_next(short, insn);
- insn->immediate.nbytes = 2;
+ insn_field_set(&insn->immediate, get_next(short, insn), 2);
break;
case INAT_IMM_DWORD:
- insn->immediate.value = get_next(int, insn);
- insn->immediate.nbytes = 4;
+ insn_field_set(&insn->immediate, get_next(int, insn), 4);
break;
case INAT_IMM_QWORD:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
- insn->immediate2.value = get_next(int, insn);
- insn->immediate2.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+ insn_field_set(&insn->immediate2, get_next(int, insn), 4);
break;
case INAT_IMM_PTR:
if (!__get_immptr(insn))
@@ -599,8 +593,7 @@ void insn_get_immediate(struct insn *insn)
goto err_out;
}
if (inat_has_second_immediate(insn->attr)) {
- insn->immediate2.value = get_next(signed char, insn);
- insn->immediate2.nbytes = 1;
+ insn_field_set(&insn->immediate2, get_next(signed char, insn), 1);
}
done:
insn->immediate.got = 1;
diff --git a/tools/build/Makefile b/tools/build/Makefile
index bae48e6fa995..5ed41b96fcde 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -30,12 +30,18 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
all: $(OUTPUT)fixdep
+# Make sure there's anything to clean,
+# feature contains check for existing OUTPUT
+TMP_O := $(if $(OUTPUT),$(OUTPUT)/feature,./)
+
clean:
$(call QUIET_CLEAN, fixdep)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)rm -f $(OUTPUT)fixdep
$(call QUIET_CLEAN, feature-detect)
- $(Q)$(MAKE) -C feature/ clean >/dev/null
+ifneq ($(wildcard $(TMP_O)),)
+ $(Q)$(MAKE) -C feature OUTPUT=$(TMP_O) clean >/dev/null
+endif
$(OUTPUT)fixdep-in.o: FORCE
$(Q)$(MAKE) $(build)=fixdep
diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h
index d07e586b9ba0..acb6f4daa2f0 100644
--- a/tools/include/linux/export.h
+++ b/tools/include/linux/export.h
@@ -3,8 +3,5 @@
#define EXPORT_SYMBOL(sym)
#define EXPORT_SYMBOL_GPL(sym)
-#define EXPORT_SYMBOL_GPL_FUTURE(sym)
-#define EXPORT_UNUSED_SYMBOL(sym)
-#define EXPORT_UNUSED_SYMBOL_GPL(sym)
#endif
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
index 577f51436cf9..7e72d975cb76 100644
--- a/tools/include/linux/objtool.h
+++ b/tools/include/linux/objtool.h
@@ -29,11 +29,14 @@ struct unwind_hint {
*
* UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
* sp_reg+sp_offset points to the iret return frame.
+ *
+ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+ * Useful for code which doesn't have an ELF function annotation.
*/
#define UNWIND_HINT_TYPE_CALL 0
#define UNWIND_HINT_TYPE_REGS 1
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
-#define UNWIND_HINT_TYPE_RET_OFFSET 3
+#define UNWIND_HINT_TYPE_FUNC 3
#ifdef CONFIG_STACK_VALIDATION
@@ -109,6 +112,12 @@ struct unwind_hint {
.popsection
.endm
+.macro STACK_FRAME_NON_STANDARD func:req
+ .pushsection .discard.func_stack_frame_non_standard, "aw"
+ .long \func - .
+ .popsection
+.endm
+
#endif /* __ASSEMBLY__ */
#else /* !CONFIG_STACK_VALIDATION */
@@ -122,6 +131,8 @@ struct unwind_hint {
#define ANNOTATE_INTRA_FUNCTION_CALL
.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
.endm
+.macro STACK_FRAME_NON_STANDARD func:req
+.endm
#endif
#endif /* CONFIG_STACK_VALIDATION */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 728752917785..ce58cff99b66 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -861,9 +861,11 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2)
__SYSCALL(__NR_process_madvise, sys_process_madvise)
#define __NR_epoll_pwait2 441
__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
+#define __NR_mount_setattr 442
+__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
#undef __NR_syscalls
-#define __NR_syscalls 442
+#define __NR_syscalls 443
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index dd8306ea336c..e6524ead2b7b 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -1,6 +1,8 @@
#ifndef _UAPI_LINUX_MOUNT_H
#define _UAPI_LINUX_MOUNT_H
+#include <linux/types.h>
+
/*
* These are the fs-independent mount-flags: up to 32 flags are supported
*
@@ -117,5 +119,19 @@ enum fsconfig_command {
#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */
#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
+#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+
+/*
+ * mount_setattr()
+ */
+struct mount_attr {
+ __u64 attr_set;
+ __u64 attr_clr;
+ __u64 propagation;
+ __u64 userns_fd;
+};
+
+/* List of all mount_attr versions. */
+#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 17465d454a0e..a0aaf385cbb5 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -26,13 +26,10 @@
void perf_evlist__init(struct perf_evlist *evlist)
{
- int i;
-
- for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
- INIT_HLIST_HEAD(&evlist->heads[i]);
INIT_LIST_HEAD(&evlist->entries);
evlist->nr_entries = 0;
fdarray__init(&evlist->pollfd, 64);
+ perf_evlist__reset_id_hash(evlist);
}
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
@@ -237,6 +234,14 @@ static void perf_evlist__id_hash(struct perf_evlist *evlist,
hlist_add_head(&sid->node, &evlist->heads[hash]);
}
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist)
+{
+ int i;
+
+ for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+ INIT_HLIST_HEAD(&evlist->heads[i]);
+}
+
void perf_evlist__id_add(struct perf_evlist *evlist,
struct perf_evsel *evsel,
int cpu, int thread, u64 id)
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index 2d0fa02b036f..212c29063ad4 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -124,4 +124,6 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
struct perf_evsel *evsel,
int cpu, int thread, int fd);
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
+
#endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
index 45cefda24c7b..14236db3677f 100644
--- a/tools/objtool/.gitignore
+++ b/tools/objtool/.gitignore
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
arch/x86/lib/inat-tables.c
-objtool
+/objtool
fixdep
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 0542e46c7552..30f38fdc0d56 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -315,13 +315,15 @@ they mean, and suggestions for how to fix them.
function tracing inserts additional calls, which is not obvious from the
sources).
-10. file.o: warning: func()+0x5c: alternative modifies stack
-
- This means that an alternative includes instructions that modify the
- stack. The problem is that there is only one ORC unwind table, this means
- that the ORC unwind entries must be valid for each of the alternatives.
- The easiest way to enforce this is to ensure alternatives do not contain
- any ORC entries, which in turn implies the above constraint.
+10. file.o: warning: func()+0x5c: stack layout conflict in alternatives
+
+ This means that in the use of the alternative() or ALTERNATIVE()
+ macro, the code paths have conflicting modifications to the stack.
+ The problem is that there is only one ORC unwind table, which means
+ that the ORC unwind entries must be consistent for all possible
+ instruction boundaries regardless of which code has been patched.
+ This limitation can be overcome by massaging the alternatives with
+ NOPs to shift the stack changes around so they no longer conflict.
11. file.o: warning: unannotated intra-function call
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 5cdb19036d7f..92ce4fce7bc7 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -27,6 +27,7 @@ all: $(OBJTOOL)
INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
-I$(srctree)/tools/arch/$(SRCARCH)/include \
+ -I$(srctree)/tools/objtool/include \
-I$(srctree)/tools/objtool/arch/$(SRCARCH)/include
WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
@@ -46,10 +47,6 @@ ifeq ($(SRCARCH),x86)
SUBCMD_ORC := y
endif
-ifeq ($(SUBCMD_ORC),y)
- CFLAGS += -DINSN_USE_ORC
-endif
-
export SUBCMD_CHECK SUBCMD_ORC
export srctree OUTPUT CFLAGS SRCARCH AWK
include $(srctree)/tools/build/Makefile.include
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index cde9c36e40ae..549813cff8ab 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -11,11 +11,11 @@
#include "../../../arch/x86/lib/inat.c"
#include "../../../arch/x86/lib/insn.c"
-#include "../../check.h"
-#include "../../elf.h"
-#include "../../arch.h"
-#include "../../warn.h"
#include <asm/orc_types.h>
+#include <objtool/check.h>
+#include <objtool/elf.h>
+#include <objtool/arch.h>
+#include <objtool/warn.h>
static unsigned char op_to_cfi_reg[][2] = {
{CFI_AX, CFI_R8},
@@ -222,15 +222,38 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
case 0x89:
- if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
+ if (rex_w && !rex_r && modrm_reg == 4) {
- /* mov %rsp, reg */
- ADD_OP(op) {
- op->src.type = OP_SRC_REG;
- op->src.reg = CFI_SP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ if (modrm_mod == 3) {
+ /* mov %rsp, reg */
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ }
+ break;
+
+ } else {
+ /* skip nontrivial SIB */
+ if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x))
+ break;
+
+ /* skip RIP relative displacement */
+ if (modrm_rm == 5 && modrm_mod == 0)
+ break;
+
+ /* mov %rsp, disp(%reg) */
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ op->dest.offset = insn.displacement.value;
+ }
+ break;
}
+
break;
}
@@ -259,8 +282,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->dest.reg = CFI_BP;
op->dest.offset = insn.displacement.value;
}
+ break;
+ }
- } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+ if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
/* mov reg, disp(%rsp) */
ADD_OP(op) {
@@ -270,6 +295,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->dest.reg = CFI_SP;
op->dest.offset = insn.displacement.value;
}
+ break;
}
break;
@@ -563,8 +589,8 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
state->cfa.offset = 8;
/* initial RA (return address) */
- state->regs[16].base = CFI_CFA;
- state->regs[16].offset = -8;
+ state->regs[CFI_RA].base = CFI_CFA;
+ state->regs[CFI_RA].offset = -8;
}
const char *arch_nop_insn(int len)
diff --git a/tools/objtool/arch/x86/include/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h
index 79bc517efba8..79bc517efba8 100644
--- a/tools/objtool/arch/x86/include/cfi_regs.h
+++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h
diff --git a/tools/objtool/arch/x86/include/arch_elf.h b/tools/objtool/arch/x86/include/arch/elf.h
index 69cc4264b28a..69cc4264b28a 100644
--- a/tools/objtool/arch/x86/include/arch_elf.h
+++ b/tools/objtool/arch/x86/include/arch/elf.h
diff --git a/tools/objtool/arch/x86/include/arch/endianness.h b/tools/objtool/arch/x86/include/arch/endianness.h
new file mode 100644
index 000000000000..7c362527da20
--- /dev/null
+++ b/tools/objtool/arch/x86/include/arch/endianness.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ARCH_ENDIANNESS_H
+#define _ARCH_ENDIANNESS_H
+
+#include <endian.h>
+
+#define __TARGET_BYTE_ORDER __LITTLE_ENDIAN
+
+#endif /* _ARCH_ENDIANNESS_H */
diff --git a/tools/objtool/arch/x86/include/arch_special.h b/tools/objtool/arch/x86/include/arch/special.h
index d818b2bffa02..d818b2bffa02 100644
--- a/tools/objtool/arch/x86/include/arch_special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index fd4af88c0ea5..e707d9bcd161 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <string.h>
-#include "../../special.h"
-#include "../../builtin.h"
+#include <objtool/special.h>
+#include <objtool/builtin.h>
#define X86_FEATURE_POPCNT (4 * 32 + 23)
#define X86_FEATURE_SMAP (9 * 32 + 20)
@@ -48,7 +48,7 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
* replacement group.
*/
return insn->offset == special_alt->new_off &&
- (insn->type == INSN_CALL || is_static_jump(insn));
+ (insn->type == INSN_CALL || is_jump(insn));
}
/*
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index c6d199bfd0ae..c3a85d8f6c5c 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -15,10 +15,10 @@
#include <subcmd/parse-options.h>
#include <string.h>
-#include "builtin.h"
-#include "objtool.h"
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
static const char * const check_usage[] = {
"objtool check [<options>] file.o",
@@ -34,13 +34,15 @@ const struct option check_options[] = {
OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"),
+ OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"),
OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
+ OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
OPT_END(),
};
int cmd_check(int argc, const char **argv)
{
- const char *objname, *s;
+ const char *objname;
struct objtool_file *file;
int ret;
@@ -51,10 +53,6 @@ int cmd_check(int argc, const char **argv)
objname = argv[0];
- s = strstr(objname, "vmlinux.o");
- if (s && !s[9])
- vmlinux = true;
-
file = objtool_open_read(objname);
if (!file)
return 1;
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 7b31121fa60b..8273bbf7cebb 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -13,8 +13,8 @@
*/
#include <string.h>
-#include "builtin.h"
-#include "objtool.h"
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
static const char *orc_usage[] = {
"objtool orc generate [<options>] file.o",
@@ -51,11 +51,7 @@ int cmd_orc(int argc, const char **argv)
if (list_empty(&file->insn_list))
return 0;
- ret = create_orc(file);
- if (ret)
- return ret;
-
- ret = create_orc_sections(file);
+ ret = orc_create(file);
if (ret)
return ret;
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f2e5e5ce1a05..068cdb41f76f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -6,21 +6,20 @@
#include <string.h>
#include <stdlib.h>
-#include "builtin.h"
-#include "cfi.h"
-#include "arch.h"
-#include "check.h"
-#include "special.h"
-#include "warn.h"
-#include "arch_elf.h"
+#include <arch/elf.h>
+#include <objtool/builtin.h>
+#include <objtool/cfi.h>
+#include <objtool/arch.h>
+#include <objtool/check.h>
+#include <objtool/special.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
#include <linux/objtool.h>
#include <linux/hashtable.h>
#include <linux/kernel.h>
#include <linux/static_call_types.h>
-#define FAKE_JUMP_OFFSET -1
-
struct alternative {
struct list_head list;
struct instruction *insn;
@@ -111,15 +110,20 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
static bool is_sibling_call(struct instruction *insn)
{
+ /*
+ * Assume only ELF functions can make sibling calls. This ensures
+ * sibling call detection consistency between vmlinux.o and individual
+ * objects.
+ */
+ if (!insn->func)
+ return false;
+
/* An indirect jump is either a sibling call or a jump to a table. */
if (insn->type == INSN_JUMP_DYNAMIC)
return list_empty(&insn->alts);
- if (!is_static_jump(insn))
- return false;
-
/* add_jump_destinations() sets insn->call_dest for sibling calls. */
- return !!insn->call_dest;
+ return (is_static_jump(insn) && insn->call_dest);
}
/*
@@ -156,6 +160,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"machine_real_restart",
"rewind_stack_do_exit",
"kunit_try_catch_throw",
+ "xen_start_kernel",
};
if (!func)
@@ -244,7 +249,7 @@ static void init_insn_state(struct insn_state *state, struct section *sec)
* not correctly determine insn->call_dest->sec (external symbols do
* not have a section).
*/
- if (vmlinux && sec)
+ if (vmlinux && noinstr && sec)
state->noinstr = sec->noinstr;
}
@@ -543,6 +548,78 @@ static int create_static_call_sections(struct objtool_file *file)
return 0;
}
+static int create_mcount_loc_sections(struct objtool_file *file)
+{
+ struct section *sec, *reloc_sec;
+ struct reloc *reloc;
+ unsigned long *loc;
+ struct instruction *insn;
+ int idx;
+
+ sec = find_section_by_name(file->elf, "__mcount_loc");
+ if (sec) {
+ INIT_LIST_HEAD(&file->mcount_loc_list);
+ WARN("file already has __mcount_loc section, skipping");
+ return 0;
+ }
+
+ if (list_empty(&file->mcount_loc_list))
+ return 0;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node)
+ idx++;
+
+ sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx);
+ if (!sec)
+ return -1;
+
+ reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+ if (!reloc_sec)
+ return -1;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) {
+
+ loc = (unsigned long *)sec->data->d_buf + idx;
+ memset(loc, 0, sizeof(unsigned long));
+
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+
+ if (insn->sec->sym) {
+ reloc->sym = insn->sec->sym;
+ reloc->addend = insn->offset;
+ } else {
+ reloc->sym = find_symbol_containing(insn->sec, insn->offset);
+
+ if (!reloc->sym) {
+ WARN("missing symbol for insn at offset 0x%lx\n",
+ insn->offset);
+ return -1;
+ }
+
+ reloc->addend = insn->offset - reloc->sym->offset;
+ }
+
+ reloc->type = R_X86_64_64;
+ reloc->offset = idx * sizeof(unsigned long);
+ reloc->sec = reloc_sec;
+ elf_add_reloc(file->elf, reloc);
+
+ idx++;
+ }
+
+ if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+ return -1;
+
+ return 0;
+}
+
/*
* Warnings shouldn't be reported for ignored functions.
*/
@@ -589,7 +666,7 @@ static void add_ignores(struct objtool_file *file)
static const char *uaccess_safe_builtin[] = {
/* KASAN */
"kasan_report",
- "check_memory_region",
+ "kasan_check_range",
/* KASAN out-of-line */
"__asan_loadN_noabort",
"__asan_load1_noabort",
@@ -787,22 +864,16 @@ static int add_jump_destinations(struct objtool_file *file)
if (!is_static_jump(insn))
continue;
- if (insn->offset == FAKE_JUMP_OFFSET)
- continue;
-
reloc = find_reloc_by_dest_range(file->elf, insn->sec,
- insn->offset, insn->len);
+ insn->offset, insn->len);
if (!reloc) {
dest_sec = insn->sec;
dest_off = arch_jump_destination(insn);
} else if (reloc->sym->type == STT_SECTION) {
dest_sec = reloc->sym->sec;
dest_off = arch_dest_reloc_offset(reloc->addend);
- } else if (reloc->sym->sec->idx) {
- dest_sec = reloc->sym->sec;
- dest_off = reloc->sym->sym.st_value +
- arch_dest_reloc_offset(reloc->addend);
- } else if (strstr(reloc->sym->name, "_indirect_thunk_")) {
+ } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
+ !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
/*
* Retpoline jumps are really dynamic jumps in
* disguise, so convert them accordingly.
@@ -814,14 +885,21 @@ static int add_jump_destinations(struct objtool_file *file)
insn->retpoline_safe = true;
continue;
- } else {
- /* external sibling call */
+ } else if (insn->func) {
+ /* internal or external sibling call (with reloc) */
insn->call_dest = reloc->sym;
if (insn->call_dest->static_call_tramp) {
list_add_tail(&insn->static_call_node,
&file->static_call_list);
}
continue;
+ } else if (reloc->sym->sec->idx) {
+ dest_sec = reloc->sym->sec;
+ dest_off = reloc->sym->sym.st_value +
+ arch_dest_reloc_offset(reloc->addend);
+ } else {
+ /* non-func asm code jumping to another file */
+ continue;
}
insn->jump_dest = find_insn(file, dest_sec, dest_off);
@@ -862,15 +940,15 @@ static int add_jump_destinations(struct objtool_file *file)
* case where the parent function's only reference to a
* subfunction is through a jump table.
*/
- if (!strstr(insn->func->name, ".cold.") &&
- strstr(insn->jump_dest->func->name, ".cold.")) {
+ if (!strstr(insn->func->name, ".cold") &&
+ strstr(insn->jump_dest->func->name, ".cold")) {
insn->func->cfunc = insn->jump_dest->func;
insn->jump_dest->func->pfunc = insn->func;
} else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
insn->jump_dest->offset == insn->jump_dest->func->offset) {
- /* internal sibling call */
+ /* internal sibling call (without reloc) */
insn->call_dest = insn->jump_dest->func;
if (insn->call_dest->static_call_tramp) {
list_add_tail(&insn->static_call_node,
@@ -969,6 +1047,22 @@ static int add_call_destinations(struct objtool_file *file)
insn->type = INSN_NOP;
}
+ if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) {
+ if (reloc) {
+ reloc->type = R_NONE;
+ elf_write_reloc(file->elf, reloc);
+ }
+
+ elf_write_insn(file->elf, insn->sec,
+ insn->offset, insn->len,
+ arch_nop_insn(insn->len));
+
+ insn->type = INSN_NOP;
+
+ list_add_tail(&insn->mcount_loc_node,
+ &file->mcount_loc_list);
+ }
+
/*
* Whatever stack impact regular CALLs have, should be undone
* by the RETURN of the called function.
@@ -983,73 +1077,83 @@ static int add_call_destinations(struct objtool_file *file)
}
/*
- * The .alternatives section requires some extra special care, over and above
- * what other special sections require:
- *
- * 1. Because alternatives are patched in-place, we need to insert a fake jump
- * instruction at the end so that validate_branch() skips all the original
- * replaced instructions when validating the new instruction path.
- *
- * 2. An added wrinkle is that the new instruction length might be zero. In
- * that case the old instructions are replaced with noops. We simulate that
- * by creating a fake jump as the only new instruction.
- *
- * 3. In some cases, the alternative section includes an instruction which
- * conditionally jumps to the _end_ of the entry. We have to modify these
- * jumps' destinations to point back to .text rather than the end of the
- * entry in .altinstr_replacement.
+ * The .alternatives section requires some extra special care over and above
+ * other special sections because alternatives are patched in place.
*/
static int handle_group_alt(struct objtool_file *file,
struct special_alt *special_alt,
struct instruction *orig_insn,
struct instruction **new_insn)
{
- static unsigned int alt_group_next_index = 1;
- struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
- unsigned int alt_group = alt_group_next_index++;
+ struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL;
+ struct alt_group *orig_alt_group, *new_alt_group;
unsigned long dest_off;
+
+ orig_alt_group = malloc(sizeof(*orig_alt_group));
+ if (!orig_alt_group) {
+ WARN("malloc failed");
+ return -1;
+ }
+ orig_alt_group->cfi = calloc(special_alt->orig_len,
+ sizeof(struct cfi_state *));
+ if (!orig_alt_group->cfi) {
+ WARN("calloc failed");
+ return -1;
+ }
+
last_orig_insn = NULL;
insn = orig_insn;
sec_for_each_insn_from(file, insn) {
if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
break;
- insn->alt_group = alt_group;
+ insn->alt_group = orig_alt_group;
last_orig_insn = insn;
}
+ orig_alt_group->orig_group = NULL;
+ orig_alt_group->first_insn = orig_insn;
+ orig_alt_group->last_insn = last_orig_insn;
+
+
+ new_alt_group = malloc(sizeof(*new_alt_group));
+ if (!new_alt_group) {
+ WARN("malloc failed");
+ return -1;
+ }
- if (next_insn_same_sec(file, last_orig_insn)) {
- fake_jump = malloc(sizeof(*fake_jump));
- if (!fake_jump) {
+ if (special_alt->new_len < special_alt->orig_len) {
+ /*
+ * Insert a fake nop at the end to make the replacement
+ * alt_group the same size as the original. This is needed to
+ * allow propagate_alt_cfi() to do its magic. When the last
+ * instruction affects the stack, the instruction after it (the
+ * nop) will propagate the new state to the shared CFI array.
+ */
+ nop = malloc(sizeof(*nop));
+ if (!nop) {
WARN("malloc failed");
return -1;
}
- memset(fake_jump, 0, sizeof(*fake_jump));
- INIT_LIST_HEAD(&fake_jump->alts);
- INIT_LIST_HEAD(&fake_jump->stack_ops);
- init_cfi_state(&fake_jump->cfi);
+ memset(nop, 0, sizeof(*nop));
+ INIT_LIST_HEAD(&nop->alts);
+ INIT_LIST_HEAD(&nop->stack_ops);
+ init_cfi_state(&nop->cfi);
- fake_jump->sec = special_alt->new_sec;
- fake_jump->offset = FAKE_JUMP_OFFSET;
- fake_jump->type = INSN_JUMP_UNCONDITIONAL;
- fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
- fake_jump->func = orig_insn->func;
+ nop->sec = special_alt->new_sec;
+ nop->offset = special_alt->new_off + special_alt->new_len;
+ nop->len = special_alt->orig_len - special_alt->new_len;
+ nop->type = INSN_NOP;
+ nop->func = orig_insn->func;
+ nop->alt_group = new_alt_group;
+ nop->ignore = orig_insn->ignore_alts;
}
if (!special_alt->new_len) {
- if (!fake_jump) {
- WARN("%s: empty alternative at end of section",
- special_alt->orig_sec->name);
- return -1;
- }
-
- *new_insn = fake_jump;
- return 0;
+ *new_insn = nop;
+ goto end;
}
- last_new_insn = NULL;
- alt_group = alt_group_next_index++;
insn = *new_insn;
sec_for_each_insn_from(file, insn) {
struct reloc *alt_reloc;
@@ -1061,7 +1165,7 @@ static int handle_group_alt(struct objtool_file *file,
insn->ignore = orig_insn->ignore_alts;
insn->func = orig_insn->func;
- insn->alt_group = alt_group;
+ insn->alt_group = new_alt_group;
/*
* Since alternative replacement code is copy/pasted by the
@@ -1088,14 +1192,8 @@ static int handle_group_alt(struct objtool_file *file,
continue;
dest_off = arch_jump_destination(insn);
- if (dest_off == special_alt->new_off + special_alt->new_len) {
- if (!fake_jump) {
- WARN("%s: alternative jump to end of section",
- special_alt->orig_sec->name);
- return -1;
- }
- insn->jump_dest = fake_jump;
- }
+ if (dest_off == special_alt->new_off + special_alt->new_len)
+ insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
if (!insn->jump_dest) {
WARN_FUNC("can't find alternative jump destination",
@@ -1110,9 +1208,13 @@ static int handle_group_alt(struct objtool_file *file,
return -1;
}
- if (fake_jump)
- list_add(&fake_jump->list, &last_new_insn->list);
-
+ if (nop)
+ list_add(&nop->list, &last_new_insn->list);
+end:
+ new_alt_group->orig_group = orig_alt_group;
+ new_alt_group->first_insn = *new_insn;
+ new_alt_group->last_insn = nop ? : last_new_insn;
+ new_alt_group->cfi = orig_alt_group->cfi;
return 0;
}
@@ -1404,13 +1506,20 @@ static int add_jump_table_alts(struct objtool_file *file)
return 0;
}
+static void set_func_state(struct cfi_state *state)
+{
+ state->cfa = initial_func_cfi.cfa;
+ memcpy(&state->regs, &initial_func_cfi.regs,
+ CFI_NUM_REGS * sizeof(struct cfi_reg));
+ state->stack_size = initial_func_cfi.cfa.offset;
+}
+
static int read_unwind_hints(struct objtool_file *file)
{
struct section *sec, *relocsec;
struct reloc *reloc;
struct unwind_hint *hint;
struct instruction *insn;
- struct cfi_reg *cfa;
int i;
sec = find_section_by_name(file->elf, ".discard.unwind_hints");
@@ -1445,22 +1554,20 @@ static int read_unwind_hints(struct objtool_file *file)
return -1;
}
- cfa = &insn->cfi.cfa;
+ insn->hint = true;
- if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) {
- insn->ret_offset = hint->sp_offset;
+ if (hint->type == UNWIND_HINT_TYPE_FUNC) {
+ set_func_state(&insn->cfi);
continue;
}
- insn->hint = true;
-
if (arch_decode_hint_reg(insn, hint->sp_reg)) {
WARN_FUNC("unsupported unwind_hint sp base reg %d",
insn->sec, insn->offset, hint->sp_reg);
return -1;
}
- cfa->offset = hint->sp_offset;
+ insn->cfi.cfa.offset = bswap_if_needed(hint->sp_offset);
insn->cfi.type = hint->type;
insn->cfi.end = hint->end;
}
@@ -1716,27 +1823,18 @@ static bool is_fentry_call(struct instruction *insn)
static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state)
{
- u8 ret_offset = insn->ret_offset;
struct cfi_state *cfi = &state->cfi;
int i;
if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap)
return true;
- if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset)
+ if (cfi->cfa.offset != initial_func_cfi.cfa.offset)
return true;
- if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset)
+ if (cfi->stack_size != initial_func_cfi.cfa.offset)
return true;
- /*
- * If there is a ret offset hint then don't check registers
- * because a callee-saved register might have been pushed on
- * the stack.
- */
- if (ret_offset)
- return false;
-
for (i = 0; i < CFI_NUM_REGS; i++) {
if (cfi->regs[i].base != initial_func_cfi.regs[i].base ||
cfi->regs[i].offset != initial_func_cfi.regs[i].offset)
@@ -1746,12 +1844,20 @@ static bool has_modified_stack_frame(struct instruction *insn, struct insn_state
return false;
}
+static bool check_reg_frame_pos(const struct cfi_reg *reg,
+ int expected_offset)
+{
+ return reg->base == CFI_CFA &&
+ reg->offset == expected_offset;
+}
+
static bool has_valid_stack_frame(struct insn_state *state)
{
struct cfi_state *cfi = &state->cfi;
- if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA &&
- cfi->regs[CFI_BP].offset == -16)
+ if (cfi->cfa.base == CFI_BP &&
+ check_reg_frame_pos(&cfi->regs[CFI_BP], -cfi->cfa.offset) &&
+ check_reg_frame_pos(&cfi->regs[CFI_RA], -cfi->cfa.offset + 8))
return true;
if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP)
@@ -1880,8 +1986,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
case OP_SRC_REG:
if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
cfa->base == CFI_SP &&
- regs[CFI_BP].base == CFI_CFA &&
- regs[CFI_BP].offset == -cfa->offset) {
+ check_reg_frame_pos(&regs[CFI_BP], -cfa->offset)) {
/* mov %rsp, %rbp */
cfa->base = op->dest.reg;
@@ -1941,12 +2046,58 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
cfa->offset = -cfi->vals[op->src.reg].offset;
cfi->stack_size = cfa->offset;
+ } else if (cfa->base == CFI_SP &&
+ cfi->vals[op->src.reg].base == CFI_SP_INDIRECT &&
+ cfi->vals[op->src.reg].offset == cfa->offset) {
+
+ /*
+ * Stack swizzle:
+ *
+ * 1: mov %rsp, (%[tos])
+ * 2: mov %[tos], %rsp
+ * ...
+ * 3: pop %rsp
+ *
+ * Where:
+ *
+ * 1 - places a pointer to the previous
+ * stack at the Top-of-Stack of the
+ * new stack.
+ *
+ * 2 - switches to the new stack.
+ *
+ * 3 - pops the Top-of-Stack to restore
+ * the original stack.
+ *
+ * Note: we set base to SP_INDIRECT
+ * here and preserve offset. Therefore
+ * when the unwinder reaches ToS it
+ * will dereference SP and then add the
+ * offset to find the next frame, IOW:
+ * (%rsp) + offset.
+ */
+ cfa->base = CFI_SP_INDIRECT;
+
} else {
cfa->base = CFI_UNDEFINED;
cfa->offset = 0;
}
}
+ else if (op->dest.reg == CFI_SP &&
+ cfi->vals[op->src.reg].base == CFI_SP_INDIRECT &&
+ cfi->vals[op->src.reg].offset == cfa->offset) {
+
+ /*
+ * The same stack swizzle case 2) as above. But
+ * because we can't change cfa->base, case 3)
+ * will become a regular POP. Pretend we're a
+ * PUSH so things don't go unbalanced.
+ */
+ cfi->stack_size += 8;
+ }
+
+
break;
case OP_SRC_ADD:
@@ -1966,6 +2117,17 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
break;
}
+ if (!cfi->drap && op->src.reg == CFI_SP &&
+ op->dest.reg == CFI_BP && cfa->base == CFI_SP &&
+ check_reg_frame_pos(&regs[CFI_BP], -cfa->offset + op->src.offset)) {
+
+ /* lea disp(%rsp), %rbp */
+ cfa->base = CFI_BP;
+ cfa->offset -= op->src.offset;
+ cfi->bp_scratch = false;
+ break;
+ }
+
if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
/* drap: lea disp(%rsp), %drap */
@@ -2032,6 +2194,13 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
case OP_SRC_POP:
case OP_SRC_POPF:
+ if (op->dest.reg == CFI_SP && cfa->base == CFI_SP_INDIRECT) {
+
+ /* pop %rsp; # restore from a stack swizzle */
+ cfa->base = CFI_SP;
+ break;
+ }
+
if (!cfi->drap && op->dest.reg == cfa->base) {
/* pop %rbp */
@@ -2060,6 +2229,14 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
break;
case OP_SRC_REG_INDIRECT:
+ if (!cfi->drap && op->dest.reg == cfa->base &&
+ op->dest.reg == CFI_BP) {
+
+ /* mov disp(%rsp), %rbp */
+ cfa->base = CFI_SP;
+ cfa->offset = cfi->stack_size;
+ }
+
if (cfi->drap && op->src.reg == CFI_BP &&
op->src.offset == cfi->drap_offset) {
@@ -2081,6 +2258,12 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
/* mov disp(%rbp), %reg */
/* mov disp(%rsp), %reg */
restore_reg(cfi, op->dest.reg);
+
+ } else if (op->src.reg == CFI_SP &&
+ op->src.offset == regs[op->dest.reg].offset + cfi->stack_size) {
+
+ /* mov disp(%rsp), %reg */
+ restore_reg(cfi, op->dest.reg);
}
break;
@@ -2158,6 +2341,18 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
/* mov reg, disp(%rsp) */
save_reg(cfi, op->src.reg, CFI_CFA,
op->dest.offset - cfi->cfa.offset);
+
+ } else if (op->dest.reg == CFI_SP) {
+
+ /* mov reg, disp(%rsp) */
+ save_reg(cfi, op->src.reg, CFI_CFA,
+ op->dest.offset - cfi->stack_size);
+
+ } else if (op->src.reg == CFI_SP && op->dest.offset == 0) {
+
+ /* mov %rsp, (%reg); # setup a stack swizzle. */
+ cfi->vals[op->dest.reg].base = CFI_SP_INDIRECT;
+ cfi->vals[op->dest.reg].offset = cfa->offset;
}
break;
@@ -2205,22 +2400,47 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
return 0;
}
-static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
+/*
+ * The stack layouts of alternatives instructions can sometimes diverge when
+ * they have stack modifications. That's fine as long as the potential stack
+ * layouts don't conflict at any given potential instruction boundary.
+ *
+ * Flatten the CFIs of the different alternative code streams (both original
+ * and replacement) into a single shared CFI array which can be used to detect
+ * conflicts and nicely feed a linear array of ORC entries to the unwinder.
+ */
+static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn)
{
- struct stack_op *op;
+ struct cfi_state **alt_cfi;
+ int group_off;
- list_for_each_entry(op, &insn->stack_ops, list) {
- struct cfi_state old_cfi = state->cfi;
- int res;
+ if (!insn->alt_group)
+ return 0;
- res = update_cfi_state(insn, &state->cfi, op);
- if (res)
- return res;
+ alt_cfi = insn->alt_group->cfi;
+ group_off = insn->offset - insn->alt_group->first_insn->offset;
- if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) {
- WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
+ if (!alt_cfi[group_off]) {
+ alt_cfi[group_off] = &insn->cfi;
+ } else {
+ if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) {
+ WARN_FUNC("stack layout conflict in alternatives",
+ insn->sec, insn->offset);
return -1;
}
+ }
+
+ return 0;
+}
+
+static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
+{
+ struct stack_op *op;
+
+ list_for_each_entry(op, &insn->stack_ops, list) {
+
+ if (update_cfi_state(insn, &state->cfi, op))
+ return 1;
if (op->dest.type == OP_DEST_PUSHF) {
if (!state->uaccess_stack) {
@@ -2410,28 +2630,20 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct
return 0;
}
-/*
- * Alternatives should not contain any ORC entries, this in turn means they
- * should not contain any CFI ops, which implies all instructions should have
- * the same same CFI state.
- *
- * It is possible to constuct alternatives that have unreachable holes that go
- * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
- * states which then results in ORC entries, which we just said we didn't want.
- *
- * Avoid them by copying the CFI entry of the first instruction into the whole
- * alternative.
- */
-static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
+static struct instruction *next_insn_to_validate(struct objtool_file *file,
+ struct instruction *insn)
{
- struct instruction *first_insn = insn;
- int alt_group = insn->alt_group;
+ struct alt_group *alt_group = insn->alt_group;
- sec_for_each_insn_continue(file, insn) {
- if (insn->alt_group != alt_group)
- break;
- insn->cfi = first_insn->cfi;
- }
+ /*
+ * Simulate the fact that alternatives are patched in-place. When the
+ * end of a replacement alt_group is reached, redirect objtool flow to
+ * the end of the original alt_group.
+ */
+ if (alt_group && insn == alt_group->last_insn && alt_group->orig_group)
+ return next_insn_same_sec(file, alt_group->orig_group->last_insn);
+
+ return next_insn_same_sec(file, insn);
}
/*
@@ -2452,7 +2664,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
sec = insn->sec;
while (1) {
- next_insn = next_insn_same_sec(file, insn);
+ next_insn = next_insn_to_validate(file, insn);
if (file->c_file && func && insn->func && func != insn->func->pfunc) {
WARN("%s() falls through to next function %s()",
@@ -2485,6 +2697,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
insn->visited |= visited;
+ if (propagate_alt_cfi(file, insn))
+ return 1;
+
if (!insn->ignore_alts && !list_empty(&insn->alts)) {
bool skip_orig = false;
@@ -2500,9 +2715,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
}
}
- if (insn->alt_group)
- fill_alternative_cfi(file, insn);
-
if (skip_orig)
return 0;
}
@@ -2540,7 +2752,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
case INSN_JUMP_CONDITIONAL:
case INSN_JUMP_UNCONDITIONAL:
- if (func && is_sibling_call(insn)) {
+ if (is_sibling_call(insn)) {
ret = validate_sibling_call(insn, &state);
if (ret)
return ret;
@@ -2562,7 +2774,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
case INSN_JUMP_DYNAMIC:
case INSN_JUMP_DYNAMIC_CONDITIONAL:
- if (func && is_sibling_call(insn)) {
+ if (is_sibling_call(insn)) {
ret = validate_sibling_call(insn, &state);
if (ret)
return ret;
@@ -2605,15 +2817,19 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
break;
case INSN_STD:
- if (state.df)
+ if (state.df) {
WARN_FUNC("recursive STD", sec, insn->offset);
+ return 1;
+ }
state.df = true;
break;
case INSN_CLD:
- if (!state.df && func)
+ if (!state.df && func) {
WARN_FUNC("redundant CLD", sec, insn->offset);
+ return 1;
+ }
state.df = false;
break;
@@ -2736,9 +2952,6 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
!strcmp(insn->sec->name, ".altinstr_aux"))
return true;
- if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET)
- return true;
-
if (!insn->func)
return false;
@@ -2824,10 +3037,7 @@ static int validate_section(struct objtool_file *file, struct section *sec)
continue;
init_insn_state(&state, sec);
- state.cfi.cfa = initial_func_cfi.cfa;
- memcpy(&state.cfi.regs, &initial_func_cfi.regs,
- CFI_NUM_REGS * sizeof(struct cfi_reg));
- state.cfi.stack_size = initial_func_cfi.cfa.offset;
+ set_func_state(&state.cfi);
warnings += validate_symbol(file, sec, func, &state);
}
@@ -2940,6 +3150,13 @@ int check(struct objtool_file *file)
goto out;
warnings += ret;
+ if (mcount) {
+ ret = create_mcount_loc_sections(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
+
out:
/*
* For now, don't fail the kernel build on fatal warnings. These
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index e85988ce04f1..93fa833a49a5 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -15,10 +15,10 @@
#include <string.h>
#include <unistd.h>
#include <errno.h>
-#include "builtin.h"
+#include <objtool/builtin.h>
-#include "elf.h"
-#include "warn.h"
+#include <objtool/elf.h>
+#include <objtool/warn.h>
#define MAX_NAME_LEN 128
@@ -814,25 +814,27 @@ static int elf_rebuild_rel_reloc_section(struct section *sec, int nr)
{
struct reloc *reloc;
int idx = 0, size;
- GElf_Rel *relocs;
+ void *buf;
/* Allocate a buffer for relocations */
- size = nr * sizeof(*relocs);
- relocs = malloc(size);
- if (!relocs) {
+ size = nr * sizeof(GElf_Rel);
+ buf = malloc(size);
+ if (!buf) {
perror("malloc");
return -1;
}
- sec->data->d_buf = relocs;
+ sec->data->d_buf = buf;
sec->data->d_size = size;
+ sec->data->d_type = ELF_T_REL;
sec->sh.sh_size = size;
idx = 0;
list_for_each_entry(reloc, &sec->reloc_list, list) {
- relocs[idx].r_offset = reloc->offset;
- relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+ reloc->rel.r_offset = reloc->offset;
+ reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+ gelf_update_rel(sec->data, idx, &reloc->rel);
idx++;
}
@@ -843,26 +845,28 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr)
{
struct reloc *reloc;
int idx = 0, size;
- GElf_Rela *relocs;
+ void *buf;
/* Allocate a buffer for relocations with addends */
- size = nr * sizeof(*relocs);
- relocs = malloc(size);
- if (!relocs) {
+ size = nr * sizeof(GElf_Rela);
+ buf = malloc(size);
+ if (!buf) {
perror("malloc");
return -1;
}
- sec->data->d_buf = relocs;
+ sec->data->d_buf = buf;
sec->data->d_size = size;
+ sec->data->d_type = ELF_T_RELA;
sec->sh.sh_size = size;
idx = 0;
list_for_each_entry(reloc, &sec->reloc_list, list) {
- relocs[idx].r_offset = reloc->offset;
- relocs[idx].r_addend = reloc->addend;
- relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+ reloc->rela.r_offset = reloc->offset;
+ reloc->rela.r_addend = reloc->addend;
+ reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+ gelf_update_rela(sec->data, idx, &reloc->rela);
idx++;
}
diff --git a/tools/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 4a84c3081b8e..6ff0685f5cc5 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -8,12 +8,8 @@
#include <stdbool.h>
#include <linux/list.h>
-#include "objtool.h"
-#include "cfi.h"
-
-#ifdef INSN_USE_ORC
-#include <asm/orc_types.h>
-#endif
+#include <objtool/objtool.h>
+#include <objtool/cfi.h>
enum insn_type {
INSN_JUMP_CONDITIONAL,
diff --git a/tools/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 85c979caa367..2502bb27de17 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -8,7 +8,7 @@
#include <subcmd/parse-options.h>
extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
extern int cmd_check(int argc, const char **argv);
extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/cfi.h b/tools/objtool/include/objtool/cfi.h
index c7c59c6a44ee..fd5cb0bed9bf 100644
--- a/tools/objtool/cfi.h
+++ b/tools/objtool/include/objtool/cfi.h
@@ -6,7 +6,7 @@
#ifndef _OBJTOOL_CFI_H
#define _OBJTOOL_CFI_H
-#include "cfi_regs.h"
+#include <arch/cfi_regs.h>
#define CFI_UNDEFINED -1
#define CFI_CFA -2
diff --git a/tools/objtool/check.h b/tools/objtool/include/objtool/check.h
index 5ec00a4b891b..f5be798107bc 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -7,8 +7,8 @@
#define _CHECK_H
#include <stdbool.h>
-#include "cfi.h"
-#include "arch.h"
+#include <objtool/cfi.h>
+#include <objtool/arch.h>
struct insn_state {
struct cfi_state cfi;
@@ -19,10 +19,28 @@ struct insn_state {
s8 instr;
};
+struct alt_group {
+ /*
+ * Pointer from a replacement group to the original group. NULL if it
+ * *is* the original group.
+ */
+ struct alt_group *orig_group;
+
+ /* First and last instructions in the group */
+ struct instruction *first_insn, *last_insn;
+
+ /*
+ * Byte-offset-addressed len-sized array of pointers to CFI structs.
+ * This is shared with the other alt_groups in the same alternative.
+ */
+ struct cfi_state **cfi;
+};
+
struct instruction {
struct list_head list;
struct hlist_node hash;
struct list_head static_call_node;
+ struct list_head mcount_loc_node;
struct section *sec;
unsigned long offset;
unsigned int len;
@@ -33,8 +51,7 @@ struct instruction {
bool retpoline_safe;
s8 instr;
u8 visited;
- u8 ret_offset;
- int alt_group;
+ struct alt_group *alt_group;
struct symbol *call_dest;
struct instruction *jump_dest;
struct instruction *first_jump_src;
@@ -43,9 +60,6 @@ struct instruction {
struct symbol *func;
struct list_head stack_ops;
struct cfi_state cfi;
-#ifdef INSN_USE_ORC
- struct orc_entry orc;
-#endif
};
static inline bool is_static_jump(struct instruction *insn)
@@ -54,6 +68,17 @@ static inline bool is_static_jump(struct instruction *insn)
insn->type == INSN_JUMP_UNCONDITIONAL;
}
+static inline bool is_dynamic_jump(struct instruction *insn)
+{
+ return insn->type == INSN_JUMP_DYNAMIC ||
+ insn->type == INSN_JUMP_DYNAMIC_CONDITIONAL;
+}
+
+static inline bool is_jump(struct instruction *insn)
+{
+ return is_static_jump(insn) || is_dynamic_jump(insn);
+}
+
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset);
diff --git a/tools/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index e6890cc70a25..e6890cc70a25 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
diff --git a/tools/objtool/include/objtool/endianness.h b/tools/objtool/include/objtool/endianness.h
new file mode 100644
index 000000000000..10241341eff3
--- /dev/null
+++ b/tools/objtool/include/objtool/endianness.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _OBJTOOL_ENDIANNESS_H
+#define _OBJTOOL_ENDIANNESS_H
+
+#include <arch/endianness.h>
+#include <linux/kernel.h>
+#include <endian.h>
+
+#ifndef __TARGET_BYTE_ORDER
+#error undefined arch __TARGET_BYTE_ORDER
+#endif
+
+#if __BYTE_ORDER != __TARGET_BYTE_ORDER
+#define __NEED_BSWAP 1
+#else
+#define __NEED_BSWAP 0
+#endif
+
+/*
+ * Does a byte swap if target endianness doesn't match the host, i.e. cross
+ * compilation for little endian on big endian and vice versa.
+ * To be used for multi-byte values conversion, which are read from / about
+ * to be written to a target native endianness ELF file.
+ */
+#define bswap_if_needed(val) \
+({ \
+ __typeof__(val) __ret; \
+ switch (sizeof(val)) { \
+ case 8: __ret = __NEED_BSWAP ? bswap_64(val) : (val); break; \
+ case 4: __ret = __NEED_BSWAP ? bswap_32(val) : (val); break; \
+ case 2: __ret = __NEED_BSWAP ? bswap_16(val) : (val); break; \
+ default: \
+ BUILD_BUG(); break; \
+ } \
+ __ret; \
+})
+
+#endif /* _OBJTOOL_ENDIANNESS_H */
diff --git a/tools/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index 4125d4578b23..e68e37476c15 100644
--- a/tools/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -10,7 +10,7 @@
#include <linux/list.h>
#include <linux/hashtable.h>
-#include "elf.h"
+#include <objtool/elf.h>
#define __weak __attribute__((weak))
@@ -19,6 +19,7 @@ struct objtool_file {
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 20);
struct list_head static_call_list;
+ struct list_head mcount_loc_list;
bool ignore_unreachables, c_file, hints, rodata;
};
@@ -26,7 +27,6 @@ struct objtool_file *objtool_open_read(const char *_objname);
int check(struct objtool_file *file);
int orc_dump(const char *objname);
-int create_orc(struct objtool_file *file);
-int create_orc_sections(struct objtool_file *file);
+int orc_create(struct objtool_file *file);
#endif /* _OBJTOOL_H */
diff --git a/tools/objtool/special.h b/tools/objtool/include/objtool/special.h
index abddf38ef334..8a09f4e9d480 100644
--- a/tools/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -7,8 +7,8 @@
#define _SPECIAL_H
#include <stdbool.h>
-#include "check.h"
-#include "elf.h"
+#include <objtool/check.h>
+#include <objtool/elf.h>
#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
diff --git a/tools/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index 7799f60de80a..d99c4675e4a5 100644
--- a/tools/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -11,7 +11,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include "elf.h"
+#include <objtool/elf.h>
extern const char *objname;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 9df0cd86d310..7b97ce499405 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -21,9 +21,9 @@
#include <subcmd/pager.h>
#include <linux/kernel.h>
-#include "builtin.h"
-#include "objtool.h"
-#include "warn.h"
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
struct cmd_struct {
const char *name;
@@ -62,6 +62,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
INIT_LIST_HEAD(&file.static_call_list);
+ INIT_LIST_HEAD(&file.mcount_loc_list);
file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
file.ignore_unreachables = no_unreachable;
file.hints = false;
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index 5e6a95368d35..f5a8508c42d6 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -6,8 +6,9 @@
#include <unistd.h>
#include <linux/objtool.h>
#include <asm/orc_types.h>
-#include "objtool.h"
-#include "warn.h"
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
static const char *reg_name(unsigned int reg)
{
@@ -54,7 +55,7 @@ static void print_reg(unsigned int reg, int offset)
if (reg == ORC_REG_BP_INDIRECT)
printf("(bp%+d)", offset);
else if (reg == ORC_REG_SP_INDIRECT)
- printf("(sp%+d)", offset);
+ printf("(sp)%+d", offset);
else if (reg == ORC_REG_UNDEFINED)
printf("(und)");
else
@@ -197,11 +198,11 @@ int orc_dump(const char *_objname)
printf(" sp:");
- print_reg(orc[i].sp_reg, orc[i].sp_offset);
+ print_reg(orc[i].sp_reg, bswap_if_needed(orc[i].sp_offset));
printf(" bp:");
- print_reg(orc[i].bp_reg, orc[i].bp_offset);
+ print_reg(orc[i].bp_reg, bswap_if_needed(orc[i].bp_offset));
printf(" type:%s end:%d\n",
orc_type_name(orc[i].type), orc[i].end);
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 9ce68b385a1b..738aa5021bc4 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -9,93 +9,91 @@
#include <linux/objtool.h>
#include <asm/orc_types.h>
-#include "check.h"
-#include "warn.h"
+#include <objtool/check.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
-int create_orc(struct objtool_file *file)
+static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi)
{
- struct instruction *insn;
+ struct instruction *insn = container_of(cfi, struct instruction, cfi);
+ struct cfi_reg *bp = &cfi->regs[CFI_BP];
- for_each_insn(file, insn) {
- struct orc_entry *orc = &insn->orc;
- struct cfi_reg *cfa = &insn->cfi.cfa;
- struct cfi_reg *bp = &insn->cfi.regs[CFI_BP];
+ memset(orc, 0, sizeof(*orc));
- if (!insn->sec->text)
- continue;
-
- orc->end = insn->cfi.end;
-
- if (cfa->base == CFI_UNDEFINED) {
- orc->sp_reg = ORC_REG_UNDEFINED;
- continue;
- }
+ orc->end = cfi->end;
- switch (cfa->base) {
- case CFI_SP:
- orc->sp_reg = ORC_REG_SP;
- break;
- case CFI_SP_INDIRECT:
- orc->sp_reg = ORC_REG_SP_INDIRECT;
- break;
- case CFI_BP:
- orc->sp_reg = ORC_REG_BP;
- break;
- case CFI_BP_INDIRECT:
- orc->sp_reg = ORC_REG_BP_INDIRECT;
- break;
- case CFI_R10:
- orc->sp_reg = ORC_REG_R10;
- break;
- case CFI_R13:
- orc->sp_reg = ORC_REG_R13;
- break;
- case CFI_DI:
- orc->sp_reg = ORC_REG_DI;
- break;
- case CFI_DX:
- orc->sp_reg = ORC_REG_DX;
- break;
- default:
- WARN_FUNC("unknown CFA base reg %d",
- insn->sec, insn->offset, cfa->base);
- return -1;
- }
+ if (cfi->cfa.base == CFI_UNDEFINED) {
+ orc->sp_reg = ORC_REG_UNDEFINED;
+ return 0;
+ }
- switch(bp->base) {
- case CFI_UNDEFINED:
- orc->bp_reg = ORC_REG_UNDEFINED;
- break;
- case CFI_CFA:
- orc->bp_reg = ORC_REG_PREV_SP;
- break;
- case CFI_BP:
- orc->bp_reg = ORC_REG_BP;
- break;
- default:
- WARN_FUNC("unknown BP base reg %d",
- insn->sec, insn->offset, bp->base);
- return -1;
- }
+ switch (cfi->cfa.base) {
+ case CFI_SP:
+ orc->sp_reg = ORC_REG_SP;
+ break;
+ case CFI_SP_INDIRECT:
+ orc->sp_reg = ORC_REG_SP_INDIRECT;
+ break;
+ case CFI_BP:
+ orc->sp_reg = ORC_REG_BP;
+ break;
+ case CFI_BP_INDIRECT:
+ orc->sp_reg = ORC_REG_BP_INDIRECT;
+ break;
+ case CFI_R10:
+ orc->sp_reg = ORC_REG_R10;
+ break;
+ case CFI_R13:
+ orc->sp_reg = ORC_REG_R13;
+ break;
+ case CFI_DI:
+ orc->sp_reg = ORC_REG_DI;
+ break;
+ case CFI_DX:
+ orc->sp_reg = ORC_REG_DX;
+ break;
+ default:
+ WARN_FUNC("unknown CFA base reg %d",
+ insn->sec, insn->offset, cfi->cfa.base);
+ return -1;
+ }
- orc->sp_offset = cfa->offset;
- orc->bp_offset = bp->offset;
- orc->type = insn->cfi.type;
+ switch (bp->base) {
+ case CFI_UNDEFINED:
+ orc->bp_reg = ORC_REG_UNDEFINED;
+ break;
+ case CFI_CFA:
+ orc->bp_reg = ORC_REG_PREV_SP;
+ break;
+ case CFI_BP:
+ orc->bp_reg = ORC_REG_BP;
+ break;
+ default:
+ WARN_FUNC("unknown BP base reg %d",
+ insn->sec, insn->offset, bp->base);
+ return -1;
}
+ orc->sp_offset = cfi->cfa.offset;
+ orc->bp_offset = bp->offset;
+ orc->type = cfi->type;
+
return 0;
}
-static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec,
- unsigned int idx, struct section *insn_sec,
- unsigned long insn_off, struct orc_entry *o)
+static int write_orc_entry(struct elf *elf, struct section *orc_sec,
+ struct section *ip_rsec, unsigned int idx,
+ struct section *insn_sec, unsigned long insn_off,
+ struct orc_entry *o)
{
struct orc_entry *orc;
struct reloc *reloc;
/* populate ORC data */
- orc = (struct orc_entry *)u_sec->data->d_buf + idx;
+ orc = (struct orc_entry *)orc_sec->data->d_buf + idx;
memcpy(orc, o, sizeof(*orc));
+ orc->sp_offset = bswap_if_needed(orc->sp_offset);
+ orc->bp_offset = bswap_if_needed(orc->bp_offset);
/* populate reloc for ip */
reloc = malloc(sizeof(*reloc));
@@ -114,102 +112,149 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti
reloc->type = R_X86_64_PC32;
reloc->offset = idx * sizeof(int);
- reloc->sec = ip_relocsec;
+ reloc->sec = ip_rsec;
elf_add_reloc(elf, reloc);
return 0;
}
-int create_orc_sections(struct objtool_file *file)
-{
- struct instruction *insn, *prev_insn;
- struct section *sec, *u_sec, *ip_relocsec;
- unsigned int idx;
+struct orc_list_entry {
+ struct list_head list;
+ struct orc_entry orc;
+ struct section *insn_sec;
+ unsigned long insn_off;
+};
- struct orc_entry empty = {
- .sp_reg = ORC_REG_UNDEFINED,
- .bp_reg = ORC_REG_UNDEFINED,
- .type = UNWIND_HINT_TYPE_CALL,
- };
+static int orc_list_add(struct list_head *orc_list, struct orc_entry *orc,
+ struct section *sec, unsigned long offset)
+{
+ struct orc_list_entry *entry = malloc(sizeof(*entry));
- sec = find_section_by_name(file->elf, ".orc_unwind");
- if (sec) {
- WARN("file already has .orc_unwind section, skipping");
+ if (!entry) {
+ WARN("malloc failed");
return -1;
}
- /* count the number of needed orcs */
- idx = 0;
- for_each_sec(file, sec) {
- if (!sec->text)
- continue;
-
- prev_insn = NULL;
- sec_for_each_insn(file, sec, insn) {
- if (!prev_insn ||
- memcmp(&insn->orc, &prev_insn->orc,
- sizeof(struct orc_entry))) {
- idx++;
- }
- prev_insn = insn;
- }
-
- /* section terminator */
- if (prev_insn)
- idx++;
- }
- if (!idx)
- return -1;
+ entry->orc = *orc;
+ entry->insn_sec = sec;
+ entry->insn_off = offset;
+ list_add_tail(&entry->list, orc_list);
+ return 0;
+}
- /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
- sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
- if (!sec)
- return -1;
+static unsigned long alt_group_len(struct alt_group *alt_group)
+{
+ return alt_group->last_insn->offset +
+ alt_group->last_insn->len -
+ alt_group->first_insn->offset;
+}
- ip_relocsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
- if (!ip_relocsec)
- return -1;
+int orc_create(struct objtool_file *file)
+{
+ struct section *sec, *ip_rsec, *orc_sec;
+ unsigned int nr = 0, idx = 0;
+ struct orc_list_entry *entry;
+ struct list_head orc_list;
- /* create .orc_unwind section */
- u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
- sizeof(struct orc_entry), idx);
+ struct orc_entry null = {
+ .sp_reg = ORC_REG_UNDEFINED,
+ .bp_reg = ORC_REG_UNDEFINED,
+ .type = UNWIND_HINT_TYPE_CALL,
+ };
- /* populate sections */
- idx = 0;
+ /* Build a deduplicated list of ORC entries: */
+ INIT_LIST_HEAD(&orc_list);
for_each_sec(file, sec) {
+ struct orc_entry orc, prev_orc = {0};
+ struct instruction *insn;
+ bool empty = true;
+
if (!sec->text)
continue;
- prev_insn = NULL;
sec_for_each_insn(file, sec, insn) {
- if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
- sizeof(struct orc_entry))) {
+ struct alt_group *alt_group = insn->alt_group;
+ int i;
- if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
- insn->sec, insn->offset,
- &insn->orc))
+ if (!alt_group) {
+ if (init_orc_entry(&orc, &insn->cfi))
return -1;
+ if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+ continue;
+ if (orc_list_add(&orc_list, &orc, sec,
+ insn->offset))
+ return -1;
+ nr++;
+ prev_orc = orc;
+ empty = false;
+ continue;
+ }
- idx++;
+ /*
+ * Alternatives can have different stack layout
+ * possibilities (but they shouldn't conflict).
+ * Instead of traversing the instructions, use the
+ * alt_group's flattened byte-offset-addressed CFI
+ * array.
+ */
+ for (i = 0; i < alt_group_len(alt_group); i++) {
+ struct cfi_state *cfi = alt_group->cfi[i];
+ if (!cfi)
+ continue;
+ if (init_orc_entry(&orc, cfi))
+ return -1;
+ if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+ continue;
+ if (orc_list_add(&orc_list, &orc, insn->sec,
+ insn->offset + i))
+ return -1;
+ nr++;
+ prev_orc = orc;
+ empty = false;
}
- prev_insn = insn;
- }
- /* section terminator */
- if (prev_insn) {
- if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
- prev_insn->sec,
- prev_insn->offset + prev_insn->len,
- &empty))
- return -1;
+ /* Skip to the end of the alt_group */
+ insn = alt_group->last_insn;
+ }
- idx++;
+ /* Add a section terminator */
+ if (!empty) {
+ orc_list_add(&orc_list, &null, sec, sec->len);
+ nr++;
}
}
+ if (!nr)
+ return 0;
+
+ /* Create .orc_unwind, .orc_unwind_ip and .rela.orc_unwind_ip sections: */
+ sec = find_section_by_name(file->elf, ".orc_unwind");
+ if (sec) {
+ WARN("file already has .orc_unwind section, skipping");
+ return -1;
+ }
+ orc_sec = elf_create_section(file->elf, ".orc_unwind", 0,
+ sizeof(struct orc_entry), nr);
+ if (!orc_sec)
+ return -1;
+
+ sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
+ if (!sec)
+ return -1;
+ ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+ if (!ip_rsec)
+ return -1;
+
+ /* Write ORC entries to sections: */
+ list_for_each_entry(entry, &orc_list, list) {
+ if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++,
+ entry->insn_sec, entry->insn_off,
+ &entry->orc))
+ return -1;
+ }
- if (elf_rebuild_reloc_section(file->elf, ip_relocsec))
+ if (elf_rebuild_reloc_section(file->elf, ip_rsec))
return -1;
return 0;
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 1a2420febd08..2c7fbda7b055 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -11,10 +11,11 @@
#include <stdlib.h>
#include <string.h>
-#include "builtin.h"
-#include "special.h"
-#include "warn.h"
-#include "arch_special.h"
+#include <arch/special.h>
+#include <objtool/builtin.h>
+#include <objtool/special.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
struct special_entry {
const char *sec;
@@ -77,8 +78,9 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
if (entry->feature) {
unsigned short feature;
- feature = *(unsigned short *)(sec->data->d_buf + offset +
- entry->feature);
+ feature = bswap_if_needed(*(unsigned short *)(sec->data->d_buf +
+ offset +
+ entry->feature));
arch_handle_alternative(feature, alt);
}
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
index 7843e9a7a72f..8314e824db4a 100644
--- a/tools/objtool/weak.c
+++ b/tools/objtool/weak.c
@@ -7,7 +7,7 @@
#include <stdbool.h>
#include <errno.h>
-#include "objtool.h"
+#include <objtool/objtool.h>
#define UNSUPPORTED(name) \
({ \
@@ -25,12 +25,7 @@ int __weak orc_dump(const char *_objname)
UNSUPPORTED("orc");
}
-int __weak create_orc(struct objtool_file *file)
-{
- UNSUPPORTED("orc");
-}
-
-int __weak create_orc_sections(struct objtool_file *file)
+int __weak orc_create(struct objtool_file *file)
{
UNSUPPORTED("orc");
}
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index c0a66400a960..9af8b8dfb7b6 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -29,7 +29,7 @@ OPTIONS
Show just the sample frequency used for each event.
-v::
---verbose=::
+--verbose::
Show all fields.
-g::
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 1e91121bac0f..6e82b7cc0bf0 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -28,8 +28,8 @@ OPTIONS
specified: function_graph or function.
-v::
---verbose=::
- Verbosity level.
+--verbose::
+ Increase the verbosity level.
-F::
--funcs::
diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt
index f3c620951f6e..c97527df8ecd 100644
--- a/tools/perf/Documentation/perf-kallsyms.txt
+++ b/tools/perf/Documentation/perf-kallsyms.txt
@@ -20,5 +20,5 @@ modules).
OPTIONS
-------
-v::
---verbose=::
+--verbose::
Increase verbosity level, showing details about symbol table loading, etc.
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index abc9b5d83312..f0da8cf63e9a 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -97,8 +97,8 @@ filter out the startup phase of the program, which is often very different.
Filter out events for these pids and for 'trace' itself (comma separated list).
-v::
---verbose=::
- Verbosity level.
+--verbose::
+ Increase the verbosity level.
--no-inherit::
Child tasks do not inherit counters.
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index f43d2551f3de..a7d768fdc8a1 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -607,7 +607,7 @@ arch_errno_hdr_dir := $(srctree)/tools
arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
$(arch_errno_name_array): $(arch_errno_tbl)
- $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@
+ $(Q)$(SHELL) '$(arch_errno_tbl)' '$(patsubst -%,,$(CC))' $(arch_errno_hdr_dir) > $@
sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
@@ -1001,14 +1001,6 @@ $(INSTALL_DOC_TARGETS):
### Cleaning rules
-#
-# This is here, not in Makefile.config, because Makefile.config does
-# not get included for the clean target:
-#
-config-clean:
- $(call QUIET_CLEAN, config)
- $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
-
python-clean:
$(python-clean)
@@ -1048,7 +1040,7 @@ endif # BUILD_BPF_SKEL
bpf-skel-clean:
$(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS)
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean bpf-skel-clean
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 911c7f2b3581..0a44bc462cec 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -191,6 +191,10 @@ out:
return err;
}
+#define ETM_SET_OPT_CTXTID (1 << 0)
+#define ETM_SET_OPT_TS (1 << 1)
+#define ETM_SET_OPT_MASK (ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS)
+
static int cs_etm_set_option(struct auxtrace_record *itr,
struct evsel *evsel, u32 option)
{
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 96b2157f0371..0b2480cf3e47 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -521,3 +521,4 @@
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index d443423495e5..3abef2144dac 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -444,3 +444,4 @@
439 common faccessat2 sys_faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr sys_mount_setattr
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 8cc6642fce7a..5a9f9a7bf07d 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -10,10 +10,11 @@ PERF_HAVE_JITDUMP := 1
# Syscall table generation
#
-out := $(OUTPUT)arch/x86/include/generated/asm
-header := $(out)/syscalls_64.c
-sys := $(srctree)/tools/perf/arch/x86/entry/syscalls
-systbl := $(sys)/syscalltbl.sh
+generated := $(OUTPUT)arch/x86/include/generated
+out := $(generated)/asm
+header := $(out)/syscalls_64.c
+sys := $(srctree)/tools/perf/arch/x86/entry/syscalls
+systbl := $(sys)/syscalltbl.sh
# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
@@ -22,6 +23,6 @@ $(header): $(sys)/syscall_64.tbl $(systbl)
$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
clean::
- $(call QUIET_CLEAN, x86) $(RM) $(header)
+ $(call QUIET_CLEAN, x86) $(RM) -r $(header) $(generated)
archheaders: $(header)
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 78672124d28b..7bf01cbe582f 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -363,6 +363,7 @@
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 6a54b94f1c25..0e20f3dc69f3 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -10,6 +10,7 @@ int test__rdpmc(struct test *test __maybe_unused, int subtest);
int test__insn_x86(struct test *test __maybe_unused, int subtest);
int test__intel_pt_pkt_decoder(struct test *test, int subtest);
int test__bp_modify(struct test *test, int subtest);
+int test__x86_sample_parsing(struct test *test, int subtest);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 36d4f248b51d..28d793390198 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -3,5 +3,6 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-y += arch-tests.o
perf-y += rdpmc.o
+perf-y += sample-parsing.o
perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
perf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index bc25d727b4e9..71aa67367ad6 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -31,6 +31,10 @@ struct test arch_tests[] = {
},
#endif
{
+ .desc = "x86 Sample parsing",
+ .func = test__x86_sample_parsing,
+ },
+ {
.func = NULL,
},
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index f782ef8c5982..4f75ae990140 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
-#include "../../../../arch/x86/include/asm/insn.h"
#include <string.h>
#include "debug.h"
#include "tests/tests.h"
#include "arch-tests.h"
+#include "../../../../arch/x86/include/asm/insn.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c
new file mode 100644
index 000000000000..c92db87e4479
--- /dev/null
+++ b/tools/perf/arch/x86/tests/sample-parsing.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdbool.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "event.h"
+#include "evsel.h"
+#include "debug.h"
+#include "util/synthetic-events.h"
+
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define COMP(m) do { \
+ if (s1->m != s2->m) { \
+ pr_debug("Samples differ at '"#m"'\n"); \
+ return false; \
+ } \
+} while (0)
+
+static bool samples_same(const struct perf_sample *s1,
+ const struct perf_sample *s2,
+ u64 type)
+{
+ if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+ COMP(ins_lat);
+
+ return true;
+}
+
+static int do_test(u64 sample_type)
+{
+ struct evsel evsel = {
+ .needs_swap = false,
+ .core = {
+ . attr = {
+ .sample_type = sample_type,
+ .read_format = 0,
+ },
+ },
+ };
+ union perf_event *event;
+ struct perf_sample sample = {
+ .weight = 101,
+ .ins_lat = 102,
+ };
+ struct perf_sample sample_out;
+ size_t i, sz, bufsz;
+ int err, ret = -1;
+
+ sz = perf_event__sample_event_size(&sample, sample_type, 0);
+ bufsz = sz + 4096; /* Add a bit for overrun checking */
+ event = malloc(bufsz);
+ if (!event) {
+ pr_debug("malloc failed\n");
+ return -1;
+ }
+
+ memset(event, 0xff, bufsz);
+ event->header.type = PERF_RECORD_SAMPLE;
+ event->header.misc = 0;
+ event->header.size = sz;
+
+ err = perf_event__synthesize_sample(event, sample_type, 0, &sample);
+ if (err) {
+ pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+ "perf_event__synthesize_sample", sample_type, err);
+ goto out_free;
+ }
+
+ /* The data does not contain 0xff so we use that to check the size */
+ for (i = bufsz; i > 0; i--) {
+ if (*(i - 1 + (u8 *)event) != 0xff)
+ break;
+ }
+ if (i != sz) {
+ pr_debug("Event size mismatch: actual %zu vs expected %zu\n",
+ i, sz);
+ goto out_free;
+ }
+
+ evsel.sample_size = __evsel__sample_size(sample_type);
+
+ err = evsel__parse_sample(&evsel, event, &sample_out);
+ if (err) {
+ pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+ "evsel__parse_sample", sample_type, err);
+ goto out_free;
+ }
+
+ if (!samples_same(&sample, &sample_out, sample_type)) {
+ pr_debug("parsing failed for sample_type %#"PRIx64"\n",
+ sample_type);
+ goto out_free;
+ }
+
+ ret = 0;
+out_free:
+ free(event);
+
+ return ret;
+}
+
+/**
+ * test__x86_sample_parsing - test X86 specific sample parsing
+ *
+ * This function implements a test that synthesizes a sample event, parses it
+ * and then checks that the parsed sample matches the original sample. If the
+ * test passes %0 is returned, otherwise %-1 is returned.
+ *
+ * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type.
+ * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type.
+ */
+int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ return do_test(PERF_SAMPLE_WEIGHT_STRUCT);
+}
diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c
index 3e6791531ca5..34d600c51044 100644
--- a/tools/perf/arch/x86/util/archinsn.c
+++ b/tools/perf/arch/x86/util/archinsn.c
@@ -1,10 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-#include "../../../../arch/x86/include/asm/insn.h"
#include "archinsn.h"
#include "event.h"
#include "machine.h"
#include "thread.h"
#include "symbol.h"
+#include "../../../../arch/x86/include/asm/insn.h"
void arch_fetch_insn(struct perf_sample *sample,
struct thread *thread,
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 11726ec6285f..20b87e29c96f 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -344,18 +344,22 @@ static void mempol_restore(void)
static void bind_to_memnode(int node)
{
- unsigned long nodemask;
+ struct bitmask *node_mask;
int ret;
if (node == NUMA_NO_NODE)
return;
- BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
- nodemask = 1L << node;
+ node_mask = numa_allocate_nodemask();
+ BUG_ON(!node_mask);
- ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
- dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
+ numa_bitmask_clearall(node_mask);
+ numa_bitmask_setbit(node_mask, node);
+ ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1);
+ dprintf("binding to node %d, mask: %016lx => %d\n", node, *node_mask->maskp, ret);
+
+ numa_bitmask_free(node_mask);
BUG_ON(ret);
}
@@ -876,8 +880,6 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
prctl(0, bytes_worked);
}
-#define MAX_NR_NODES 64
-
/*
* Count the number of nodes a process's threads
* are spread out on.
@@ -888,10 +890,15 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
*/
static int count_process_nodes(int process_nr)
{
- char node_present[MAX_NR_NODES] = { 0, };
+ char *node_present;
int nodes;
int n, t;
+ node_present = (char *)malloc(g->p.nr_nodes * sizeof(char));
+ BUG_ON(!node_present);
+ for (nodes = 0; nodes < g->p.nr_nodes; nodes++)
+ node_present[nodes] = 0;
+
for (t = 0; t < g->p.nr_threads; t++) {
struct thread_data *td;
int task_nr;
@@ -901,17 +908,20 @@ static int count_process_nodes(int process_nr)
td = g->threads + task_nr;
node = numa_node_of_cpu(td->curr_cpu);
- if (node < 0) /* curr_cpu was likely still -1 */
+ if (node < 0) /* curr_cpu was likely still -1 */ {
+ free(node_present);
return 0;
+ }
node_present[node] = 1;
}
nodes = 0;
- for (n = 0; n < MAX_NR_NODES; n++)
+ for (n = 0; n < g->p.nr_nodes; n++)
nodes += node_present[n];
+ free(node_present);
return nodes;
}
@@ -980,7 +990,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
{
unsigned int loops_done_min, loops_done_max;
int process_groups;
- int nodes[MAX_NR_NODES];
+ int *nodes;
int distance;
int nr_min;
int nr_max;
@@ -994,6 +1004,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
if (!g->p.show_convergence && !g->p.measure_convergence)
return;
+ nodes = (int *)malloc(g->p.nr_nodes * sizeof(int));
+ BUG_ON(!nodes);
for (node = 0; node < g->p.nr_nodes; node++)
nodes[node] = 0;
@@ -1035,8 +1047,10 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
BUG_ON(sum > g->p.nr_tasks);
- if (0 && (sum < g->p.nr_tasks))
+ if (0 && (sum < g->p.nr_tasks)) {
+ free(nodes);
return;
+ }
/*
* Count the number of distinct process groups present
@@ -1088,6 +1102,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
}
tprintf("\n");
}
+
+ free(nodes);
}
static void show_summary(double runtime_ns_max, int l, double *convergence)
@@ -1413,7 +1429,7 @@ static int init(void)
g->p.nr_nodes = numa_max_node() + 1;
/* char array in count_process_nodes(): */
- BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
+ BUG_ON(g->p.nr_nodes < 0);
if (g->p.show_quiet && !g->p.show_details)
g->p.show_details = -1;
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index cecce93ccc63..488f6e6ba1a5 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -309,11 +309,11 @@ int bench_sched_messaging(int argc, const char **argv)
num_groups, num_groups * 2 * num_fds,
thread_mode ? "threads" : "processes");
printf(" %14s: %lu.%03lu [sec]\n", "Total time",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
case BENCH_FORMAT_SIMPLE:
- printf("%lu.%03lu\n", diff.tv_sec,
+ printf("%lu.%03lu\n", (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
default:
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 3c88d1f201f1..a960e7a93aec 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -156,7 +156,7 @@ int bench_sched_pipe(int argc, const char **argv)
result_usec += diff.tv_usec;
printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
printf(" %14lf usecs/op\n",
@@ -168,7 +168,7 @@ int bench_sched_pipe(int argc, const char **argv)
case BENCH_FORMAT_SIMPLE:
printf("%lu.%03lu\n",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c
index 5fe621cff8e9..9b751016f4b6 100644
--- a/tools/perf/bench/syscall.c
+++ b/tools/perf/bench/syscall.c
@@ -54,7 +54,7 @@ int bench_syscall_basic(int argc, const char **argv)
result_usec += diff.tv_usec;
printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec/1000));
printf(" %14lf usecs/op\n",
@@ -66,7 +66,7 @@ int bench_syscall_basic(int argc, const char **argv)
case BENCH_FORMAT_SIMPLE:
printf("%lu.%03lu\n",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / 1000));
break;
diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index 617feaf020f6..ace8772a4f03 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -161,7 +161,7 @@ static int session_config(struct daemon *daemon, const char *var, const char *va
struct daemon_session *session;
char name[100];
- if (get_session_name(var, name, sizeof(name)))
+ if (get_session_name(var, name, sizeof(name) - 1))
return -EINVAL;
var = strchr(var, '.');
@@ -373,12 +373,12 @@ static int daemon_session__run(struct daemon_session *session,
dup2(fd, 2);
close(fd);
- if (mkfifo(SESSION_CONTROL, O_RDWR) && errno != EEXIST) {
+ if (mkfifo(SESSION_CONTROL, 0600) && errno != EEXIST) {
perror("failed: create control fifo");
return -1;
}
- if (mkfifo(SESSION_ACK, O_RDWR) && errno != EEXIST) {
+ if (mkfifo(SESSION_ACK, 0600) && errno != EEXIST) {
perror("failed: create ack fifo");
return -1;
}
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 8f6c784ce629..878e04b1fab7 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1236,7 +1236,8 @@ static int __cmd_diff(void)
out_delete:
data__for_each_file(i, d) {
- perf_session__delete(d->session);
+ if (!IS_ERR(d->session))
+ perf_session__delete(d->session);
data__free(d);
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 85b6a46e85b6..7ec18ff57fc4 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3964,9 +3964,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
evlist__config(evlist, &trace->opts, &callchain_param);
- signal(SIGCHLD, sig_handler);
- signal(SIGINT, sig_handler);
-
if (forks) {
err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL);
if (err < 0) {
@@ -4827,6 +4824,8 @@ int cmd_trace(int argc, const char **argv)
signal(SIGSEGV, sighandler_dump_stack);
signal(SIGFPE, sighandler_dump_stack);
+ signal(SIGCHLD, sig_handler);
+ signal(SIGINT, sig_handler);
trace.evlist = evlist__new();
trace.sctbl = syscalltbl__new();
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 0cfb3e2cefef..133f0eddbcc4 100644
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -20,9 +20,8 @@ else
fi
BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
-NOBUILDID=0000000000000000000000000000000000000000
-perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS
+perf buildid-list -i $PERF_DATA --with-hits | grep -v "^ " > $BUILDIDS
if [ ! -s $BUILDIDS ] ; then
echo "perf archive: no build-ids found"
rm $BUILDIDS || true
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index ec972e0892ab..dd39ce9b0277 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -182,14 +182,20 @@ int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
struct stat st;
char path_perf[PATH_MAX];
char path_dir[PATH_MAX];
+ char *exec_path;
/* First try development tree tests. */
if (!lstat("./tests", &st))
return run_dir("./tests", "./perf");
+ exec_path = get_argv_exec_path();
+ if (exec_path == NULL)
+ return -1;
+
/* Then installed path. */
- snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path());
+ snprintf(path_dir, PATH_MAX, "%s/tests", exec_path);
snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR);
+ free(exec_path);
if (!lstat(path_dir, &st) &&
!lstat(path_perf, &st))
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 280f0348a09c..2fdc7b2f996e 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -706,13 +706,9 @@ static int do_test_code_reading(bool try_kcore)
out_put:
thread__put(thread);
out_err:
-
- if (evlist) {
- evlist__delete(evlist);
- } else {
- perf_cpu_map__put(cpus);
- perf_thread_map__put(threads);
- }
+ evlist__delete(evlist);
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
machine__delete_threads(machine);
machine__delete(machine);
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 29c793ac7d10..0472b110fe65 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -106,6 +106,8 @@ static int cpu_map_print(const char *str)
return -1;
cpu_map__snprint(map, buf, sizeof(buf));
+ perf_cpu_map__put(map);
+
return !strcmp(buf, str);
}
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index e6f1b2a38e03..a0438b0f0805 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -154,10 +154,9 @@ out_err:
if (evlist) {
evlist__disable(evlist);
evlist__delete(evlist);
- } else {
- perf_cpu_map__put(cpus);
- perf_thread_map__put(threads);
}
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
return err;
}
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 57093aeacc6f..73ae8f7aa066 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -158,8 +158,6 @@ out_init:
out_delete_evlist:
evlist__delete(evlist);
- cpus = NULL;
- threads = NULL;
out_free_cpus:
perf_cpu_map__put(cpus);
out_free_threads:
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index 7cff02664d0e..680c3cffb128 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -167,6 +167,8 @@ next_event:
out_err:
evlist__delete(evlist);
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
return err;
}
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 0dbe3aa99853..8fd8a4ef97da 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -129,9 +129,6 @@ static bool samples_same(const struct perf_sample *s1,
if (type & PERF_SAMPLE_WEIGHT)
COMP(weight);
- if (type & PERF_SAMPLE_WEIGHT_STRUCT)
- COMP(ins_lat);
-
if (type & PERF_SAMPLE_DATA_SRC)
COMP(data_src);
@@ -245,7 +242,6 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.cgroup = 114,
.data_page_size = 115,
.code_page_size = 116,
- .ins_lat = 117,
.aux_sample = {
.size = sizeof(aux_data),
.data = (void *)aux_data,
diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh
index e5b824dd08d9..5ad3ca8d681b 100755
--- a/tools/perf/tests/shell/daemon.sh
+++ b/tools/perf/tests/shell/daemon.sh
@@ -140,10 +140,10 @@ test_list()
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
[session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
@@ -159,14 +159,14 @@ EOF
# check 1st session
# pid:size:-e cpu-clock:base/size:base/size/output:base/size/control:base/size/ack:0
local line=`perf daemon --config ${config} -x: | head -2 | tail -1`
- check_line_other "${line}" size "-e cpu-clock" ${base}/session-size \
+ check_line_other "${line}" size "-e cpu-clock -m 1 sleep 10" ${base}/session-size \
${base}/session-size/output ${base}/session-size/control \
${base}/session-size/ack "0"
# check 2nd session
# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
- check_line_other "${line}" time "-e task-clock" ${base}/session-time \
+ check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
${base}/session-time/output ${base}/session-time/control \
${base}/session-time/ack "0"
@@ -190,10 +190,10 @@ test_reconfig()
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
[session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
@@ -204,7 +204,7 @@ EOF
# check 2nd session
# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
- check_line_other "${line}" time "-e task-clock" ${base}/session-time \
+ check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
@@ -215,10 +215,10 @@ EOF
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
[session-time]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
EOF
# TEST 1 - change config
@@ -238,7 +238,7 @@ EOF
# check reconfigured 2nd session
# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
- check_line_other "${line}" time "-e cpu-clock" ${base}/session-time \
+ check_line_other "${line}" time "-e cpu-clock -m 1 sleep 10" ${base}/session-time \
${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
# TEST 2 - empty config
@@ -309,10 +309,10 @@ test_stop()
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
[session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
@@ -361,7 +361,7 @@ test_signal()
base=BASE
[session-test]
-run = -e cpu-clock --switch-output
+run = -e cpu-clock --switch-output -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
@@ -400,10 +400,10 @@ test_ping()
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
[session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
@@ -439,7 +439,7 @@ test_lock()
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index a49c9e23053b..74988846be1d 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -42,8 +42,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
.disabled = 1,
.freq = 1,
};
- struct perf_cpu_map *cpus;
- struct perf_thread_map *threads;
+ struct perf_cpu_map *cpus = NULL;
+ struct perf_thread_map *threads = NULL;
struct mmap *md;
attr.sample_freq = 500;
@@ -66,14 +66,11 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
if (!cpus || !threads) {
err = -ENOMEM;
pr_debug("Not enough memory to create thread/cpu maps\n");
- goto out_free_maps;
+ goto out_delete_evlist;
}
perf_evlist__set_maps(&evlist->core, cpus, threads);
- cpus = NULL;
- threads = NULL;
-
if (evlist__open(evlist)) {
const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate";
@@ -129,10 +126,9 @@ out_init:
err = -1;
}
-out_free_maps:
+out_delete_evlist:
perf_cpu_map__put(cpus);
perf_thread_map__put(threads);
-out_delete_evlist:
evlist__delete(evlist);
return err;
}
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 15a2ab765d89..3ebaa758df77 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -574,10 +574,9 @@ out:
if (evlist) {
evlist__disable(evlist);
evlist__delete(evlist);
- } else {
- perf_cpu_map__put(cpus);
- perf_thread_map__put(threads);
}
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
return err;
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index bbf94e4aa145..4c2969db59b0 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -75,14 +75,11 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
if (!cpus || !threads) {
err = -ENOMEM;
pr_debug("Not enough memory to create thread/cpu maps\n");
- goto out_free_maps;
+ goto out_delete_evlist;
}
perf_evlist__set_maps(&evlist->core, cpus, threads);
- cpus = NULL;
- threads = NULL;
-
err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal);
if (err < 0) {
pr_debug("Couldn't run the workload!\n");
@@ -137,7 +134,7 @@ out_init:
if (retry_count++ > 1000) {
pr_debug("Failed after retrying 1000 times\n");
err = -1;
- goto out_free_maps;
+ goto out_delete_evlist;
}
goto retry;
@@ -148,10 +145,9 @@ out_init:
err = -1;
}
-out_free_maps:
+out_delete_evlist:
perf_cpu_map__put(cpus);
perf_thread_map__put(threads);
-out_delete_evlist:
evlist__delete(evlist);
return err;
}
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index 28f51c4bd373..d1e208b4a571 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -102,6 +102,7 @@ int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __
TEST_ASSERT_VAL("failed to synthesize map",
!perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL));
+ perf_thread_map__put(threads);
return 0;
}
@@ -109,12 +110,12 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb
{
struct perf_thread_map *threads;
char *str;
- int i;
TEST_ASSERT_VAL("failed to allocate map string",
asprintf(&str, "%d,%d", getpid(), getppid()) >= 0);
threads = thread_map__new_str(str, NULL, 0, false);
+ free(str);
TEST_ASSERT_VAL("failed to allocate thread_map",
threads);
@@ -141,9 +142,6 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb
TEST_ASSERT_VAL("failed to not remove thread",
thread_map__remove(threads, 0));
- for (i = 0; i < threads->nr; i++)
- zfree(&threads->map[i].comm);
-
- free(threads);
+ perf_thread_map__put(threads);
return 0;
}
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 5121b4db66fe..882cd1f721d9 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1306,6 +1306,7 @@ void evlist__close(struct evlist *evlist)
perf_evsel__free_fd(&evsel->core);
perf_evsel__free_id(&evsel->core);
}
+ perf_evlist__reset_id_hash(&evlist->core);
}
static int evlist__create_syswide_maps(struct evlist *evlist)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1bf76864c4f2..7ecbc8e2fbfa 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -46,6 +46,7 @@
#include "string2.h"
#include "memswap.h"
#include "util.h"
+#include "hashmap.h"
#include "../perf-sys.h"
#include "util/parse-branch-options.h"
#include <internal/xyarray.h>
@@ -1390,7 +1391,9 @@ void evsel__exit(struct evsel *evsel)
zfree(&evsel->group_name);
zfree(&evsel->name);
zfree(&evsel->pmu_name);
- zfree(&evsel->per_pkg_mask);
+ evsel__zero_per_pkg(evsel);
+ hashmap__free(evsel->per_pkg_mask);
+ evsel->per_pkg_mask = NULL;
zfree(&evsel->metric_events);
perf_evsel__object.fini(evsel);
}
@@ -2781,3 +2784,16 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
return store_evsel_ids(evsel, evlist);
}
+
+void evsel__zero_per_pkg(struct evsel *evsel)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ if (evsel->per_pkg_mask) {
+ hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
+ free((char *)cur->key);
+
+ hashmap__clear(evsel->per_pkg_mask);
+ }
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4e8e49fb7e9d..6026487353dd 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -19,6 +19,7 @@ struct perf_stat_evsel;
union perf_event;
struct bpf_counter_ops;
struct target;
+struct hashmap;
typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
@@ -112,7 +113,7 @@ struct evsel {
bool merged_stat;
bool reset_group;
bool errored;
- unsigned long *per_pkg_mask;
+ struct hashmap *per_pkg_mask;
struct evsel *leader;
struct list_head config_terms;
int err;
@@ -433,4 +434,5 @@ struct perf_env *evsel__env(struct evsel *evsel);
int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+void evsel__zero_per_pkg(struct evsel *evsel);
#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4fe9e2a54346..20effdff76ce 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1618,8 +1618,8 @@ static void print_clock_data(struct feat_fd *ff, FILE *fp)
fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid);
fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n",
- tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec,
- clockid_ns.tv_sec, clockid_ns.tv_nsec,
+ tstr, (long) tod_ns.tv_sec, (int) tod_ns.tv_usec,
+ (long) clockid_ns.tv_sec, clockid_ns.tv_nsec,
clockid_name(clockid));
}
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 692e56dc832e..fbc40a2c17d4 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -77,8 +77,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
if (strstarts(filename, "/system/lib/")) {
char *ndk, *app;
const char *arch;
- size_t ndk_length;
- size_t app_length;
+ int ndk_length, app_length;
ndk = getenv("NDK_ROOT");
app = getenv("APP_PLATFORM");
@@ -106,8 +105,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
if (new_length > PATH_MAX)
return false;
snprintf(newfilename, new_length,
- "%s/platforms/%s/arch-%s/usr/lib/%s",
- ndk, app, arch, libname);
+ "%.*s/platforms/%.*s/arch-%s/usr/lib/%s",
+ ndk_length, ndk, app_length, app, arch, libname);
return true;
}
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index d5b6aff82f21..d57ac86ce7ca 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -89,6 +89,7 @@ static void inc_group_count(struct list_head *list,
%type <str> PE_EVENT_NAME
%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%type <str> PE_DRV_CFG_TERM
+%type <str> event_pmu_name
%destructor { free ($$); } <str>
%type <term> event_term
%destructor { parse_events_term__delete ($$); } <term>
@@ -272,8 +273,11 @@ event_def: event_pmu |
event_legacy_raw sep_dc |
event_bpf_file
+event_pmu_name:
+PE_NAME | PE_PMU_EVENT_PRE
+
event_pmu:
-PE_NAME opt_pmu_config
+event_pmu_name opt_pmu_config
{
struct parse_events_state *parse_state = _parse_state;
struct parse_events_error *error = parse_state->error;
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 71b753523fac..845dd46e3c61 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -36,3 +36,4 @@ util/symbol_fprintf.c
util/units.c
util/affinity.c
util/rwsem.c
+util/hashmap.c
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 0d5ad42812b9..552b590485bf 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -3140,7 +3140,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
- if (sort__mode != SORT_MODE__MEMORY)
+ if (sort__mode != SORT_MODE__BRANCH)
return -EINVAL;
return __sort_dimension__add_output(list, sd);
@@ -3152,7 +3152,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
- if (sort__mode != SORT_MODE__BRANCH)
+ if (sort__mode != SORT_MODE__MEMORY)
return -EINVAL;
return __sort_dimension__add_output(list, sd);
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index cce7a76d6473..7f09cdaf5b60 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -983,7 +983,7 @@ static void print_interval(struct perf_stat_config *config,
if (config->interval_clear)
puts(CONSOLE_CLEAR);
- sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep);
+ sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep);
if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) {
switch (config->aggr_mode) {
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 5d8af29447f4..c400f8dde017 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -13,6 +13,7 @@
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
+#include "hashmap.h"
#include <linux/zalloc.h>
void update_stats(struct stats *stats, u64 val)
@@ -277,18 +278,29 @@ void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
}
}
-static void zero_per_pkg(struct evsel *counter)
+static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
{
- if (counter->per_pkg_mask)
- memset(counter->per_pkg_mask, 0, cpu__max_cpu());
+ uint64_t *key = (uint64_t *) __key;
+
+ return *key & 0xffffffff;
+}
+
+static bool pkg_id_equal(const void *__key1, const void *__key2,
+ void *ctx __maybe_unused)
+{
+ uint64_t *key1 = (uint64_t *) __key1;
+ uint64_t *key2 = (uint64_t *) __key2;
+
+ return *key1 == *key2;
}
static int check_per_pkg(struct evsel *counter,
struct perf_counts_values *vals, int cpu, bool *skip)
{
- unsigned long *mask = counter->per_pkg_mask;
+ struct hashmap *mask = counter->per_pkg_mask;
struct perf_cpu_map *cpus = evsel__cpus(counter);
- int s;
+ int s, d, ret = 0;
+ uint64_t *key;
*skip = false;
@@ -299,7 +311,7 @@ static int check_per_pkg(struct evsel *counter,
return 0;
if (!mask) {
- mask = zalloc(cpu__max_cpu());
+ mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
if (!mask)
return -ENOMEM;
@@ -321,8 +333,25 @@ static int check_per_pkg(struct evsel *counter,
if (s < 0)
return -1;
- *skip = test_and_set_bit(s, mask) == 1;
- return 0;
+ /*
+ * On multi-die system, die_id > 0. On no-die system, die_id = 0.
+ * We use hashmap(socket, die) to check the used socket+die pair.
+ */
+ d = cpu_map__get_die(cpus, cpu, NULL).die;
+ if (d < 0)
+ return -1;
+
+ key = malloc(sizeof(*key));
+ if (!key)
+ return -ENOMEM;
+
+ *key = (uint64_t)d << 32 | s;
+ if (hashmap__find(mask, (void *)key, NULL))
+ *skip = true;
+ else
+ ret = hashmap__add(mask, (void *)key, (void *)1);
+
+ return ret;
}
static int
@@ -422,7 +451,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
}
if (counter->per_pkg)
- zero_per_pkg(counter);
+ evsel__zero_per_pkg(counter);
ret = process_counter_maps(config, counter);
if (ret)
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index f507dff713c9..8a01af783310 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -361,6 +361,7 @@ static int read_saved_cmdline(struct tep_handle *pevent)
pr_debug("error reading saved cmdlines\n");
goto out;
}
+ buf[ret] = '\0';
parse_saved_cmdline(pevent, buf, size);
ret = 0;
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 41f0a0adbb80..6c575cf34a71 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -33,6 +33,7 @@ TARGETS += memfd
TARGETS += memory-hotplug
TARGETS += mincore
TARGETS += mount
+TARGETS += mount_setattr
TARGETS += mqueue
TARGETS += nci
TARGETS += net
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
index 537d65968c48..fb23ce9617ea 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -12,9 +12,12 @@
#include <sys/mman.h>
#include <linux/types.h>
+#define NSEC_PER_MSEC 1000000L
+
#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark)
#define DMA_MAP_MAX_THREADS 1024
#define DMA_MAP_MAX_SECONDS 300
+#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC)
#define DMA_MAP_BIDIRECTIONAL 0
#define DMA_MAP_TO_DEVICE 1
@@ -36,7 +39,8 @@ struct map_benchmark {
__s32 node; /* which numa node this benchmark will run on */
__u32 dma_bits; /* DMA addressing capability */
__u32 dma_dir; /* DMA data direction */
- __u8 expansion[84]; /* For future use */
+ __u32 dma_trans_ns; /* time for DMA transmission in ns */
+ __u8 expansion[80]; /* For future use */
};
int main(int argc, char **argv)
@@ -46,12 +50,12 @@ int main(int argc, char **argv)
/* default single thread, run 20 seconds on NUMA_NO_NODE */
int threads = 1, seconds = 20, node = -1;
/* default dma mask 32bit, bidirectional DMA */
- int bits = 32, dir = DMA_MAP_BIDIRECTIONAL;
+ int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
int cmd = DMA_MAP_BENCHMARK;
char *p;
- while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) {
+ while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) {
switch (opt) {
case 't':
threads = atoi(optarg);
@@ -68,6 +72,9 @@ int main(int argc, char **argv)
case 'd':
dir = atoi(optarg);
break;
+ case 'x':
+ xdelay = atoi(optarg);
+ break;
default:
return -1;
}
@@ -85,6 +92,12 @@ int main(int argc, char **argv)
exit(1);
}
+ if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) {
+ fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n",
+ DMA_MAP_MAX_TRANS_DELAY);
+ exit(1);
+ }
+
/* suppose the mininum DMA zone is 1MB in the world */
if (bits < 20 || bits > 64) {
fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
@@ -109,6 +122,8 @@ int main(int argc, char **argv)
map.node = node;
map.dma_bits = bits;
map.dma_dir = dir;
+ map.dma_trans_ns = xdelay;
+
if (ioctl(fd, cmd, &map)) {
perror("ioctl");
exit(1);
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 3a84394829ea..32b87cc77c8e 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -33,6 +33,7 @@
/demand_paging_test
/dirty_log_test
/dirty_log_perf_test
+/hardware_disable_test
/kvm_create_max_vcpus
/memslot_modification_stress_test
/set_memory_region_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 8c8eda429576..a6d61f451f88 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -67,6 +67,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+TEST_GEN_PROGS_x86_64 += hardware_disable_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
TEST_GEN_PROGS_x86_64 += set_memory_region_test
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
new file mode 100644
index 000000000000..2f2eeb8a1d86
--- /dev/null
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This test is intended to reproduce a crash that happens when
+ * kvm_arch_hardware_disable is called and it attempts to unregister the user
+ * return notifiers.
+ */
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include <test_util.h>
+
+#include "kvm_util.h"
+
+#define VCPU_NUM 4
+#define SLEEPING_THREAD_NUM (1 << 4)
+#define FORK_NUM (1ULL << 9)
+#define DELAY_US_MAX 2000
+#define GUEST_CODE_PIO_PORT 4
+
+sem_t *sem;
+
+/* Arguments for the pthreads */
+struct payload {
+ struct kvm_vm *vm;
+ uint32_t index;
+};
+
+static void guest_code(void)
+{
+ for (;;)
+ ; /* Some busy work */
+ printf("Should not be reached.\n");
+}
+
+static void *run_vcpu(void *arg)
+{
+ struct payload *payload = (struct payload *)arg;
+ struct kvm_run *state = vcpu_state(payload->vm, payload->index);
+
+ vcpu_run(payload->vm, payload->index);
+
+ TEST_ASSERT(false, "%s: exited with reason %d: %s\n",
+ __func__, state->exit_reason,
+ exit_reason_str(state->exit_reason));
+ pthread_exit(NULL);
+}
+
+static void *sleeping_thread(void *arg)
+{
+ int fd;
+
+ while (true) {
+ fd = open("/dev/null", O_RDWR);
+ close(fd);
+ }
+ TEST_ASSERT(false, "%s: exited\n", __func__);
+ pthread_exit(NULL);
+}
+
+static inline void check_create_thread(pthread_t *thread, pthread_attr_t *attr,
+ void *(*f)(void *), void *arg)
+{
+ int r;
+
+ r = pthread_create(thread, attr, f, arg);
+ TEST_ASSERT(r == 0, "%s: failed to create thread", __func__);
+}
+
+static inline void check_set_affinity(pthread_t thread, cpu_set_t *cpu_set)
+{
+ int r;
+
+ r = pthread_setaffinity_np(thread, sizeof(cpu_set_t), cpu_set);
+ TEST_ASSERT(r == 0, "%s: failed set affinity", __func__);
+}
+
+static inline void check_join(pthread_t thread, void **retval)
+{
+ int r;
+
+ r = pthread_join(thread, retval);
+ TEST_ASSERT(r == 0, "%s: failed to join thread", __func__);
+}
+
+static void run_test(uint32_t run)
+{
+ struct kvm_vm *vm;
+ cpu_set_t cpu_set;
+ pthread_t threads[VCPU_NUM];
+ pthread_t throw_away;
+ struct payload payloads[VCPU_NUM];
+ void *b;
+ uint32_t i, j;
+
+ CPU_ZERO(&cpu_set);
+ for (i = 0; i < VCPU_NUM; i++)
+ CPU_SET(i, &cpu_set);
+
+ vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ vm_create_irqchip(vm);
+
+ fprintf(stderr, "%s: [%d] start vcpus\n", __func__, run);
+ for (i = 0; i < VCPU_NUM; ++i) {
+ vm_vcpu_add_default(vm, i, guest_code);
+ payloads[i].vm = vm;
+ payloads[i].index = i;
+
+ check_create_thread(&threads[i], NULL, run_vcpu,
+ (void *)&payloads[i]);
+ check_set_affinity(threads[i], &cpu_set);
+
+ for (j = 0; j < SLEEPING_THREAD_NUM; ++j) {
+ check_create_thread(&throw_away, NULL, sleeping_thread,
+ (void *)NULL);
+ check_set_affinity(throw_away, &cpu_set);
+ }
+ }
+ fprintf(stderr, "%s: [%d] all threads launched\n", __func__, run);
+ sem_post(sem);
+ for (i = 0; i < VCPU_NUM; ++i)
+ check_join(threads[i], &b);
+ /* Should not be reached */
+ TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t i;
+ int s, r;
+ pid_t pid;
+
+ sem = sem_open("vm_sem", O_CREAT | O_EXCL, 0644, 0);
+ sem_unlink("vm_sem");
+
+ for (i = 0; i < FORK_NUM; ++i) {
+ pid = fork();
+ TEST_ASSERT(pid >= 0, "%s: unable to fork", __func__);
+ if (pid == 0)
+ run_test(i); /* This function always exits */
+
+ fprintf(stderr, "%s: [%d] waiting semaphore\n", __func__, i);
+ sem_wait(sem);
+ r = (rand() % DELAY_US_MAX) + 1;
+ fprintf(stderr, "%s: [%d] waiting %dus\n", __func__, i, r);
+ usleep(r);
+ r = waitpid(pid, &s, WNOHANG);
+ TEST_ASSERT(r != pid,
+ "%s: [%d] child exited unexpectedly status: [%d]",
+ __func__, i, s);
+ fprintf(stderr, "%s: [%d] killing child\n", __func__, i);
+ kill(pid, SIGKILL);
+ }
+
+ sem_destroy(sem);
+ exit(0);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index d787cb802b4a..e5fbf16f725b 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -21,6 +21,8 @@
#define KVM_UTIL_PGS_PER_HUGEPG 512
#define KVM_UTIL_MIN_PFN 2
+static int vcpu_mmap_sz(void);
+
/* Aligns x up to the next multiple of size. Size must be a power of 2. */
static void *align(void *x, size_t size)
{
@@ -509,7 +511,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
vcpu->dirty_gfns = NULL;
}
- ret = munmap(vcpu->state, sizeof(*vcpu->state));
+ ret = munmap(vcpu->state, vcpu_mmap_sz());
TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
"errno: %i", ret, errno);
close(vcpu->fd);
@@ -978,7 +980,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
vcpu_mmap_sz(), sizeof(*vcpu->state));
- vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+ vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
"vcpu id: %u errno: %i", vcpuid, errno);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index de0c76177d02..a8906e60a108 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -720,7 +720,8 @@ struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct kvm_cpuid2 *cpuid;
- int rc, max_ent;
+ int max_ent;
+ int rc = -1;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 9246ea310587..804ff5ff022d 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -13,19 +13,27 @@
#include <stdint.h>
#include <time.h>
+#include <sched.h>
+#include <sys/syscall.h>
#define VCPU_ID 5
+#define SHINFO_REGION_GVA 0xc0000000ULL
#define SHINFO_REGION_GPA 0xc0000000ULL
#define SHINFO_REGION_SLOT 10
#define PAGE_SIZE 4096
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
+
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
static struct kvm_vm *vm;
#define XEN_HYPERCALL_MSR 0x40000000
+#define MIN_STEAL_TIME 50000
+
struct pvclock_vcpu_time_info {
u32 version;
u32 pad0;
@@ -43,11 +51,67 @@ struct pvclock_wall_clock {
u32 nsec;
} __attribute__((__packed__));
+struct vcpu_runstate_info {
+ uint32_t state;
+ uint64_t state_entry_time;
+ uint64_t time[4];
+};
+
+#define RUNSTATE_running 0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked 2
+#define RUNSTATE_offline 3
+
static void guest_code(void)
{
+ struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+
+ /* Test having the host set runstates manually */
+ GUEST_SYNC(RUNSTATE_runnable);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(RUNSTATE_blocked);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(RUNSTATE_offline);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ /* Test runstate time adjust */
+ GUEST_SYNC(4);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+ /* Test runstate time set */
+ GUEST_SYNC(5);
+ GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+ /* sched_yield() should result in some 'runnable' time */
+ GUEST_SYNC(6);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
GUEST_DONE();
}
+static long get_run_delay(void)
+{
+ char path[64];
+ long val[2];
+ FILE *fp;
+
+ sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+ fp = fopen(path, "r");
+ fscanf(fp, "%ld %ld ", &val[0], &val[1]);
+ fclose(fp);
+
+ return val[1];
+}
+
static int cmp_timespec(struct timespec *a, struct timespec *b)
{
if (a->tv_sec > b->tv_sec)
@@ -66,12 +130,14 @@ int main(int argc, char *argv[])
{
struct timespec min_ts, max_ts, vm_ts;
- if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
- KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
+ int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
exit(KSFT_SKIP);
}
+ bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+
clock_gettime(CLOCK_REALTIME, &min_ts);
vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
@@ -80,6 +146,7 @@ int main(int argc, char *argv[])
/* Map a region for the shared_info page */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
+ virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
struct kvm_xen_hvm_config hvmc = {
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
@@ -111,6 +178,17 @@ int main(int argc, char *argv[])
};
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+ if (do_runstate_tests) {
+ struct kvm_xen_vcpu_attr st = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+ .u.gpa = RUNSTATE_ADDR,
+ };
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
+ }
+
+ struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);;
+ rs->state = 0x5a;
+
for (;;) {
volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
@@ -126,8 +204,56 @@ int main(int argc, char *argv[])
case UCALL_ABORT:
TEST_FAIL("%s", (const char *)uc.args[0]);
/* NOT REACHED */
- case UCALL_SYNC:
+ case UCALL_SYNC: {
+ struct kvm_xen_vcpu_attr rst;
+ long rundelay;
+
+ /* If no runstate support, bail out early */
+ if (!do_runstate_tests)
+ goto done;
+
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+
+ switch (uc.args[1]) {
+ case RUNSTATE_running...RUNSTATE_offline:
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+ rst.u.runstate.state = uc.args[1];
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+ case 4:
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = (uint64_t)-1;
+ rst.u.runstate.time_blocked =
+ 0x5a - rs->time[RUNSTATE_blocked];
+ rst.u.runstate.time_offline =
+ 0x6b6b - rs->time[RUNSTATE_offline];
+ rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+ rst.u.runstate.time_offline;
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case 5:
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = RUNSTATE_running;
+ rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+ rst.u.runstate.time_blocked = 0x6b6b;
+ rst.u.runstate.time_offline = 0x5a;
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+ case 6:
+ /* Yield until scheduler delay exceeds target */
+ rundelay = get_run_delay() + MIN_STEAL_TIME;
+ do {
+ sched_yield();
+ } while (get_run_delay() < rundelay);
+ break;
+ }
break;
+ }
case UCALL_DONE:
goto done;
default:
@@ -162,6 +288,33 @@ int main(int argc, char *argv[])
TEST_ASSERT(ti2->version && !(ti2->version & 1),
"Bad time_info version %x", ti->version);
+ if (do_runstate_tests) {
+ /*
+ * Fetch runstate and check sanity. Strictly speaking in the
+ * general case we might not expect the numbers to be identical
+ * but in this case we know we aren't running the vCPU any more.
+ */
+ struct kvm_xen_vcpu_attr rst = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+ };
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+ TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+ TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+ "State entry time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+ "Running time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+ "Runnable time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+ "Blocked time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+ "Offline time mismatch");
+
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+ }
kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/mount_setattr/.gitignore b/tools/testing/selftests/mount_setattr/.gitignore
new file mode 100644
index 000000000000..5f74d8488472
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/.gitignore
@@ -0,0 +1 @@
+mount_setattr_test
diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile
new file mode 100644
index 000000000000..2250f7dcb81e
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -g -I../../../../usr/include/ -Wall -O2 -pthread
+
+TEST_GEN_FILES += mount_setattr_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mount_setattr/config b/tools/testing/selftests/mount_setattr/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
new file mode 100644
index 000000000000..4e94e566e040
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -0,0 +1,1424 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/sysinfo.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef CLONE_NEWNS
+#define CLONE_NEWNS 0x00020000
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MS_RELATIME
+#define MS_RELATIME (1 << 21)
+#endif
+
+#ifndef MS_STRICTATIME
+#define MS_STRICTATIME (1 << 24)
+#endif
+
+#ifndef MOUNT_ATTR_RDONLY
+#define MOUNT_ATTR_RDONLY 0x00000001
+#endif
+
+#ifndef MOUNT_ATTR_NOSUID
+#define MOUNT_ATTR_NOSUID 0x00000002
+#endif
+
+#ifndef MOUNT_ATTR_NOEXEC
+#define MOUNT_ATTR_NOEXEC 0x00000008
+#endif
+
+#ifndef MOUNT_ATTR_NODIRATIME
+#define MOUNT_ATTR_NODIRATIME 0x00000080
+#endif
+
+#ifndef MOUNT_ATTR__ATIME
+#define MOUNT_ATTR__ATIME 0x00000070
+#endif
+
+#ifndef MOUNT_ATTR_RELATIME
+#define MOUNT_ATTR_RELATIME 0x00000000
+#endif
+
+#ifndef MOUNT_ATTR_NOATIME
+#define MOUNT_ATTR_NOATIME 0x00000010
+#endif
+
+#ifndef MOUNT_ATTR_STRICTATIME
+#define MOUNT_ATTR_STRICTATIME 0x00000020
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1 << 20)
+#endif
+
+#define DEFAULT_THREADS 4
+#define ptr_to_int(p) ((int)((intptr_t)(p)))
+#define int_to_ptr(u) ((void *)((intptr_t)(u)))
+
+#ifndef __NR_mount_setattr
+ #if defined __alpha__
+ #define __NR_mount_setattr 552
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_mount_setattr (442 + 4000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_mount_setattr (442 + 6000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_mount_setattr (442 + 5000)
+ #endif
+ #elif defined __ia64__
+ #define __NR_mount_setattr (442 + 1024)
+ #else
+ #define __NR_mount_setattr 442
+ #endif
+
+struct mount_attr {
+ __u64 attr_set;
+ __u64 attr_clr;
+ __u64 propagation;
+ __u64 userns_fd;
+};
+#endif
+
+#ifndef __NR_open_tree
+ #if defined __alpha__
+ #define __NR_open_tree 538
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_open_tree 4428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_open_tree 6428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_open_tree 5428
+ #endif
+ #elif defined __ia64__
+ #define __NR_open_tree (428 + 1024)
+ #else
+ #define __NR_open_tree 428
+ #endif
+#endif
+
+#ifndef MOUNT_ATTR_IDMAP
+#define MOUNT_ATTR_IDMAP 0x00100000
+#endif
+
+static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
+ struct mount_attr *attr, size_t size)
+{
+ return syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
+}
+
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+ return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+static int prepare_unpriv_mountns(void)
+{
+ if (create_and_enter_userns())
+ return -1;
+
+ if (unshare(CLONE_NEWNS))
+ return -1;
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ return -1;
+
+ return 0;
+}
+
+static int read_mnt_flags(const char *path)
+{
+ int ret;
+ struct statvfs stat;
+ unsigned int mnt_flags;
+
+ ret = statvfs(path, &stat);
+ if (ret != 0)
+ return -EINVAL;
+
+ if (stat.f_flag &
+ ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | ST_NOATIME |
+ ST_NODIRATIME | ST_RELATIME | ST_SYNCHRONOUS | ST_MANDLOCK))
+ return -EINVAL;
+
+ mnt_flags = 0;
+ if (stat.f_flag & ST_RDONLY)
+ mnt_flags |= MS_RDONLY;
+ if (stat.f_flag & ST_NOSUID)
+ mnt_flags |= MS_NOSUID;
+ if (stat.f_flag & ST_NODEV)
+ mnt_flags |= MS_NODEV;
+ if (stat.f_flag & ST_NOEXEC)
+ mnt_flags |= MS_NOEXEC;
+ if (stat.f_flag & ST_NOATIME)
+ mnt_flags |= MS_NOATIME;
+ if (stat.f_flag & ST_NODIRATIME)
+ mnt_flags |= MS_NODIRATIME;
+ if (stat.f_flag & ST_RELATIME)
+ mnt_flags |= MS_RELATIME;
+ if (stat.f_flag & ST_SYNCHRONOUS)
+ mnt_flags |= MS_SYNCHRONOUS;
+ if (stat.f_flag & ST_MANDLOCK)
+ mnt_flags |= ST_MANDLOCK;
+
+ return mnt_flags;
+}
+
+static char *get_field(char *src, int nfields)
+{
+ int i;
+ char *p = src;
+
+ for (i = 0; i < nfields; i++) {
+ while (*p && *p != ' ' && *p != '\t')
+ p++;
+
+ if (!*p)
+ break;
+
+ p++;
+ }
+
+ return p;
+}
+
+static void null_endofword(char *word)
+{
+ while (*word && *word != ' ' && *word != '\t')
+ word++;
+ *word = '\0';
+}
+
+static bool is_shared_mount(const char *path)
+{
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f = NULL;
+
+ f = fopen("/proc/self/mountinfo", "re");
+ if (!f)
+ return false;
+
+ while (getline(&line, &len, f) != -1) {
+ char *opts, *target;
+
+ target = get_field(line, 4);
+ if (!target)
+ continue;
+
+ opts = get_field(target, 2);
+ if (!opts)
+ continue;
+
+ null_endofword(target);
+
+ if (strcmp(target, path) != 0)
+ continue;
+
+ null_endofword(opts);
+ if (strstr(opts, "shared:"))
+ return true;
+ }
+
+ free(line);
+ fclose(f);
+
+ return false;
+}
+
+static void *mount_setattr_thread(void *data)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
+ .attr_clr = 0,
+ .propagation = MS_SHARED,
+ };
+
+ if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
+ pthread_exit(int_to_ptr(-1));
+
+ pthread_exit(int_to_ptr(0));
+}
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
+static bool mount_setattr_supported(void)
+{
+ int ret;
+
+ ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
+ if (ret < 0 && errno == ENOSYS)
+ return false;
+
+ return true;
+}
+
+FIXTURE(mount_setattr) {
+};
+
+FIXTURE_SETUP(mount_setattr)
+{
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_EQ(prepare_unpriv_mountns(), 0);
+
+ (void)umount2("/mnt", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
+
+ ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
+ MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
+ MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+}
+
+FIXTURE_TEARDOWN(mount_setattr)
+{
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ (void)umount2("/mnt/A", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+}
+
+TEST_F(mount_setattr, invalid_attributes)
+{
+ struct mount_attr invalid_attr = {
+ .attr_set = (1U << 31),
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_set = 0;
+ invalid_attr.attr_clr = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_clr = 0;
+ invalid_attr.propagation = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_set = (1U << 31);
+ invalid_attr.attr_clr = (1U << 31);
+ invalid_attr.propagation = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+}
+
+TEST_F(mount_setattr, extensibility)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ char *s = "dummy";
+ struct mount_attr invalid_attr = {};
+ struct mount_attr_large {
+ struct mount_attr attr1;
+ struct mount_attr attr2;
+ struct mount_attr attr3;
+ } large_attr = {};
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
+ sizeof(invalid_attr)), 0);
+ ASSERT_EQ(errno, EFAULT);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
+ sizeof(invalid_attr)), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr) / 2), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr) / 2), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ large_attr.attr3.attr_set = 0;
+ large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, basic)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOEXEC;
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, old_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, old_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, old_flags);
+}
+
+TEST_F(mount_setattr, basic_recursive)
+{
+ int fd;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOEXEC;
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_clr = MOUNT_ATTR_RDONLY;
+ attr.propagation = MS_SHARED;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_RDONLY;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+
+ fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
+ ASSERT_GE(fd, 0);
+
+ /*
+ * We're holding a fd open for writing so this needs to fail somewhere
+ * in the middle and the mount options need to be unchanged.
+ */
+ attr.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST_F(mount_setattr, mount_has_writers)
+{
+ int fd, dfd;
+ unsigned int old_flags = 0, new_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ .propagation = MS_SHARED,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
+ ASSERT_GE(fd, 0);
+
+ /*
+ * We're holding a fd open to a mount somwhere in the middle so this
+ * needs to fail somewhere in the middle. After this the mount options
+ * need to be unchanged.
+ */
+ ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
+
+ dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
+ ASSERT_GE(dfd, 0);
+ EXPECT_EQ(fsync(dfd), 0);
+ EXPECT_EQ(close(dfd), 0);
+
+ EXPECT_EQ(fsync(fd), 0);
+ EXPECT_EQ(close(fd), 0);
+
+ /* All writers are gone so this should succeed. */
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+}
+
+TEST_F(mount_setattr, mixed_mount_options)
+{
+ unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
+ .attr_set = MOUNT_ATTR_RELATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags1 = read_mnt_flags("/mnt/B");
+ ASSERT_GT(old_flags1, 0);
+
+ old_flags2 = read_mnt_flags("/mnt/B/BB");
+ ASSERT_GT(old_flags2, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags2;
+ expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ expected_flags = old_flags2;
+ expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, time_changes)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
+ attr.attr_clr = MOUNT_ATTR__ATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = 0;
+ attr.attr_clr = MOUNT_ATTR_STRICTATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_clr = MOUNT_ATTR_NOATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
+ attr.attr_clr = MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_NOATIME;
+ expected_flags |= MS_NODIRATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_NOATIME;
+ attr.attr_set |= MOUNT_ATTR_RELATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_RELATIME;
+ attr.attr_set |= MOUNT_ATTR_STRICTATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
+ attr.attr_set |= MOUNT_ATTR_NOATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags |= MS_NOATIME;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_clr = MOUNT_ATTR_NODIRATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_NODIRATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, multi_threaded)
+{
+ int i, j, nthreads, ret = 0;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ pthread_attr_t pattr;
+ pthread_t threads[DEFAULT_THREADS];
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ /* Try to change mount options from multiple threads. */
+ nthreads = get_nprocs_conf();
+ if (nthreads > DEFAULT_THREADS)
+ nthreads = DEFAULT_THREADS;
+
+ pthread_attr_init(&pattr);
+ for (i = 0; i < nthreads; i++)
+ ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
+
+ for (j = 0; j < i; j++) {
+ void *retptr = NULL;
+
+ EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
+
+ ret += ptr_to_int(retptr);
+ EXPECT_EQ(ret, 0);
+ }
+ pthread_attr_destroy(&pattr);
+
+ ASSERT_EQ(ret, 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOSUID;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+}
+
+TEST_F(mount_setattr, wrong_user_namespace)
+{
+ int ret;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ EXPECT_EQ(create_and_enter_userns(), 0);
+ ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EPERM);
+}
+
+TEST_F(mount_setattr, wrong_mount_namespace)
+{
+ int fd, ret;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+FIXTURE(mount_setattr_idmapped) {
+};
+
+FIXTURE_SETUP(mount_setattr_idmapped)
+{
+ int img_fd = -EBADF;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);
+
+ (void)umount2("/mnt", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
+ ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
+ ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+ ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
+ ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
+
+ ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
+ MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
+ MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
+ ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
+ img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
+ ASSERT_GE(img_fd, 0);
+ ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
+ ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
+ ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
+ ASSERT_EQ(close(img_fd), 0);
+}
+
+FIXTURE_TEARDOWN(mount_setattr_idmapped)
+{
+ (void)umount2("/mnt/A", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+}
+
+/**
+ * Validate that negative fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_negative)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ .userns_fd = -EBADF,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with negative fd");
+ }
+}
+
+/**
+ * Validate that excessively large fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_large)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ .userns_fd = INT64_MAX,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with too large fd value");
+ }
+}
+
+/**
+ * Validate that closed fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_closed)
+{
+ int fd;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_GE(close(fd), 0);
+
+ attr.userns_fd = fd;
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with closed fd");
+ }
+}
+
+/**
+ * Validate that the initial user namespace is rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(errno, EPERM);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
+ unsigned long range)
+{
+ char map[100], procfile[256];
+
+ snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid);
+ snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
+ if (write_file(procfile, map, strlen(map)))
+ return -1;
+
+
+ snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid);
+ snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
+ if (write_file(procfile, map, strlen(map)))
+ return -1;
+
+ return 0;
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int get_userns_fd_cb(void *data)
+{
+ return kill(getpid(), SIGSTOP);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
+{
+ int ret;
+ pid_t pid;
+ char path[256];
+
+ pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
+ if (pid < 0)
+ return -errno;
+
+ ret = map_ids(pid, nsid, hostid, range);
+ if (ret < 0)
+ return ret;
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ ret = open(path, O_RDONLY | O_CLOEXEC);
+ kill(pid, SIGKILL);
+ wait_for_pid(pid);
+ return ret;
+}
+
+/**
+ * Validate that an attached mount in our mount namespace can be idmapped.
+ * (The kernel enforces that the mount's mount namespace and the caller's mount
+ * namespace match.)
+ */
+TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that idmapping a mount is rejected if the mount's mount namespace
+ * and our mount namespace don't match.
+ * (The kernel enforces that the mount's mount namespace and the caller's mount
+ * namespace match.)
+ */
+TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC);
+ ASSERT_GE(open_tree_fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
+ sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that an attached mount in our mount namespace can be idmapped.
+ */
+TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ /* Changing mount properties on a detached mount. */
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that a detached mount not in our mount namespace can be idmapped.
+ */
+TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ /* Changing mount properties on a detached mount. */
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that currently changing the idmapping of an idmapped mount fails.
+ */
+TEST_F(mount_setattr_idmapped, change_idmapping)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+
+ /* Change idmapping on a detached mount that is already idmapped. */
+ attr.userns_fd = get_userns_fd(0, 20000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+static bool expected_uid_gid(int dfd, const char *path, int flags,
+ uid_t expected_uid, gid_t expected_gid)
+{
+ int ret;
+ struct stat st;
+
+ ret = fstatat(dfd, path, &st, flags);
+ if (ret < 0)
+ return false;
+
+ return st.st_uid == expected_uid && st.st_gid == expected_gid;
+}
+
+TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
+ AT_RECURSIVE |
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 74c69b75f6f5..7ed7cd95e58f 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -39,7 +39,7 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
sleep() { read -t "$1" -N 1 || true; }
-waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
+waitiperf() { pretty "${1//*-}" "wait for iperf:${3:-5201} pid $2"; while [[ $(ss -N "$1" -tlpH "sport = ${3:-5201}") != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; }
waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
@@ -141,6 +141,19 @@ tests() {
n2 iperf3 -s -1 -B fd00::2 &
waitiperf $netns2 $!
n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
+
+ # TCP over IPv4, in parallel
+ for max in 4 5 50; do
+ local pids=( )
+ for ((i=0; i < max; ++i)) do
+ n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
+ pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
+ done
+ for ((i=0; i < max; ++i)) do
+ n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
+ done
+ wait "${pids[@]}"
+ done
}
[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c
index 57b20802e71b..b69de9263ee6 100644
--- a/tools/tracing/latency/latency-collector.c
+++ b/tools/tracing/latency/latency-collector.c
@@ -1650,7 +1650,7 @@ static void start_printthread(void)
if (ufd < 0 ||
read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) {
printf(
-"Warning! Using trivial random nummer seed, since %s not available\n",
+"Warning! Using trivial random number seed, since %s not available\n",
DEV_URANDOM);
fflush(stdout);
*seed = i;
@@ -1711,8 +1711,8 @@ static void show_usage(void)
"\t\t\tbeginning, end, and backtrace.\n\n"
"-g, --graph\t\tEnable the display-graph option in trace_option. This\n"
-"\t\t\toption causes ftrace to show the functionph of how\n"
-"\t\t\tfunctions are calling other functions.\n\n"
+"\t\t\toption causes ftrace to show the graph of how functions\n"
+"\t\t\tare calling other functions.\n\n"
"-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n"
"\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n"