575 files changed, 28187 insertions, 4555 deletions
diff --git a/tools/accounting/.gitignore b/tools/accounting/.gitignore
index 86485203c4ae..c45fb4ed4309 100644
--- a/tools/accounting/.gitignore
+++ b/tools/accounting/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 getdelays
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 8cb504d30384..5ef1c15e88ad 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -136,7 +136,7 @@ static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
 	msg.g.version = 0x1;
 	na = (struct nlattr *) GENLMSG_DATA(&msg);
 	na->nla_type = nla_type;
-	na->nla_len = nla_len + 1 + NLA_HDRLEN;
+	na->nla_len = nla_len + NLA_HDRLEN;
 	memcpy(NLA_DATA(na), nla_data, nla_len);
 	msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
 
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index ebe1685e92dd..d5e517d1c3dd 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -512,6 +512,8 @@
 #define MSR_K7_HWCR			0xc0010015
 #define MSR_K7_HWCR_SMMLOCK_BIT		0
 #define MSR_K7_HWCR_SMMLOCK		BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_HWCR_IRPERF_EN_BIT	30
+#define MSR_K7_HWCR_IRPERF_EN		BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
 #define MSR_K7_FID_VID_CTL		0xc0010041
 #define MSR_K7_FID_VID_STATUS		0xc0010042
 
diff --git a/tools/arch/x86/include/asm/unistd_64.h b/tools/arch/x86/include/asm/unistd_64.h
index cb52a3a8b8fc..4205ed4158bf 100644
--- a/tools/arch/x86/include/asm/unistd_64.h
+++ b/tools/arch/x86/include/asm/unistd_64.h
@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NR_userfaultfd
+#define __NR_userfaultfd 282
+#endif
 #ifndef __NR_perf_event_open
 # define __NR_perf_event_open 298
 #endif
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 503d3f42da16..3f3f780c8c65 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -390,6 +390,7 @@ struct kvm_sync_regs {
 #define KVM_STATE_NESTED_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
 #define KVM_STATE_NESTED_EVMCS		0x00000004
+#define KVM_STATE_NESTED_MTF_PENDING	0x00000008
 
 #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index 53adc1762ec0..ec31f5b60323 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -366,7 +366,7 @@ AVXcode: 1
 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
 1c: Grp20 (1A),(1C)
 1d:
-1e:
+1e: Grp21 (1A)
 1f: NOP Ev
 # 0x0f 0x20-0x2f
 20: MOV Rd,Cd
@@ -803,8 +803,8 @@ f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
 f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
 f2: ANDN Gy,By,Ey (v)
 f3: Grp17 (1A)
-f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
-f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66)
+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy
 f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
 f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
 f9: MOVDIRI My,Gy
@@ -970,7 +970,7 @@ GrpTable: Grp7
 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
 3: LIDT Ms
 4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
 6: LMSW Ew
 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
 EndTable
@@ -1041,8 +1041,8 @@ GrpTable: Grp15
 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
 4: XSAVE | ptwrite Ey (F3),(11B)
-5: XRSTOR | lfence (11B)
-6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B)
+5: XRSTOR | lfence (11B) | INCSSPD/Q Ry (F3),(11B)
+6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) | CLRSSBSY Mq (F3)
 7: clflush | clflushopt (66) | sfence (11B)
 EndTable
 
@@ -1077,6 +1077,11 @@ GrpTable: Grp20
 0: cldemote Mb
 EndTable
 
+GrpTable: Grp21
+1: RDSSPD/Q Ry (F3),(11B)
+7: ENDBR64 (F3),(010),(11B) | ENDBR32 (F3),(011),(11B)
+EndTable
+
 # AMD's Prefetch Group
 GrpTable: GrpP
 0: PREFETCH
diff --git a/tools/bootconfig/.gitignore b/tools/bootconfig/.gitignore
index e7644dfaa4a7..b77513cae685 100644
--- a/tools/bootconfig/.gitignore
+++ b/tools/bootconfig/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 bootconfig
diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore
index 59024197e71d..cf53342175e7 100644
--- a/tools/bpf/.gitignore
+++ b/tools/bpf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 FEATURE-DUMP.bpf
 feature
 bpf_asm
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index b13926432b84..26cde83e1ca3 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,7 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
 *.d
+/_bpftool
 /bpftool
 bpftool*.8
 bpf-helpers.*
 FEATURE-DUMP.bpftool
 feature
 libbpf
+profiler.skel.h
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 4d08f35034a2..b04156cfd7a3 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -19,19 +19,24 @@ SYNOPSIS
 FEATURE COMMANDS
 ================
 
-|	**bpftool** **feature probe** [*COMPONENT*] [**macros** [**prefix** *PREFIX*]]
+|	**bpftool** **feature probe** [*COMPONENT*] [**full**] [**macros** [**prefix** *PREFIX*]]
 |	**bpftool** **feature help**
 |
 |	*COMPONENT* := { **kernel** | **dev** *NAME* }
 
 DESCRIPTION
 ===========
-	**bpftool feature probe** [**kernel**] [**macros** [**prefix** *PREFIX*]]
+	**bpftool feature probe** [**kernel**] [**full**] [**macros** [**prefix** *PREFIX*]]
 		  Probe the running kernel and dump a number of eBPF-related
 		  parameters, such as availability of the **bpf()** system call,
 		  JIT status, eBPF program types availability, eBPF helper
 		  functions availability, and more.
 
+		  By default, bpftool **does not run probes** for
+		  **bpf_probe_write_user**\ () and **bpf_trace_printk**\()
+		  helpers which print warnings to kernel logs. To enable them
+		  and run all probes, the **full** keyword should be used.
+
 		  If the **macros** keyword (but not the **-j** option) is
 		  passed, a subset of the output is dumped as a list of
 		  **#define** macros that are ready to be included in a C
@@ -44,16 +49,12 @@ DESCRIPTION
 		  Keyword **kernel** can be omitted. If no probe target is
 		  specified, probing the kernel is the default behaviour.
 
-		  Note that when probed, some eBPF helpers (e.g.
-		  **bpf_trace_printk**\ () or **bpf_probe_write_user**\ ()) may
-		  print warnings to kernel logs.
-
-	**bpftool feature probe dev** *NAME* [**macros** [**prefix** *PREFIX*]]
+	**bpftool feature probe dev** *NAME* [**full**] [**macros** [**prefix** *PREFIX*]]
 		  Probe network device for supported eBPF features and dump
 		  results to the console.
 
-		  The two keywords **macros** and **prefix** have the same
-		  role as when probing the kernel.
+		  The keywords **full**, **macros** and **prefix** have the
+		  same role as when probing the kernel.
 
 	**bpftool feature help**
 		  Print short help message.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 64ddf8a4c518..9f19404f470e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -30,6 +30,7 @@ PROG COMMANDS
 |	**bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
 |	**bpftool** **prog tracelog**
 |	**bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*]
+|	**bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs*
 |	**bpftool** **prog help**
 |
 |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
@@ -42,11 +43,15 @@ PROG COMMANDS
 |		**cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
 |		**cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
 |		**cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
-|		**cgroup/getsockopt** | **cgroup/setsockopt**
+|		**cgroup/getsockopt** | **cgroup/setsockopt** |
+|		**struct_ops** | **fentry** | **fexit** | **freplace**
 |	}
 |       *ATTACH_TYPE* := {
 |		**msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
 |	}
+|	*METRIC* := {
+|		**cycles** | **instructions** | **l1d_loads** | **llc_misses**
+|	}
 
 
 DESCRIPTION
@@ -188,6 +193,12 @@ DESCRIPTION
 		  not all of them can take the **ctx_in**/**ctx_out**
 		  arguments. bpftool does not perform checks on program types.
 
+	**bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs*
+		  Profile *METRICs* for bpf program *PROG* for *DURATION*
+		  seconds or until user hits Ctrl-C. *DURATION* is optional.
+		  If *DURATION* is not specified, the profiling will run up to
+		  UINT_MAX seconds.
+
 	**bpftool prog help**
 		  Print short help message.
 
@@ -310,6 +321,15 @@ EXAMPLES
 
 **# rm /sys/fs/bpf/xdp1**
 
+|
+| **# bpftool prog profile id 337 duration 10 cycles instructions llc_misses**
+
+::
+         51397 run_cnt
+      40176203 cycles                                                 (83.05%)
+      42518139 instructions    #   1.06 insns per cycle               (83.39%)
+           123 llc_misses      #   2.89 LLC misses per million insns  (83.15%)
+
 SEE ALSO
 ========
 	**bpf**\ (2),
diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
new file mode 100644
index 000000000000..f045cc89dd6d
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
@@ -0,0 +1,116 @@
+==================
+bpftool-struct_ops
+==================
+-------------------------------------------------------------------------------
+tool to register/unregister/introspect BPF struct_ops
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **struct_ops** *COMMAND*
+
+	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+
+	*COMMANDS* :=
+	{ **show** | **list** | **dump** | **register** | **unregister** | **help** }
+
+STRUCT_OPS COMMANDS
+===================
+
+|	**bpftool** **struct_ops { show | list }** [*STRUCT_OPS_MAP*]
+|	**bpftool** **struct_ops dump** [*STRUCT_OPS_MAP*]
+|	**bpftool** **struct_ops register** *OBJ*
+|	**bpftool** **struct_ops unregister** *STRUCT_OPS_MAP*
+|	**bpftool** **struct_ops help**
+|
+|	*STRUCT_OPS_MAP* := { **id** *STRUCT_OPS_MAP_ID* | **name** *STRUCT_OPS_MAP_NAME* }
+|	*OBJ* := /a/file/of/bpf_struct_ops.o
+
+
+DESCRIPTION
+===========
+	**bpftool struct_ops { show | list }** [*STRUCT_OPS_MAP*]
+		  Show brief information about the struct_ops in the system.
+		  If *STRUCT_OPS_MAP* is specified, it shows information only
+		  for the given struct_ops.  Otherwise, it lists all struct_ops
+		  currently existing in the system.
+
+		  Output will start with struct_ops map ID, followed by its map
+		  name and its struct_ops's kernel type.
+
+	**bpftool struct_ops dump** [*STRUCT_OPS_MAP*]
+		  Dump details information about the struct_ops in the system.
+		  If *STRUCT_OPS_MAP* is specified, it dumps information only
+		  for the given struct_ops.  Otherwise, it dumps all struct_ops
+		  currently existing in the system.
+
+	**bpftool struct_ops register** *OBJ*
+		  Register bpf struct_ops from *OBJ*.  All struct_ops under
+		  the ELF section ".struct_ops" will be registered to
+		  its kernel subsystem.
+
+	**bpftool struct_ops unregister**  *STRUCT_OPS_MAP*
+		  Unregister the *STRUCT_OPS_MAP* from the kernel subsystem.
+
+	**bpftool struct_ops help**
+		  Print short help message.
+
+OPTIONS
+=======
+	-h, --help
+		  Print short generic help message (similar to **bpftool help**).
+
+	-V, --version
+		  Print version number (similar to **bpftool version**).
+
+	-j, --json
+		  Generate JSON output. For commands that cannot produce JSON, this
+		  option has no effect.
+
+	-p, --pretty
+		  Generate human-readable JSON output. Implies **-j**.
+
+	-d, --debug
+		  Print all logs available, even debug-level information. This
+		  includes logs from libbpf as well as from the verifier, when
+		  attempting to load programs.
+
+EXAMPLES
+========
+**# bpftool struct_ops show**
+
+::
+
+    100: dctcp           tcp_congestion_ops
+    105: cubic           tcp_congestion_ops
+
+**# bpftool struct_ops unregister id 105**
+
+::
+
+   Unregistered tcp_congestion_ops cubic id 105
+
+**# bpftool struct_ops register bpf_cubic.o**
+
+::
+
+   Registered tcp_congestion_ops cubic id 110
+
+
+SEE ALSO
+========
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-feature**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
+	**bpftool-gen**\ (8)
+	
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index c4e810335810..f584d1fdfc64 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -59,10 +59,12 @@ LIBS = $(LIBBPF) -lelf -lz
 
 INSTALL ?= install
 RM ?= rm -f
+CLANG ?= clang
 
 FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib
-FEATURE_DISPLAY = libbfd disassembler-four-args zlib
+FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib \
+	clang-bpf-global-var
+FEATURE_DISPLAY = libbfd disassembler-four-args zlib clang-bpf-global-var
 
 check_feat := 1
 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -110,14 +112,39 @@ SRCS += $(BFD_SRCS)
 endif
 
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
+_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o
+
+ifeq ($(feature-clang-bpf-global-var),1)
+	__OBJS = $(OBJS)
+else
+	__OBJS = $(_OBJS)
+endif
+
+$(OUTPUT)_prog.o: prog.c
+	$(QUIET_CC)$(COMPILE.c) -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
+
+$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS)
+
+skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
+	$(QUIET_CLANG)$(CLANG) \
+		-I$(srctree)/tools/include/uapi/ \
+		-I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
+		-g -O2 -target bpf -c $< -o $@
+
+profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o
+	$(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@
+
+$(OUTPUT)prog.o: prog.c profiler.skel.h
+	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
 
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
 
 $(OUTPUT)feature.o: | zdep
 
-$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
+$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS)
 
 $(OUTPUT)%.o: %.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
@@ -125,6 +152,7 @@ $(OUTPUT)%.o: %.c
 clean: $(LIBBPF)-clean
 	$(call QUIET_CLEAN, bpftool)
 	$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+	$(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o
 	$(Q)$(RM) -r -- $(OUTPUT)libbpf/
 	$(call QUIET_CLEAN, core-gen)
 	$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 754d8395e451..45ee99b159e2 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -337,6 +337,7 @@ _bpftool()
 
             local PROG_TYPE='id pinned tag name'
             local MAP_TYPE='id pinned name'
+            local METRIC_TYPE='cycles instructions l1d_loads llc_misses'
             case $command in
                 show|list)
                     [[ $prev != "$command" ]] && return 0
@@ -388,7 +389,7 @@ _bpftool()
                                     _bpftool_get_prog_ids
                                     ;;
                                 name)
-                                    _bpftool_get_map_names
+                                    _bpftool_get_prog_names
                                     ;;
                                 pinned)
                                     _filedir
@@ -469,7 +470,8 @@ _bpftool()
                                 cgroup/recvmsg4 cgroup/recvmsg6 \
                                 cgroup/post_bind4 cgroup/post_bind6 \
                                 cgroup/sysctl cgroup/getsockopt \
-                                cgroup/setsockopt" -- \
+                                cgroup/setsockopt struct_ops \
+                                fentry fexit freplace" -- \
                                                    "$cur" ) )
                             return 0
                             ;;
@@ -497,9 +499,51 @@ _bpftool()
                 tracelog)
                     return 0
                     ;;
+                profile)
+                    case $cword in
+                        3)
+                            COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
+                            return 0
+                            ;;
+                        4)
+                            case $prev in
+                                id)
+                                    _bpftool_get_prog_ids
+                                    ;;
+                                name)
+                                    _bpftool_get_prog_names
+                                    ;;
+                                pinned)
+                                    _filedir
+                                    ;;
+                            esac
+                            return 0
+                            ;;
+                        5)
+                            COMPREPLY=( $( compgen -W "$METRIC_TYPE duration" -- "$cur" ) )
+                            return 0
+                            ;;
+                        6)
+                            case $prev in
+                                duration)
+                                    return 0
+                                    ;;
+                                *)
+                                    COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) )
+                                    return 0
+                                    ;;
+                            esac
+                            return 0
+                            ;;
+                        *)
+                            COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) )
+                            return 0
+                            ;;
+                    esac
+                    ;;
                 run)
-                    if [[ ${#words[@]} -lt 5 ]]; then
-                        _filedir
+                    if [[ ${#words[@]} -eq 4 ]]; then
+                        COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
                         return 0
                     fi
                     case $prev in
@@ -507,6 +551,10 @@ _bpftool()
                             _bpftool_get_prog_ids
                             return 0
                             ;;
+                        name)
+                            _bpftool_get_prog_names
+                            return 0
+                            ;;
                         data_in|data_out|ctx_in|ctx_out)
                             _filedir
                             return 0
@@ -524,7 +572,35 @@ _bpftool()
                 *)
                     [[ $prev == $object ]] && \
                         COMPREPLY=( $( compgen -W 'dump help pin attach detach \
-                            load loadall show list tracelog run' -- "$cur" ) )
+                            load loadall show list tracelog run profile' -- "$cur" ) )
+                    ;;
+            esac
+            ;;
+        struct_ops)
+            local STRUCT_OPS_TYPE='id name'
+            case $command in
+                show|list|dump|unregister)
+                    case $prev in
+                        $command)
+                            COMPREPLY=( $( compgen -W "$STRUCT_OPS_TYPE" -- "$cur" ) )
+                            ;;
+                        id)
+                            _bpftool_get_map_ids_for_type struct_ops
+                            ;;
+                        name)
+                            _bpftool_get_map_names_for_type struct_ops
+                            ;;
+                    esac
+                    return 0
+                    ;;
+                register)
+                    _filedir
+                    return 0
+                    ;;
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'register unregister show list dump help' \
+                            -- "$cur" ) )
                     ;;
             esac
             ;;
@@ -712,11 +788,17 @@ _bpftool()
                     esac
                     ;;
                 pin)
-                    if [[ $prev == "$command" ]]; then
-                        COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
-                    else
-                        _filedir
-                    fi
+                    case $prev in
+                        $command)
+                            COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+                            ;;
+                        id)
+                            _bpftool_get_map_ids
+                            ;;
+                        name)
+                            _bpftool_get_map_names
+                            ;;
+                    esac
                     return 0
                     ;;
                 event_pipe)
@@ -843,7 +925,7 @@ _bpftool()
             case $command in
                 skeleton)
                     _filedir
-		    ;;
+                    ;;
                 *)
                     [[ $prev == $object ]] && \
                         COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) )
@@ -943,6 +1025,9 @@ _bpftool()
                                 id)
                                     _bpftool_get_prog_ids
                                     ;;
+                                name)
+                                    _bpftool_get_prog_names
+                                    ;;
                                 pinned)
                                     _filedir
                                     ;;
@@ -983,11 +1068,12 @@ _bpftool()
                 probe)
                     [[ $prev == "prefix" ]] && return 0
                     if _bpftool_search_list 'macros'; then
-                        COMPREPLY+=( $( compgen -W 'prefix' -- "$cur" ) )
+                        _bpftool_once_attr 'prefix'
                     else
                         COMPREPLY+=( $( compgen -W 'macros' -- "$cur" ) )
                     fi
                     _bpftool_one_of_list 'kernel dev'
+                    _bpftool_once_attr 'full'
                     return 0
                     ;;
                 *)
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index b3745ed711ba..bcaf55b59498 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -389,6 +389,9 @@ static int dump_btf_c(const struct btf *btf,
 	if (IS_ERR(d))
 		return PTR_ERR(d);
 
+	printf("#ifndef __VMLINUX_H__\n");
+	printf("#define __VMLINUX_H__\n");
+	printf("\n");
 	printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
 	printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n");
 	printf("#endif\n\n");
@@ -412,6 +415,8 @@ static int dump_btf_c(const struct btf *btf,
 	printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
 	printf("#pragma clang attribute pop\n");
 	printf("#endif\n");
+	printf("\n");
+	printf("#endif /* __VMLINUX_H__ */\n");
 
 done:
 	btf_dump__free(d);
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 01cc52b834fa..497807bec675 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -4,11 +4,13 @@
 #include <ctype.h>
 #include <stdio.h> /* for (FILE *) used by json_writer */
 #include <string.h>
+#include <unistd.h>
 #include <asm/byteorder.h>
 #include <linux/bitops.h>
 #include <linux/btf.h>
 #include <linux/err.h>
 #include <bpf/btf.h>
+#include <bpf/bpf.h>
 
 #include "json_writer.h"
 #include "main.h"
@@ -22,13 +24,102 @@
 static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
 			      __u8 bit_offset, const void *data);
 
-static void btf_dumper_ptr(const void *data, json_writer_t *jw,
-			   bool is_plain_text)
+static int btf_dump_func(const struct btf *btf, char *func_sig,
+			 const struct btf_type *func_proto,
+			 const struct btf_type *func, int pos, int size);
+
+static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
+				    const struct btf_type *func_proto,
+				    __u32 prog_id)
 {
-	if (is_plain_text)
-		jsonw_printf(jw, "%p", *(void **)data);
+	struct bpf_prog_info_linear *prog_info = NULL;
+	const struct btf_type *func_type;
+	const char *prog_name = NULL;
+	struct bpf_func_info *finfo;
+	struct btf *prog_btf = NULL;
+	struct bpf_prog_info *info;
+	int prog_fd, func_sig_len;
+	char prog_str[1024];
+
+	/* Get the ptr's func_proto */
+	func_sig_len = btf_dump_func(d->btf, prog_str, func_proto, NULL, 0,
+				     sizeof(prog_str));
+	if (func_sig_len == -1)
+		return -1;
+
+	if (!prog_id)
+		goto print;
+
+	/* Get the bpf_prog's name.  Obtain from func_info. */
+	prog_fd = bpf_prog_get_fd_by_id(prog_id);
+	if (prog_fd == -1)
+		goto print;
+
+	prog_info = bpf_program__get_prog_info_linear(prog_fd,
+						1UL << BPF_PROG_INFO_FUNC_INFO);
+	close(prog_fd);
+	if (IS_ERR(prog_info)) {
+		prog_info = NULL;
+		goto print;
+	}
+	info = &prog_info->info;
+
+	if (!info->btf_id || !info->nr_func_info ||
+	    btf__get_from_id(info->btf_id, &prog_btf))
+		goto print;
+	finfo = (struct bpf_func_info *)info->func_info;
+	func_type = btf__type_by_id(prog_btf, finfo->type_id);
+	if (!func_type || !btf_is_func(func_type))
+		goto print;
+
+	prog_name = btf__name_by_offset(prog_btf, func_type->name_off);
+
+print:
+	if (!prog_id)
+		snprintf(&prog_str[func_sig_len],
+			 sizeof(prog_str) - func_sig_len, " 0");
+	else if (prog_name)
+		snprintf(&prog_str[func_sig_len],
+			 sizeof(prog_str) - func_sig_len,
+			 " %s/prog_id:%u", prog_name, prog_id);
 	else
-		jsonw_printf(jw, "%lu", *(unsigned long *)data);
+		snprintf(&prog_str[func_sig_len],
+			 sizeof(prog_str) - func_sig_len,
+			 " <unknown_prog_name>/prog_id:%u", prog_id);
+
+	prog_str[sizeof(prog_str) - 1] = '\0';
+	jsonw_string(d->jw, prog_str);
+	btf__free(prog_btf);
+	free(prog_info);
+	return 0;
+}
+
+static void btf_dumper_ptr(const struct btf_dumper *d,
+			   const struct btf_type *t,
+			   const void *data)
+{
+	unsigned long value = *(unsigned long *)data;
+	const struct btf_type *ptr_type;
+	__s32 ptr_type_id;
+
+	if (!d->prog_id_as_func_ptr || value > UINT32_MAX)
+		goto print_ptr_value;
+
+	ptr_type_id = btf__resolve_type(d->btf, t->type);
+	if (ptr_type_id < 0)
+		goto print_ptr_value;
+	ptr_type = btf__type_by_id(d->btf, ptr_type_id);
+	if (!ptr_type || !btf_is_func_proto(ptr_type))
+		goto print_ptr_value;
+
+	if (!dump_prog_id_as_func_ptr(d, ptr_type, value))
+		return;
+
+print_ptr_value:
+	if (d->is_plain_text)
+		jsonw_printf(d->jw, "%p", (void *)value);
+	else
+		jsonw_printf(d->jw, "%lu", value);
 }
 
 static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
@@ -43,9 +134,78 @@ static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
 	return btf_dumper_do_type(d, actual_type_id, bit_offset, data);
 }
 
-static void btf_dumper_enum(const void *data, json_writer_t *jw)
+static int btf_dumper_enum(const struct btf_dumper *d,
+			    const struct btf_type *t,
+			    const void *data)
+{
+	const struct btf_enum *enums = btf_enum(t);
+	__s64 value;
+	__u16 i;
+
+	switch (t->size) {
+	case 8:
+		value = *(__s64 *)data;
+		break;
+	case 4:
+		value = *(__s32 *)data;
+		break;
+	case 2:
+		value = *(__s16 *)data;
+		break;
+	case 1:
+		value = *(__s8 *)data;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	for (i = 0; i < btf_vlen(t); i++) {
+		if (value == enums[i].val) {
+			jsonw_string(d->jw,
+				     btf__name_by_offset(d->btf,
+							 enums[i].name_off));
+			return 0;
+		}
+	}
+
+	jsonw_int(d->jw, value);
+	return 0;
+}
+
+static bool is_str_array(const struct btf *btf, const struct btf_array *arr,
+			 const char *s)
 {
-	jsonw_printf(jw, "%d", *(int *)data);
+	const struct btf_type *elem_type;
+	const char *end_s;
+
+	if (!arr->nelems)
+		return false;
+
+	elem_type = btf__type_by_id(btf, arr->type);
+	/* Not skipping typedef.  typedef to char does not count as
+	 * a string now.
+	 */
+	while (elem_type && btf_is_mod(elem_type))
+		elem_type = btf__type_by_id(btf, elem_type->type);
+
+	if (!elem_type || !btf_is_int(elem_type) || elem_type->size != 1)
+		return false;
+
+	if (btf_int_encoding(elem_type) != BTF_INT_CHAR &&
+	    strcmp("char", btf__name_by_offset(btf, elem_type->name_off)))
+		return false;
+
+	end_s = s + arr->nelems;
+	while (s < end_s) {
+		if (!*s)
+			return true;
+		if (*s <= 0x1f || *s >= 0x7f)
+			return false;
+		s++;
+	}
+
+	/* '\0' is not found */
+	return false;
 }
 
 static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id,
@@ -57,6 +217,11 @@ static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id,
 	int ret = 0;
 	__u32 i;
 
+	if (is_str_array(d->btf, arr, data)) {
+		jsonw_string(d->jw, data);
+		return 0;
+	}
+
 	elem_size = btf__resolve_size(d->btf, arr->type);
 	if (elem_size < 0)
 		return elem_size;
@@ -366,10 +531,9 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
 	case BTF_KIND_ARRAY:
 		return btf_dumper_array(d, type_id, data);
 	case BTF_KIND_ENUM:
-		btf_dumper_enum(data, d->jw);
-		return 0;
+		return btf_dumper_enum(d, t, data);
 	case BTF_KIND_PTR:
-		btf_dumper_ptr(data, d->jw, d->is_plain_text);
+		btf_dumper_ptr(d, t, data);
 		return 0;
 	case BTF_KIND_UNKN:
 		jsonw_printf(d->jw, "(unknown)");
@@ -414,10 +578,6 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
 			return -1;					\
 	} while (0)
 
-static int btf_dump_func(const struct btf *btf, char *func_sig,
-			 const struct btf_type *func_proto,
-			 const struct btf_type *func, int pos, int size);
-
 static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
 				  char *func_sig, int pos, int size)
 {
@@ -526,8 +686,15 @@ static int btf_dump_func(const struct btf *btf, char *func_sig,
 			BTF_PRINT_ARG(", ");
 		if (arg->type) {
 			BTF_PRINT_TYPE(arg->type);
-			BTF_PRINT_ARG("%s",
-				      btf__name_by_offset(btf, arg->name_off));
+			if (arg->name_off)
+				BTF_PRINT_ARG("%s",
+					      btf__name_by_offset(btf, arg->name_off));
+			else if (pos && func_sig[pos - 1] == ' ')
+				/* Remove unnecessary space for
+				 * FUNC_PROTO that does not have
+				 * arg->name_off
+				 */
+				func_sig[--pos] = '\0';
 		} else {
 			BTF_PRINT_ARG("...");
 		}
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b75b8ec5469c..f2223dbdfb0a 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -211,39 +211,14 @@ int do_pin_fd(int fd, const char *name)
 	return err;
 }
 
-int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
+int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***))
 {
-	unsigned int id;
-	char *endptr;
 	int err;
 	int fd;
 
-	if (argc < 3) {
-		p_err("too few arguments, id ID and FILE path is required");
-		return -1;
-	} else if (argc > 3) {
-		p_err("too many arguments");
-		return -1;
-	}
-
-	if (!is_prefix(*argv, "id")) {
-		p_err("expected 'id' got %s", *argv);
-		return -1;
-	}
-	NEXT_ARG();
-
-	id = strtoul(*argv, &endptr, 0);
-	if (*endptr) {
-		p_err("can't parse %s as ID", *argv);
-		return -1;
-	}
-	NEXT_ARG();
-
-	fd = get_fd_by_id(id);
-	if (fd < 0) {
-		p_err("can't open object by id (%u): %s", id, strerror(errno));
-		return -1;
-	}
+	fd = get_fd(&argc, &argv);
+	if (fd < 0)
+		return fd;
 
 	err = do_pin_fd(fd, *argv);
 
@@ -597,3 +572,10 @@ int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what)
 
 	return 0;
 }
+
+int __printf(2, 0)
+print_all_levels(__maybe_unused enum libbpf_print_level level,
+		 const char *format, va_list args)
+{
+	return vfprintf(stderr, format, args);
+}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 941873d778d8..88718ee6a438 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -112,18 +112,12 @@ print_start_section(const char *json_title, const char *plain_title,
 	}
 }
 
-static void
-print_end_then_start_section(const char *json_title, const char *plain_title,
-			     const char *define_comment,
-			     const char *define_prefix)
+static void print_end_section(void)
 {
 	if (json_output)
 		jsonw_end_object(json_wtr);
 	else
 		printf("\n");
-
-	print_start_section(json_title, plain_title, define_comment,
-			    define_prefix);
 }
 
 /* Probing functions */
@@ -520,13 +514,38 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
 }
 
 static void
+probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
+			  const char *define_prefix, unsigned int id,
+			  const char *ptype_name, __u32 ifindex)
+{
+	bool res;
+
+	if (!supported_type)
+		res = false;
+	else
+		res = bpf_probe_helper(id, prog_type, ifindex);
+
+	if (json_output) {
+		if (res)
+			jsonw_string(json_wtr, helper_name[id]);
+	} else if (define_prefix) {
+		printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n",
+		       define_prefix, ptype_name, helper_name[id],
+		       res ? "1" : "0");
+	} else {
+		if (res)
+			printf("\n\t- %s", helper_name[id]);
+	}
+}
+
+static void
 probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
-			   const char *define_prefix, __u32 ifindex)
+			   const char *define_prefix, bool full_mode,
+			   __u32 ifindex)
 {
 	const char *ptype_name = prog_type_name[prog_type];
 	char feat_name[128];
 	unsigned int id;
-	bool res;
 
 	if (ifindex)
 		/* Only test helpers for offload-able program types */
@@ -548,21 +567,19 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
 	}
 
 	for (id = 1; id < ARRAY_SIZE(helper_name); id++) {
-		if (!supported_type)
-			res = false;
-		else
-			res = bpf_probe_helper(id, prog_type, ifindex);
-
-		if (json_output) {
-			if (res)
-				jsonw_string(json_wtr, helper_name[id]);
-		} else if (define_prefix) {
-			printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n",
-			       define_prefix, ptype_name, helper_name[id],
-			       res ? "1" : "0");
-		} else {
-			if (res)
-				printf("\n\t- %s", helper_name[id]);
+		/* Skip helper functions which emit dmesg messages when not in
+		 * the full mode.
+		 */
+		switch (id) {
+		case BPF_FUNC_trace_printk:
+		case BPF_FUNC_probe_write_user:
+			if (!full_mode)
+				continue;
+			/* fallthrough */
+		default:
+			probe_helper_for_progtype(prog_type, supported_type,
+						  define_prefix, id, ptype_name,
+						  ifindex);
 		}
 	}
 
@@ -584,13 +601,132 @@ probe_large_insn_limit(const char *define_prefix, __u32 ifindex)
 			   res, define_prefix);
 }
 
+static void
+section_system_config(enum probe_component target, const char *define_prefix)
+{
+	switch (target) {
+	case COMPONENT_KERNEL:
+	case COMPONENT_UNSPEC:
+		if (define_prefix)
+			break;
+
+		print_start_section("system_config",
+				    "Scanning system configuration...",
+				    NULL, /* define_comment never used here */
+				    NULL); /* define_prefix always NULL here */
+		if (check_procfs()) {
+			probe_unprivileged_disabled();
+			probe_jit_enable();
+			probe_jit_harden();
+			probe_jit_kallsyms();
+			probe_jit_limit();
+		} else {
+			p_info("/* procfs not mounted, skipping related probes */");
+		}
+		probe_kernel_image_config();
+		print_end_section();
+		break;
+	default:
+		break;
+	}
+}
+
+static bool section_syscall_config(const char *define_prefix)
+{
+	bool res;
+
+	print_start_section("syscall_config",
+			    "Scanning system call availability...",
+			    "/*** System call availability ***/",
+			    define_prefix);
+	res = probe_bpf_syscall(define_prefix);
+	print_end_section();
+
+	return res;
+}
+
+static void
+section_program_types(bool *supported_types, const char *define_prefix,
+		      __u32 ifindex)
+{
+	unsigned int i;
+
+	print_start_section("program_types",
+			    "Scanning eBPF program types...",
+			    "/*** eBPF program types ***/",
+			    define_prefix);
+
+	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+		probe_prog_type(i, supported_types, define_prefix, ifindex);
+
+	print_end_section();
+}
+
+static void section_map_types(const char *define_prefix, __u32 ifindex)
+{
+	unsigned int i;
+
+	print_start_section("map_types",
+			    "Scanning eBPF map types...",
+			    "/*** eBPF map types ***/",
+			    define_prefix);
+
+	for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++)
+		probe_map_type(i, define_prefix, ifindex);
+
+	print_end_section();
+}
+
+static void
+section_helpers(bool *supported_types, const char *define_prefix,
+		bool full_mode, __u32 ifindex)
+{
+	unsigned int i;
+
+	print_start_section("helpers",
+			    "Scanning eBPF helper functions...",
+			    "/*** eBPF helper functions ***/",
+			    define_prefix);
+
+	if (define_prefix)
+		printf("/*\n"
+		       " * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n"
+		       " * to determine if <helper_name> is available for <prog_type_name>,\n"
+		       " * e.g.\n"
+		       " *	#if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n"
+		       " *		// do stuff with this helper\n"
+		       " *	#elif\n"
+		       " *		// use a workaround\n"
+		       " *	#endif\n"
+		       " */\n"
+		       "#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper)	\\\n"
+		       "	%sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
+		       define_prefix, define_prefix, define_prefix,
+		       define_prefix);
+	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+		probe_helpers_for_progtype(i, supported_types[i],
+					   define_prefix, full_mode, ifindex);
+
+	print_end_section();
+}
+
+static void section_misc(const char *define_prefix, __u32 ifindex)
+{
+	print_start_section("misc",
+			    "Scanning miscellaneous eBPF features...",
+			    "/*** eBPF misc features ***/",
+			    define_prefix);
+	probe_large_insn_limit(define_prefix, ifindex);
+	print_end_section();
+}
+
 static int do_probe(int argc, char **argv)
 {
 	enum probe_component target = COMPONENT_UNSPEC;
 	const char *define_prefix = NULL;
 	bool supported_types[128] = {};
+	bool full_mode = false;
 	__u32 ifindex = 0;
-	unsigned int i;
 	char *ifname;
 
 	/* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN).
@@ -629,6 +765,9 @@ static int do_probe(int argc, char **argv)
 				      strerror(errno));
 				return -1;
 			}
+		} else if (is_prefix(*argv, "full")) {
+			full_mode = true;
+			NEXT_ARG();
 		} else if (is_prefix(*argv, "macros") && !define_prefix) {
 			define_prefix = "";
 			NEXT_ARG();
@@ -658,97 +797,19 @@ static int do_probe(int argc, char **argv)
 		jsonw_start_object(json_wtr);
 	}
 
-	switch (target) {
-	case COMPONENT_KERNEL:
-	case COMPONENT_UNSPEC:
-		if (define_prefix)
-			break;
-
-		print_start_section("system_config",
-				    "Scanning system configuration...",
-				    NULL, /* define_comment never used here */
-				    NULL); /* define_prefix always NULL here */
-		if (check_procfs()) {
-			probe_unprivileged_disabled();
-			probe_jit_enable();
-			probe_jit_harden();
-			probe_jit_kallsyms();
-			probe_jit_limit();
-		} else {
-			p_info("/* procfs not mounted, skipping related probes */");
-		}
-		probe_kernel_image_config();
-		if (json_output)
-			jsonw_end_object(json_wtr);
-		else
-			printf("\n");
-		break;
-	default:
-		break;
-	}
-
-	print_start_section("syscall_config",
-			    "Scanning system call availability...",
-			    "/*** System call availability ***/",
-			    define_prefix);
-
-	if (!probe_bpf_syscall(define_prefix))
+	section_system_config(target, define_prefix);
+	if (!section_syscall_config(define_prefix))
 		/* bpf() syscall unavailable, don't probe other BPF features */
 		goto exit_close_json;
-
-	print_end_then_start_section("program_types",
-				     "Scanning eBPF program types...",
-				     "/*** eBPF program types ***/",
-				     define_prefix);
-
-	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
-		probe_prog_type(i, supported_types, define_prefix, ifindex);
-
-	print_end_then_start_section("map_types",
-				     "Scanning eBPF map types...",
-				     "/*** eBPF map types ***/",
-				     define_prefix);
-
-	for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++)
-		probe_map_type(i, define_prefix, ifindex);
-
-	print_end_then_start_section("helpers",
-				     "Scanning eBPF helper functions...",
-				     "/*** eBPF helper functions ***/",
-				     define_prefix);
-
-	if (define_prefix)
-		printf("/*\n"
-		       " * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n"
-		       " * to determine if <helper_name> is available for <prog_type_name>,\n"
-		       " * e.g.\n"
-		       " *	#if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n"
-		       " *		// do stuff with this helper\n"
-		       " *	#elif\n"
-		       " *		// use a workaround\n"
-		       " *	#endif\n"
-		       " */\n"
-		       "#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper)	\\\n"
-		       "	%sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
-		       define_prefix, define_prefix, define_prefix,
-		       define_prefix);
-	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
-		probe_helpers_for_progtype(i, supported_types[i],
-					   define_prefix, ifindex);
-
-	print_end_then_start_section("misc",
-				     "Scanning miscellaneous eBPF features...",
-				     "/*** eBPF misc features ***/",
-				     define_prefix);
-	probe_large_insn_limit(define_prefix, ifindex);
+	section_program_types(supported_types, define_prefix, ifindex);
+	section_map_types(define_prefix, ifindex);
+	section_helpers(supported_types, define_prefix, full_mode, ifindex);
+	section_misc(define_prefix, ifindex);
 
 exit_close_json:
-	if (json_output) {
-		/* End current "section" of probes */
-		jsonw_end_object(json_wtr);
+	if (json_output)
 		/* End root object */
 		jsonw_end_object(json_wtr);
-	}
 
 	return 0;
 }
@@ -761,7 +822,7 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s %s probe [COMPONENT] [macros [prefix PREFIX]]\n"
+		"Usage: %s %s probe [COMPONENT] [full] [macros [prefix PREFIX]]\n"
 		"       %s %s help\n"
 		"\n"
 		"       COMPONENT := { kernel | dev NAME }\n"
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 6d41bbfc6459..466c269eabdd 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -58,7 +58,7 @@ static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen }\n"
+		"       OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen | struct_ops }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
@@ -79,13 +79,6 @@ static int do_version(int argc, char **argv)
 	return 0;
 }
 
-static int __printf(2, 0)
-print_all_levels(__maybe_unused enum libbpf_print_level level,
-		 const char *format, va_list args)
-{
-	return vfprintf(stderr, format, args);
-}
-
 int cmd_select(const struct cmd *cmds, int argc, char **argv,
 	       int (*help)(int argc, char **argv))
 {
@@ -228,6 +221,7 @@ static const struct cmd cmds[] = {
 	{ "feature",	do_feature },
 	{ "btf",	do_btf },
 	{ "gen",	do_gen },
+	{ "struct_ops",	do_struct_ops },
 	{ "version",	do_version },
 	{ 0 }
 };
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 4e75b58d3989..86f14ce26fd7 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -14,6 +14,8 @@
 #include <linux/hashtable.h>
 #include <tools/libc_compat.h>
 
+#include <bpf/libbpf.h>
+
 #include "json_writer.h"
 
 #define ptr_to_u64(ptr)	((__u64)(unsigned long)(ptr))
@@ -76,6 +78,9 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_CGROUP_SYSCTL]		= "cgroup_sysctl",
 	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE]	= "raw_tracepoint_writable",
 	[BPF_PROG_TYPE_CGROUP_SOCKOPT]		= "cgroup_sockopt",
+	[BPF_PROG_TYPE_TRACING]			= "tracing",
+	[BPF_PROG_TYPE_STRUCT_OPS]		= "struct_ops",
+	[BPF_PROG_TYPE_EXT]			= "ext",
 };
 
 extern const char * const map_type_name[];
@@ -143,7 +148,7 @@ char *get_fdinfo(int fd, const char *key);
 int open_obj_pinned(char *path, bool quiet);
 int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
 int mount_bpffs_for_pin(const char *name);
-int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
 int do_pin_fd(int fd, const char *name);
 
 int do_prog(int argc, char **arg);
@@ -156,6 +161,7 @@ int do_tracelog(int argc, char **arg);
 int do_feature(int argc, char **argv);
 int do_btf(int argc, char **argv);
 int do_gen(int argc, char **argv);
+int do_struct_ops(int argc, char **argv);
 
 int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
@@ -200,6 +206,7 @@ struct btf_dumper {
 	const struct btf *btf;
 	json_writer_t *jw;
 	bool is_plain_text;
+	bool prog_id_as_func_ptr;
 };
 
 /* btf_dumper_type - print data along with type information
@@ -226,4 +233,7 @@ struct tcmsg;
 int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb);
 int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind,
 		   const char *devname, int ifindex);
+
+int print_all_levels(__maybe_unused enum libbpf_print_level level,
+		     const char *format, va_list args);
 #endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e6c85680b34d..693a632f6813 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1384,7 +1384,7 @@ static int do_pin(int argc, char **argv)
 {
 	int err;
 
-	err = do_pin_any(argc, argv, bpf_map_get_fd_by_id);
+	err = do_pin_any(argc, argv, map_parse_fd);
 	if (!err && json_output)
 		jsonw_null(json_wtr);
 	return err;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index b352ab041160..f6a5974a7b0a 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -4,6 +4,7 @@
 #define _GNU_SOURCE
 #include <errno.h>
 #include <fcntl.h>
+#include <signal.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -11,10 +12,13 @@
 #include <time.h>
 #include <unistd.h>
 #include <net/if.h>
+#include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/syscall.h>
 
 #include <linux/err.h>
+#include <linux/perf_event.h>
 #include <linux/sizes.h>
 
 #include <bpf/bpf.h>
@@ -809,7 +813,7 @@ static int do_pin(int argc, char **argv)
 {
 	int err;
 
-	err = do_pin_any(argc, argv, bpf_prog_get_fd_by_id);
+	err = do_pin_any(argc, argv, prog_parse_fd);
 	if (!err && json_output)
 		jsonw_null(json_wtr);
 	return err;
@@ -1243,6 +1247,25 @@ free_data_in:
 	return err;
 }
 
+static int
+get_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+		      enum bpf_attach_type *expected_attach_type)
+{
+	libbpf_print_fn_t print_backup;
+	int ret;
+
+	ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type);
+	if (!ret)
+		return ret;
+
+	/* libbpf_prog_type_by_name() failed, let's re-run with debug level */
+	print_backup = libbpf_set_print(print_all_levels);
+	ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type);
+	libbpf_set_print(print_backup);
+
+	return ret;
+}
+
 static int load_with_options(int argc, char **argv, bool first_prog_only)
 {
 	enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC;
@@ -1292,8 +1315,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 			strcat(type, *argv);
 			strcat(type, "/");
 
-			err = libbpf_prog_type_by_name(type, &common_prog_type,
-						       &expected_attach_type);
+			err = get_prog_type_by_name(type, &common_prog_type,
+						    &expected_attach_type);
 			free(type);
 			if (err < 0)
 				goto err_free_reuse_maps;
@@ -1392,8 +1415,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 		if (prog_type == BPF_PROG_TYPE_UNSPEC) {
 			const char *sec_name = bpf_program__title(pos, false);
 
-			err = libbpf_prog_type_by_name(sec_name, &prog_type,
-						       &expected_attach_type);
+			err = get_prog_type_by_name(sec_name, &prog_type,
+						    &expected_attach_type);
 			if (err < 0)
 				goto err_close_obj;
 		}
@@ -1537,6 +1560,422 @@ static int do_loadall(int argc, char **argv)
 	return load_with_options(argc, argv, false);
 }
 
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+
+static int do_profile(int argc, char **argv)
+{
+	p_err("bpftool prog profile command is not supported. Please build bpftool with clang >= 10.0.0");
+	return 0;
+}
+
+#else /* BPFTOOL_WITHOUT_SKELETONS */
+
+#include "profiler.skel.h"
+
+struct profile_metric {
+	const char *name;
+	struct bpf_perf_event_value val;
+	struct perf_event_attr attr;
+	bool selected;
+
+	/* calculate ratios like instructions per cycle */
+	const int ratio_metric; /* 0 for N/A, 1 for index 0 (cycles) */
+	const char *ratio_desc;
+	const float ratio_mul;
+} metrics[] = {
+	{
+		.name = "cycles",
+		.attr = {
+			.type = PERF_TYPE_HARDWARE,
+			.config = PERF_COUNT_HW_CPU_CYCLES,
+			.exclude_user = 1,
+		},
+	},
+	{
+		.name = "instructions",
+		.attr = {
+			.type = PERF_TYPE_HARDWARE,
+			.config = PERF_COUNT_HW_INSTRUCTIONS,
+			.exclude_user = 1,
+		},
+		.ratio_metric = 1,
+		.ratio_desc = "insns per cycle",
+		.ratio_mul = 1.0,
+	},
+	{
+		.name = "l1d_loads",
+		.attr = {
+			.type = PERF_TYPE_HW_CACHE,
+			.config =
+				PERF_COUNT_HW_CACHE_L1D |
+				(PERF_COUNT_HW_CACHE_OP_READ << 8) |
+				(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
+			.exclude_user = 1,
+		},
+	},
+	{
+		.name = "llc_misses",
+		.attr = {
+			.type = PERF_TYPE_HW_CACHE,
+			.config =
+				PERF_COUNT_HW_CACHE_LL |
+				(PERF_COUNT_HW_CACHE_OP_READ << 8) |
+				(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+			.exclude_user = 1
+		},
+		.ratio_metric = 2,
+		.ratio_desc = "LLC misses per million insns",
+		.ratio_mul = 1e6,
+	},
+};
+
+static __u64 profile_total_count;
+
+#define MAX_NUM_PROFILE_METRICS 4
+
+static int profile_parse_metrics(int argc, char **argv)
+{
+	unsigned int metric_cnt;
+	int selected_cnt = 0;
+	unsigned int i;
+
+	metric_cnt = sizeof(metrics) / sizeof(struct profile_metric);
+
+	while (argc > 0) {
+		for (i = 0; i < metric_cnt; i++) {
+			if (is_prefix(argv[0], metrics[i].name)) {
+				if (!metrics[i].selected)
+					selected_cnt++;
+				metrics[i].selected = true;
+				break;
+			}
+		}
+		if (i == metric_cnt) {
+			p_err("unknown metric %s", argv[0]);
+			return -1;
+		}
+		NEXT_ARG();
+	}
+	if (selected_cnt > MAX_NUM_PROFILE_METRICS) {
+		p_err("too many (%d) metrics, please specify no more than %d metrics at at time",
+		      selected_cnt, MAX_NUM_PROFILE_METRICS);
+		return -1;
+	}
+	return selected_cnt;
+}
+
+static void profile_read_values(struct profiler_bpf *obj)
+{
+	__u32 m, cpu, num_cpu = obj->rodata->num_cpu;
+	int reading_map_fd, count_map_fd;
+	__u64 counts[num_cpu];
+	__u32 key = 0;
+	int err;
+
+	reading_map_fd = bpf_map__fd(obj->maps.accum_readings);
+	count_map_fd = bpf_map__fd(obj->maps.counts);
+	if (reading_map_fd < 0 || count_map_fd < 0) {
+		p_err("failed to get fd for map");
+		return;
+	}
+
+	err = bpf_map_lookup_elem(count_map_fd, &key, counts);
+	if (err) {
+		p_err("failed to read count_map: %s", strerror(errno));
+		return;
+	}
+
+	profile_total_count = 0;
+	for (cpu = 0; cpu < num_cpu; cpu++)
+		profile_total_count += counts[cpu];
+
+	for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+		struct bpf_perf_event_value values[num_cpu];
+
+		if (!metrics[m].selected)
+			continue;
+
+		err = bpf_map_lookup_elem(reading_map_fd, &key, values);
+		if (err) {
+			p_err("failed to read reading_map: %s",
+			      strerror(errno));
+			return;
+		}
+		for (cpu = 0; cpu < num_cpu; cpu++) {
+			metrics[m].val.counter += values[cpu].counter;
+			metrics[m].val.enabled += values[cpu].enabled;
+			metrics[m].val.running += values[cpu].running;
+		}
+		key++;
+	}
+}
+
+static void profile_print_readings_json(void)
+{
+	__u32 m;
+
+	jsonw_start_array(json_wtr);
+	for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+		if (!metrics[m].selected)
+			continue;
+		jsonw_start_object(json_wtr);
+		jsonw_string_field(json_wtr, "metric", metrics[m].name);
+		jsonw_lluint_field(json_wtr, "run_cnt", profile_total_count);
+		jsonw_lluint_field(json_wtr, "value", metrics[m].val.counter);
+		jsonw_lluint_field(json_wtr, "enabled", metrics[m].val.enabled);
+		jsonw_lluint_field(json_wtr, "running", metrics[m].val.running);
+
+		jsonw_end_object(json_wtr);
+	}
+	jsonw_end_array(json_wtr);
+}
+
+static void profile_print_readings_plain(void)
+{
+	__u32 m;
+
+	printf("\n%18llu %-20s\n", profile_total_count, "run_cnt");
+	for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+		struct bpf_perf_event_value *val = &metrics[m].val;
+		int r;
+
+		if (!metrics[m].selected)
+			continue;
+		printf("%18llu %-20s", val->counter, metrics[m].name);
+
+		r = metrics[m].ratio_metric - 1;
+		if (r >= 0 && metrics[r].selected &&
+		    metrics[r].val.counter > 0) {
+			printf("# %8.2f %-30s",
+			       val->counter * metrics[m].ratio_mul /
+			       metrics[r].val.counter,
+			       metrics[m].ratio_desc);
+		} else {
+			printf("%-41s", "");
+		}
+
+		if (val->enabled > val->running)
+			printf("(%4.2f%%)",
+			       val->running * 100.0 / val->enabled);
+		printf("\n");
+	}
+}
+
+static void profile_print_readings(void)
+{
+	if (json_output)
+		profile_print_readings_json();
+	else
+		profile_print_readings_plain();
+}
+
+static char *profile_target_name(int tgt_fd)
+{
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_func_info *func_info;
+	const struct btf_type *t;
+	char *name = NULL;
+	struct btf *btf;
+
+	info_linear = bpf_program__get_prog_info_linear(
+		tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
+	if (IS_ERR_OR_NULL(info_linear)) {
+		p_err("failed to get info_linear for prog FD %d", tgt_fd);
+		return NULL;
+	}
+
+	if (info_linear->info.btf_id == 0 ||
+	    btf__get_from_id(info_linear->info.btf_id, &btf)) {
+		p_err("prog FD %d doesn't have valid btf", tgt_fd);
+		goto out;
+	}
+
+	func_info = (struct bpf_func_info *)(info_linear->info.func_info);
+	t = btf__type_by_id(btf, func_info[0].type_id);
+	if (!t) {
+		p_err("btf %d doesn't have type %d",
+		      info_linear->info.btf_id, func_info[0].type_id);
+		goto out;
+	}
+	name = strdup(btf__name_by_offset(btf, t->name_off));
+out:
+	free(info_linear);
+	return name;
+}
+
+static struct profiler_bpf *profile_obj;
+static int profile_tgt_fd = -1;
+static char *profile_tgt_name;
+static int *profile_perf_events;
+static int profile_perf_event_cnt;
+
+static void profile_close_perf_events(struct profiler_bpf *obj)
+{
+	int i;
+
+	for (i = profile_perf_event_cnt - 1; i >= 0; i--)
+		close(profile_perf_events[i]);
+
+	free(profile_perf_events);
+	profile_perf_event_cnt = 0;
+}
+
+static int profile_open_perf_events(struct profiler_bpf *obj)
+{
+	unsigned int cpu, m;
+	int map_fd, pmu_fd;
+
+	profile_perf_events = calloc(
+		sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric);
+	if (!profile_perf_events) {
+		p_err("failed to allocate memory for perf_event array: %s",
+		      strerror(errno));
+		return -1;
+	}
+	map_fd = bpf_map__fd(obj->maps.events);
+	if (map_fd < 0) {
+		p_err("failed to get fd for events map");
+		return -1;
+	}
+
+	for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+		if (!metrics[m].selected)
+			continue;
+		for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) {
+			pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr,
+					 -1/*pid*/, cpu, -1/*group_fd*/, 0);
+			if (pmu_fd < 0 ||
+			    bpf_map_update_elem(map_fd, &profile_perf_event_cnt,
+						&pmu_fd, BPF_ANY) ||
+			    ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
+				p_err("failed to create event %s on cpu %d",
+				      metrics[m].name, cpu);
+				return -1;
+			}
+			profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
+		}
+	}
+	return 0;
+}
+
+static void profile_print_and_cleanup(void)
+{
+	profile_close_perf_events(profile_obj);
+	profile_read_values(profile_obj);
+	profile_print_readings();
+	profiler_bpf__destroy(profile_obj);
+
+	close(profile_tgt_fd);
+	free(profile_tgt_name);
+}
+
+static void int_exit(int signo)
+{
+	profile_print_and_cleanup();
+	exit(0);
+}
+
+static int do_profile(int argc, char **argv)
+{
+	int num_metric, num_cpu, err = -1;
+	struct bpf_program *prog;
+	unsigned long duration;
+	char *endptr;
+
+	/* we at least need two args for the prog and one metric */
+	if (!REQ_ARGS(3))
+		return -EINVAL;
+
+	/* parse target fd */
+	profile_tgt_fd = prog_parse_fd(&argc, &argv);
+	if (profile_tgt_fd < 0) {
+		p_err("failed to parse fd");
+		return -1;
+	}
+
+	/* parse profiling optional duration */
+	if (argc > 2 && is_prefix(argv[0], "duration")) {
+		NEXT_ARG();
+		duration = strtoul(*argv, &endptr, 0);
+		if (*endptr)
+			usage();
+		NEXT_ARG();
+	} else {
+		duration = UINT_MAX;
+	}
+
+	num_metric = profile_parse_metrics(argc, argv);
+	if (num_metric <= 0)
+		goto out;
+
+	num_cpu = libbpf_num_possible_cpus();
+	if (num_cpu <= 0) {
+		p_err("failed to identify number of CPUs");
+		goto out;
+	}
+
+	profile_obj = profiler_bpf__open();
+	if (!profile_obj) {
+		p_err("failed to open and/or load BPF object");
+		goto out;
+	}
+
+	profile_obj->rodata->num_cpu = num_cpu;
+	profile_obj->rodata->num_metric = num_metric;
+
+	/* adjust map sizes */
+	bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu);
+	bpf_map__resize(profile_obj->maps.fentry_readings, num_metric);
+	bpf_map__resize(profile_obj->maps.accum_readings, num_metric);
+	bpf_map__resize(profile_obj->maps.counts, 1);
+
+	/* change target name */
+	profile_tgt_name = profile_target_name(profile_tgt_fd);
+	if (!profile_tgt_name)
+		goto out;
+
+	bpf_object__for_each_program(prog, profile_obj->obj) {
+		err = bpf_program__set_attach_target(prog, profile_tgt_fd,
+						     profile_tgt_name);
+		if (err) {
+			p_err("failed to set attach target\n");
+			goto out;
+		}
+	}
+
+	set_max_rlimit();
+	err = profiler_bpf__load(profile_obj);
+	if (err) {
+		p_err("failed to load profile_obj");
+		goto out;
+	}
+
+	err = profile_open_perf_events(profile_obj);
+	if (err)
+		goto out;
+
+	err = profiler_bpf__attach(profile_obj);
+	if (err) {
+		p_err("failed to attach profile_obj");
+		goto out;
+	}
+	signal(SIGINT, int_exit);
+
+	sleep(duration);
+	profile_print_and_cleanup();
+	return 0;
+
+out:
+	profile_close_perf_events(profile_obj);
+	if (profile_obj)
+		profiler_bpf__destroy(profile_obj);
+	close(profile_tgt_fd);
+	free(profile_tgt_name);
+	return err;
+}
+
+#endif /* BPFTOOL_WITHOUT_SKELETONS */
+
 static int do_help(int argc, char **argv)
 {
 	if (json_output) {
@@ -1560,6 +1999,7 @@ static int do_help(int argc, char **argv)
 		"                         [data_out FILE [data_size_out L]] \\\n"
 		"                         [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n"
 		"                         [repeat N]\n"
+		"       %s %s profile PROG [duration DURATION] METRICs\n"
 		"       %s %s tracelog\n"
 		"       %s %s help\n"
 		"\n"
@@ -1573,16 +2013,17 @@ static int do_help(int argc, char **argv)
 		"                 cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
 		"                 cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
 		"                 cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n"
-		"                 cgroup/recvmsg6 | cgroup/getsockopt |\n"
-		"                 cgroup/setsockopt }\n"
+		"                 cgroup/recvmsg6 | cgroup/getsockopt | cgroup/setsockopt |\n"
+		"                 struct_ops | fentry | fexit | freplace }\n"
 		"       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
 		"                        flow_dissector }\n"
+		"       METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2]);
+		bin_name, argv[-2], bin_name, argv[-2]);
 
 	return 0;
 }
@@ -1599,6 +2040,7 @@ static const struct cmd cmds[] = {
 	{ "detach",	do_detach },
 	{ "tracelog",	do_tracelog },
 	{ "run",	do_run },
+	{ "profile",	do_profile },
 	{ 0 }
 };
 
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
new file mode 100644
index 000000000000..20034c12f7c5
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include "profiler.h"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* map of perf event fds, num_cpu * num_metric entries */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+/* readings at fentry */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+} fentry_readings SEC(".maps");
+
+/* accumulated readings */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+} accum_readings SEC(".maps");
+
+/* sample counts, one per cpu */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, sizeof(u64));
+} counts SEC(".maps");
+
+const volatile __u32 num_cpu = 1;
+const volatile __u32 num_metric = 1;
+#define MAX_NUM_MATRICS 4
+
+SEC("fentry/XXX")
+int BPF_PROG(fentry_XXX)
+{
+	struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS];
+	u32 key = bpf_get_smp_processor_id();
+	u32 i;
+
+	/* look up before reading, to reduce error */
+	for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+		u32 flag = i;
+
+		ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag);
+		if (!ptrs[i])
+			return 0;
+	}
+
+	for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+		struct bpf_perf_event_value reading;
+		int err;
+
+		err = bpf_perf_event_read_value(&events, key, &reading,
+						sizeof(reading));
+		if (err)
+			return 0;
+		*(ptrs[i]) = reading;
+		key += num_cpu;
+	}
+
+	return 0;
+}
+
+static inline void
+fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
+{
+	struct bpf_perf_event_value *before, diff, *accum;
+
+	before = bpf_map_lookup_elem(&fentry_readings, &id);
+	/* only account samples with a valid fentry_reading */
+	if (before && before->counter) {
+		struct bpf_perf_event_value *accum;
+
+		diff.counter = after->counter - before->counter;
+		diff.enabled = after->enabled - before->enabled;
+		diff.running = after->running - before->running;
+
+		accum = bpf_map_lookup_elem(&accum_readings, &id);
+		if (accum) {
+			accum->counter += diff.counter;
+			accum->enabled += diff.enabled;
+			accum->running += diff.running;
+		}
+	}
+}
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+	struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
+	u32 cpu = bpf_get_smp_processor_id();
+	u32 i, one = 1, zero = 0;
+	int err;
+	u64 *count;
+
+	/* read all events before updating the maps, to reduce error */
+	for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+		err = bpf_perf_event_read_value(&events, cpu + i * num_cpu,
+						readings + i, sizeof(*readings));
+		if (err)
+			return 0;
+	}
+	count = bpf_map_lookup_elem(&counts, &zero);
+	if (count) {
+		*count += 1;
+		for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++)
+			fexit_update_maps(i, &readings[i]);
+	}
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h
new file mode 100644
index 000000000000..1f767e9510f7
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/profiler.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __PROFILER_H
+#define __PROFILER_H
+
+/* useful typedefs from vmlinux.h */
+
+typedef signed char __s8;
+typedef unsigned char __u8;
+typedef short int __s16;
+typedef short unsigned int __u16;
+typedef int __s32;
+typedef unsigned int __u32;
+typedef long long int __s64;
+typedef long long unsigned int __u64;
+
+typedef __s8 s8;
+typedef __u8 u8;
+typedef __s16 s16;
+typedef __u16 u16;
+typedef __s32 s32;
+typedef __u32 u32;
+typedef __s64 s64;
+typedef __u64 u64;
+
+enum {
+	false = 0,
+	true = 1,
+};
+
+#ifdef __CHECKER__
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
+#endif
+
+typedef __u16 __bitwise__ __le16;
+typedef __u16 __bitwise__ __be16;
+typedef __u32 __bitwise__ __le32;
+typedef __u32 __bitwise__ __be32;
+typedef __u64 __bitwise__ __le64;
+typedef __u64 __bitwise__ __be64;
+
+typedef __u16 __bitwise__ __sum16;
+typedef __u32 __bitwise__ __wsum;
+
+#endif /* __PROFILER_H */
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
new file mode 100644
index 000000000000..2a7befbd11ad
--- /dev/null
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -0,0 +1,596 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <linux/err.h>
+
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
+
+static const struct btf_type *map_info_type;
+static __u32 map_info_alloc_len;
+static struct btf *btf_vmlinux;
+static __s32 map_info_type_id;
+
+struct res {
+	unsigned int nr_maps;
+	unsigned int nr_errs;
+};
+
+static const struct btf *get_btf_vmlinux(void)
+{
+	if (btf_vmlinux)
+		return btf_vmlinux;
+
+	btf_vmlinux = libbpf_find_kernel_btf();
+	if (IS_ERR(btf_vmlinux))
+		p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y");
+
+	return btf_vmlinux;
+}
+
+static const char *get_kern_struct_ops_name(const struct bpf_map_info *info)
+{
+	const struct btf *kern_btf;
+	const struct btf_type *t;
+	const char *st_ops_name;
+
+	kern_btf = get_btf_vmlinux();
+	if (IS_ERR(kern_btf))
+		return "<btf_vmlinux_not_found>";
+
+	t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id);
+	st_ops_name = btf__name_by_offset(kern_btf, t->name_off);
+	st_ops_name += strlen(STRUCT_OPS_VALUE_PREFIX);
+
+	return st_ops_name;
+}
+
+static __s32 get_map_info_type_id(void)
+{
+	const struct btf *kern_btf;
+
+	if (map_info_type_id)
+		return map_info_type_id;
+
+	kern_btf = get_btf_vmlinux();
+	if (IS_ERR(kern_btf)) {
+		map_info_type_id = PTR_ERR(kern_btf);
+		return map_info_type_id;
+	}
+
+	map_info_type_id = btf__find_by_name_kind(kern_btf, "bpf_map_info",
+						  BTF_KIND_STRUCT);
+	if (map_info_type_id < 0) {
+		p_err("can't find bpf_map_info from btf_vmlinux");
+		return map_info_type_id;
+	}
+	map_info_type = btf__type_by_id(kern_btf, map_info_type_id);
+
+	/* Ensure map_info_alloc() has at least what the bpftool needs */
+	map_info_alloc_len = map_info_type->size;
+	if (map_info_alloc_len < sizeof(struct bpf_map_info))
+		map_info_alloc_len = sizeof(struct bpf_map_info);
+
+	return map_info_type_id;
+}
+
+/* If the subcmd needs to print out the bpf_map_info,
+ * it should always call map_info_alloc to allocate
+ * a bpf_map_info object instead of allocating it
+ * on the stack.
+ *
+ * map_info_alloc() will take the running kernel's btf
+ * into account.  i.e. it will consider the
+ * sizeof(struct bpf_map_info) of the running kernel.
+ *
+ * It will enable the "struct_ops" cmd to print the latest
+ * "struct bpf_map_info".
+ *
+ * [ Recall that "struct_ops" requires the kernel's btf to
+ *   be available ]
+ */
+static struct bpf_map_info *map_info_alloc(__u32 *alloc_len)
+{
+	struct bpf_map_info *info;
+
+	if (get_map_info_type_id() < 0)
+		return NULL;
+
+	info = calloc(1, map_info_alloc_len);
+	if (!info)
+		p_err("mem alloc failed");
+	else
+		*alloc_len = map_info_alloc_len;
+
+	return info;
+}
+
+/* It iterates all struct_ops maps of the system.
+ * It returns the fd in "*res_fd" and map_info in "*info".
+ * In the very first iteration, info->id should be 0.
+ * An optional map "*name" filter can be specified.
+ * The filter can be made more flexible in the future.
+ * e.g. filter by kernel-struct-ops-name, regex-name, glob-name, ...etc.
+ *
+ * Return value:
+ *     1: A struct_ops map found.  It is returned in "*res_fd" and "*info".
+ *	  The caller can continue to call get_next in the future.
+ *     0: No struct_ops map is returned.
+ *        All struct_ops map has been found.
+ *    -1: Error and the caller should abort the iteration.
+ */
+static int get_next_struct_ops_map(const char *name, int *res_fd,
+				   struct bpf_map_info *info, __u32 info_len)
+{
+	__u32 id = info->id;
+	int err, fd;
+
+	while (true) {
+		err = bpf_map_get_next_id(id, &id);
+		if (err) {
+			if (errno == ENOENT)
+				return 0;
+			p_err("can't get next map: %s", strerror(errno));
+			return -1;
+		}
+
+		fd = bpf_map_get_fd_by_id(id);
+		if (fd < 0) {
+			if (errno == ENOENT)
+				continue;
+			p_err("can't get map by id (%u): %s",
+			      id, strerror(errno));
+			return -1;
+		}
+
+		err = bpf_obj_get_info_by_fd(fd, info, &info_len);
+		if (err) {
+			p_err("can't get map info: %s", strerror(errno));
+			close(fd);
+			return -1;
+		}
+
+		if (info->type == BPF_MAP_TYPE_STRUCT_OPS &&
+		    (!name || !strcmp(name, info->name))) {
+			*res_fd = fd;
+			return 1;
+		}
+		close(fd);
+	}
+}
+
+static int cmd_retval(const struct res *res, bool must_have_one_map)
+{
+	if (res->nr_errs || (!res->nr_maps && must_have_one_map))
+		return -1;
+
+	return 0;
+}
+
+/* "data" is the work_func private storage */
+typedef int (*work_func)(int fd, const struct bpf_map_info *info, void *data,
+			 struct json_writer *wtr);
+
+/* Find all struct_ops map in the system.
+ * Filter out by "name" (if specified).
+ * Then call "func(fd, info, data, wtr)" on each struct_ops map found.
+ */
+static struct res do_search(const char *name, work_func func, void *data,
+			    struct json_writer *wtr)
+{
+	struct bpf_map_info *info;
+	struct res res = {};
+	__u32 info_len;
+	int fd, err;
+
+	info = map_info_alloc(&info_len);
+	if (!info) {
+		res.nr_errs++;
+		return res;
+	}
+
+	if (wtr)
+		jsonw_start_array(wtr);
+	while ((err = get_next_struct_ops_map(name, &fd, info, info_len)) == 1) {
+		res.nr_maps++;
+		err = func(fd, info, data, wtr);
+		if (err)
+			res.nr_errs++;
+		close(fd);
+	}
+	if (wtr)
+		jsonw_end_array(wtr);
+
+	if (err)
+		res.nr_errs++;
+
+	if (!wtr && name && !res.nr_errs && !res.nr_maps)
+		/* It is not printing empty [].
+		 * Thus, needs to specifically say nothing found
+		 * for "name" here.
+		 */
+		p_err("no struct_ops found for %s", name);
+	else if (!wtr && json_output && !res.nr_errs)
+		/* The "func()" above is not writing any json (i.e. !wtr
+		 * test here).
+		 *
+		 * However, "-j" is enabled and there is no errs here,
+		 * so call json_null() as the current convention of
+		 * other cmds.
+		 */
+		jsonw_null(json_wtr);
+
+	free(info);
+	return res;
+}
+
+static struct res do_one_id(const char *id_str, work_func func, void *data,
+			    struct json_writer *wtr)
+{
+	struct bpf_map_info *info;
+	struct res res = {};
+	unsigned long id;
+	__u32 info_len;
+	char *endptr;
+	int fd;
+
+	id = strtoul(id_str, &endptr, 0);
+	if (*endptr || !id || id > UINT32_MAX) {
+		p_err("invalid id %s", id_str);
+		res.nr_errs++;
+		return res;
+	}
+
+	fd = bpf_map_get_fd_by_id(id);
+	if (fd == -1) {
+		p_err("can't get map by id (%lu): %s", id, strerror(errno));
+		res.nr_errs++;
+		return res;
+	}
+
+	info = map_info_alloc(&info_len);
+	if (!info) {
+		res.nr_errs++;
+		goto done;
+	}
+
+	if (bpf_obj_get_info_by_fd(fd, info, &info_len)) {
+		p_err("can't get map info: %s", strerror(errno));
+		res.nr_errs++;
+		goto done;
+	}
+
+	if (info->type != BPF_MAP_TYPE_STRUCT_OPS) {
+		p_err("%s id %u is not a struct_ops map", info->name, info->id);
+		res.nr_errs++;
+		goto done;
+	}
+
+	res.nr_maps++;
+
+	if (func(fd, info, data, wtr))
+		res.nr_errs++;
+	else if (!wtr && json_output)
+		/* The "func()" above is not writing any json (i.e. !wtr
+		 * test here).
+		 *
+		 * However, "-j" is enabled and there is no errs here,
+		 * so call json_null() as the current convention of
+		 * other cmds.
+		 */
+		jsonw_null(json_wtr);
+
+done:
+	free(info);
+	close(fd);
+
+	return res;
+}
+
+static struct res do_work_on_struct_ops(const char *search_type,
+					const char *search_term,
+					work_func func, void *data,
+					struct json_writer *wtr)
+{
+	if (search_type) {
+		if (is_prefix(search_type, "id"))
+			return do_one_id(search_term, func, data, wtr);
+		else if (!is_prefix(search_type, "name"))
+			usage();
+	}
+
+	return do_search(search_term, func, data, wtr);
+}
+
+static int __do_show(int fd, const struct bpf_map_info *info, void *data,
+		     struct json_writer *wtr)
+{
+	if (wtr) {
+		jsonw_start_object(wtr);
+		jsonw_uint_field(wtr, "id", info->id);
+		jsonw_string_field(wtr, "name", info->name);
+		jsonw_string_field(wtr, "kernel_struct_ops",
+				   get_kern_struct_ops_name(info));
+		jsonw_end_object(wtr);
+	} else {
+		printf("%u: %-15s %-32s\n", info->id, info->name,
+		       get_kern_struct_ops_name(info));
+	}
+
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+	const char *search_type = NULL, *search_term = NULL;
+	struct res res;
+
+	if (argc && argc != 2)
+		usage();
+
+	if (argc == 2) {
+		search_type = GET_ARG();
+		search_term = GET_ARG();
+	}
+
+	res = do_work_on_struct_ops(search_type, search_term, __do_show,
+				    NULL, json_wtr);
+
+	return cmd_retval(&res, !!search_term);
+}
+
+static int __do_dump(int fd, const struct bpf_map_info *info, void *data,
+		     struct json_writer *wtr)
+{
+	struct btf_dumper *d = (struct btf_dumper *)data;
+	const struct btf_type *struct_ops_type;
+	const struct btf *kern_btf = d->btf;
+	const char *struct_ops_name;
+	int zero = 0;
+	void *value;
+
+	/* note: d->jw == wtr */
+
+	kern_btf = d->btf;
+
+	/* The kernel supporting BPF_MAP_TYPE_STRUCT_OPS must have
+	 * btf_vmlinux_value_type_id.
+	 */
+	struct_ops_type = btf__type_by_id(kern_btf,
+					  info->btf_vmlinux_value_type_id);
+	struct_ops_name = btf__name_by_offset(kern_btf,
+					      struct_ops_type->name_off);
+	value = calloc(1, info->value_size);
+	if (!value) {
+		p_err("mem alloc failed");
+		return -1;
+	}
+
+	if (bpf_map_lookup_elem(fd, &zero, value)) {
+		p_err("can't lookup struct_ops map %s id %u",
+		      info->name, info->id);
+		free(value);
+		return -1;
+	}
+
+	jsonw_start_object(wtr);
+	jsonw_name(wtr, "bpf_map_info");
+	btf_dumper_type(d, map_info_type_id, (void *)info);
+	jsonw_end_object(wtr);
+
+	jsonw_start_object(wtr);
+	jsonw_name(wtr, struct_ops_name);
+	btf_dumper_type(d, info->btf_vmlinux_value_type_id, value);
+	jsonw_end_object(wtr);
+
+	free(value);
+
+	return 0;
+}
+
+static int do_dump(int argc, char **argv)
+{
+	const char *search_type = NULL, *search_term = NULL;
+	json_writer_t *wtr = json_wtr;
+	const struct btf *kern_btf;
+	struct btf_dumper d = {};
+	struct res res;
+
+	if (argc && argc != 2)
+		usage();
+
+	if (argc == 2) {
+		search_type = GET_ARG();
+		search_term = GET_ARG();
+	}
+
+	kern_btf = get_btf_vmlinux();
+	if (IS_ERR(kern_btf))
+		return -1;
+
+	if (!json_output) {
+		wtr = jsonw_new(stdout);
+		if (!wtr) {
+			p_err("can't create json writer");
+			return -1;
+		}
+		jsonw_pretty(wtr, true);
+	}
+
+	d.btf = kern_btf;
+	d.jw = wtr;
+	d.is_plain_text = !json_output;
+	d.prog_id_as_func_ptr = true;
+
+	res = do_work_on_struct_ops(search_type, search_term, __do_dump, &d,
+				    wtr);
+
+	if (!json_output)
+		jsonw_destroy(&wtr);
+
+	return cmd_retval(&res, !!search_term);
+}
+
+static int __do_unregister(int fd, const struct bpf_map_info *info, void *data,
+			   struct json_writer *wtr)
+{
+	int zero = 0;
+
+	if (bpf_map_delete_elem(fd, &zero)) {
+		p_err("can't unload %s %s id %u: %s",
+		      get_kern_struct_ops_name(info), info->name,
+		      info->id, strerror(errno));
+		return -1;
+	}
+
+	p_info("Unregistered %s %s id %u",
+	       get_kern_struct_ops_name(info), info->name,
+	       info->id);
+
+	return 0;
+}
+
+static int do_unregister(int argc, char **argv)
+{
+	const char *search_type, *search_term;
+	struct res res;
+
+	if (argc != 2)
+		usage();
+
+	search_type = GET_ARG();
+	search_term = GET_ARG();
+
+	res = do_work_on_struct_ops(search_type, search_term,
+				    __do_unregister, NULL, NULL);
+
+	return cmd_retval(&res, true);
+}
+
+static int do_register(int argc, char **argv)
+{
+	const struct bpf_map_def *def;
+	struct bpf_map_info info = {};
+	__u32 info_len = sizeof(info);
+	int nr_errs = 0, nr_maps = 0;
+	struct bpf_object *obj;
+	struct bpf_link *link;
+	struct bpf_map *map;
+	const char *file;
+
+	if (argc != 1)
+		usage();
+
+	file = GET_ARG();
+
+	obj = bpf_object__open(file);
+	if (IS_ERR_OR_NULL(obj))
+		return -1;
+
+	set_max_rlimit();
+
+	if (bpf_object__load(obj)) {
+		bpf_object__close(obj);
+		return -1;
+	}
+
+	bpf_object__for_each_map(map, obj) {
+		def = bpf_map__def(map);
+		if (def->type != BPF_MAP_TYPE_STRUCT_OPS)
+			continue;
+
+		link = bpf_map__attach_struct_ops(map);
+		if (IS_ERR(link)) {
+			p_err("can't register struct_ops %s: %s",
+			      bpf_map__name(map),
+			      strerror(-PTR_ERR(link)));
+			nr_errs++;
+			continue;
+		}
+		nr_maps++;
+
+		bpf_link__disconnect(link);
+		bpf_link__destroy(link);
+
+		if (!bpf_obj_get_info_by_fd(bpf_map__fd(map), &info,
+					    &info_len))
+			p_info("Registered %s %s id %u",
+			       get_kern_struct_ops_name(&info),
+			       bpf_map__name(map),
+			       info.id);
+		else
+			/* Not p_err.  The struct_ops was attached
+			 * successfully.
+			 */
+			p_info("Registered %s but can't find id: %s",
+			       bpf_map__name(map), strerror(errno));
+	}
+
+	bpf_object__close(obj);
+
+	if (nr_errs)
+		return -1;
+
+	if (!nr_maps) {
+		p_err("no struct_ops found in %s", file);
+		return -1;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+
+	return 0;
+}
+
+static int do_help(int argc, char **argv)
+{
+	if (json_output) {
+		jsonw_null(json_wtr);
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Usage: %s %s { show | list } [STRUCT_OPS_MAP]\n"
+		"       %s %s dump [STRUCT_OPS_MAP]\n"
+		"       %s %s register OBJ\n"
+		"       %s %s unregister STRUCT_OPS_MAP\n"
+		"       %s %s help\n"
+		"\n"
+		"       OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
+		"       STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n",
+		bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "show",	do_show },
+	{ "list",	do_show },
+	{ "register",	do_register },
+	{ "unregister",	do_unregister },
+	{ "dump",	do_dump },
+	{ "help",	do_help },
+	{ 0 }
+};
+
+int do_struct_ops(int argc, char **argv)
+{
+	int err;
+
+	err = cmd_select(cmds, argc, argv, do_help);
+
+	btf__free(btf_vmlinux);
+	return err;
+}
diff --git a/tools/bpf/runqslower/.gitignore b/tools/bpf/runqslower/.gitignore
index 90a456a2a72f..ffdb70230c8b 100644
--- a/tools/bpf/runqslower/.gitignore
+++ b/tools/bpf/runqslower/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 /.output
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
index 48a39f72fadf..1f18a409f044 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -5,9 +5,7 @@
 #include "runqslower.h"
 
 #define TASK_RUNNING 0
-
-#define BPF_F_INDEX_MASK		0xffffffffULL
-#define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
+#define BPF_F_CURRENT_CPU 0xffffffffULL
 
 const volatile __u64 min_us = 0;
 const volatile pid_t targ_pid = 0;
diff --git a/tools/build/.gitignore b/tools/build/.gitignore
index a776371a3502..98ae1f509592 100644
--- a/tools/build/.gitignore
+++ b/tools/build/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 fixdep
diff --git a/tools/build/feature/.gitignore b/tools/build/feature/.gitignore
index 09b335b98842..15fcd34acdb9 100644
--- a/tools/build/feature/.gitignore
+++ b/tools/build/feature/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 *.d
 *.bin
 *.output
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 7ac0d8088565..ab8e89a7009c 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -67,7 +67,8 @@ FILES=                                          \
          test-llvm.bin				\
          test-llvm-version.bin			\
          test-libaio.bin			\
-         test-libzstd.bin
+         test-libzstd.bin			\
+         test-clang-bpf-global-var.bin
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
@@ -75,6 +76,7 @@ CC ?= $(CROSS_COMPILE)gcc
 CXX ?= $(CROSS_COMPILE)g++
 PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
 LLVM_CONFIG ?= llvm-config
+CLANG ?= clang
 
 all: $(FILES)
 
@@ -321,6 +323,11 @@ $(OUTPUT)test-libaio.bin:
 $(OUTPUT)test-libzstd.bin:
 	$(BUILD) -lzstd
 
+$(OUTPUT)test-clang-bpf-global-var.bin:
+	$(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) |	\
+		grep BTF_KIND_VAR
+
+
 ###############################
 
 clean:
diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c
new file mode 100644
index 000000000000..221f1481d52e
--- /dev/null
+++ b/tools/build/feature/test-clang-bpf-global-var.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+volatile int global_value_for_test = 1;
diff --git a/tools/cgroup/.gitignore b/tools/cgroup/.gitignore
index 633cd9b874f9..46a82775f2ca 100644
--- a/tools/cgroup/.gitignore
+++ b/tools/cgroup/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 cgroup_event_listener
diff --git a/tools/edid/1024x768.S b/tools/edid/1024x768.S
new file mode 100644
index 000000000000..4aed3f9ab88a
--- /dev/null
+++ b/tools/edid/1024x768.S
@@ -0,0 +1,43 @@
+/*
+   1024x768.S: EDID data set for standard 1024x768 60 Hz monitor
+
+   Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 65000 /* kHz */
+#define XPIX 1024
+#define YPIX 768
+#define XY_RATIO XY_RATIO_4_3
+#define XBLANK 320
+#define YBLANK 38
+#define XOFFSET 8
+#define XPULSE 144
+#define YOFFSET 3
+#define YPULSE 6
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux XGA"
+#define ESTABLISHED_TIMING2_BITS 0x08 /* Bit 3 -> 1024x768 @60 Hz */
+#define HSYNC_POL 0
+#define VSYNC_POL 0
+
+#include "edid.S"
diff --git a/tools/edid/1280x1024.S b/tools/edid/1280x1024.S
new file mode 100644
index 000000000000..b26dd424cad7
--- /dev/null
+++ b/tools/edid/1280x1024.S
@@ -0,0 +1,43 @@
+/*
+   1280x1024.S: EDID data set for standard 1280x1024 60 Hz monitor
+
+   Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 108000 /* kHz */
+#define XPIX 1280
+#define YPIX 1024
+#define XY_RATIO XY_RATIO_5_4
+#define XBLANK 408
+#define YBLANK 42
+#define XOFFSET 48
+#define XPULSE 112
+#define YOFFSET 1
+#define YPULSE 3
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux SXGA"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+
+#include "edid.S"
diff --git a/tools/edid/1600x1200.S b/tools/edid/1600x1200.S
new file mode 100644
index 000000000000..0d091b282768
--- /dev/null
+++ b/tools/edid/1600x1200.S
@@ -0,0 +1,43 @@
+/*
+   1600x1200.S: EDID data set for standard 1600x1200 60 Hz monitor
+
+   Copyright (C) 2013 Carsten Emde <C.Emde@osadl.org>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 162000 /* kHz */
+#define XPIX 1600
+#define YPIX 1200
+#define XY_RATIO XY_RATIO_4_3
+#define XBLANK 560
+#define YBLANK 50
+#define XOFFSET 64
+#define XPULSE 192
+#define YOFFSET 1
+#define YPULSE 3
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux UXGA"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+
+#include "edid.S"
diff --git a/tools/edid/1680x1050.S b/tools/edid/1680x1050.S
new file mode 100644
index 000000000000..7dfed9a33eab
--- /dev/null
+++ b/tools/edid/1680x1050.S
@@ -0,0 +1,43 @@
+/*
+   1680x1050.S: EDID data set for standard 1680x1050 60 Hz monitor
+
+   Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 146250 /* kHz */
+#define XPIX 1680
+#define YPIX 1050
+#define XY_RATIO XY_RATIO_16_10
+#define XBLANK 560
+#define YBLANK 39
+#define XOFFSET 104
+#define XPULSE 176
+#define YOFFSET 3
+#define YPULSE 6
+#define DPI 96
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux WSXGA"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+
+#include "edid.S"
diff --git a/tools/edid/1920x1080.S b/tools/edid/1920x1080.S
new file mode 100644
index 000000000000..d6ffbba28e95
--- /dev/null
+++ b/tools/edid/1920x1080.S
@@ -0,0 +1,43 @@
+/*
+   1920x1080.S: EDID data set for standard 1920x1080 60 Hz monitor
+
+   Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 148500 /* kHz */
+#define XPIX 1920
+#define YPIX 1080
+#define XY_RATIO XY_RATIO_16_9
+#define XBLANK 280
+#define YBLANK 45
+#define XOFFSET 88
+#define XPULSE 44
+#define YOFFSET 4
+#define YPULSE 5
+#define DPI 96
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux FHD"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+
+#include "edid.S"
diff --git a/tools/edid/800x600.S b/tools/edid/800x600.S
new file mode 100644
index 000000000000..a5616588de08
--- /dev/null
+++ b/tools/edid/800x600.S
@@ -0,0 +1,40 @@
+/*
+   800x600.S: EDID data set for standard 800x600 60 Hz monitor
+
+   Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
+   Copyright (C) 2014 Linaro Limited
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 40000 /* kHz */
+#define XPIX 800
+#define YPIX 600
+#define XY_RATIO XY_RATIO_4_3
+#define XBLANK 256
+#define YBLANK 28
+#define XOFFSET 40
+#define XPULSE 128
+#define YOFFSET 1
+#define YPULSE 4
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux SVGA"
+#define ESTABLISHED_TIMING1_BITS 0x01 /* Bit 0: 800x600 @ 60Hz */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+
+#include "edid.S"
diff --git a/tools/edid/Makefile b/tools/edid/Makefile
new file mode 100644
index 000000000000..85a927dfab02
--- /dev/null
+++ b/tools/edid/Makefile
@@ -0,0 +1,37 @@
+
+SOURCES	:= $(wildcard [0-9]*x[0-9]*.S)
+
+BIN	:= $(patsubst %.S, %.bin, $(SOURCES))
+
+IHEX	:= $(patsubst %.S, %.bin.ihex, $(SOURCES))
+
+CODE	:= $(patsubst %.S, %.c, $(SOURCES))
+
+all:	$(BIN) $(IHEX) $(CODE)
+
+clean:
+	@rm -f *.o *.bin.ihex *.bin *.c
+
+%.o:	%.S
+	@cc -c $^
+
+%.bin.nocrc:	%.o
+	@objcopy -Obinary $^ $@
+
+%.crc:	%.bin.nocrc
+	@list=$$(for i in `seq 1 127`; do head -c$$i $^ | tail -c1 \
+		| hexdump -v -e '/1 "%02X+"'; done); \
+		echo "ibase=16;100-($${list%?})%100" | bc >$@
+
+%.p:	%.crc %.S
+	@cc -c -DCRC="$$(cat $*.crc)" -o $@ $*.S
+
+%.bin:	%.p
+	@objcopy -Obinary $^ $@
+
+%.bin.ihex:	%.p
+	@objcopy -Oihex $^ $@
+	@dos2unix $@ 2>/dev/null
+
+%.c:	%.bin
+	@echo "{" >$@; hexdump -f hex $^ >>$@; echo "};" >>$@
diff --git a/tools/edid/edid.S b/tools/edid/edid.S
new file mode 100644
index 000000000000..c3d13815526d
--- /dev/null
+++ b/tools/edid/edid.S
@@ -0,0 +1,274 @@
+/*
+   edid.S: EDID data template
+
+   Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
+*/
+
+
+/* Manufacturer */
+#define MFG_LNX1 'L'
+#define MFG_LNX2 'N'
+#define MFG_LNX3 'X'
+#define SERIAL 0
+#define YEAR 2012
+#define WEEK 5
+
+/* EDID 1.3 standard definitions */
+#define XY_RATIO_16_10	0b00
+#define XY_RATIO_4_3	0b01
+#define XY_RATIO_5_4	0b10
+#define XY_RATIO_16_9	0b11
+
+/* Provide defaults for the timing bits */
+#ifndef ESTABLISHED_TIMING1_BITS
+#define ESTABLISHED_TIMING1_BITS 0x00
+#endif
+#ifndef ESTABLISHED_TIMING2_BITS
+#define ESTABLISHED_TIMING2_BITS 0x00
+#endif
+#ifndef ESTABLISHED_TIMING3_BITS
+#define ESTABLISHED_TIMING3_BITS 0x00
+#endif
+
+#define mfgname2id(v1,v2,v3) \
+	((((v1-'@')&0x1f)<<10)+(((v2-'@')&0x1f)<<5)+((v3-'@')&0x1f))
+#define swap16(v1) ((v1>>8)+((v1&0xff)<<8))
+#define lsbs2(v1,v2) (((v1&0x0f)<<4)+(v2&0x0f))
+#define msbs2(v1,v2) ((((v1>>8)&0x0f)<<4)+((v2>>8)&0x0f))
+#define msbs4(v1,v2,v3,v4) \
+	((((v1>>8)&0x03)<<6)+(((v2>>8)&0x03)<<4)+\
+	(((v3>>4)&0x03)<<2)+((v4>>4)&0x03))
+#define pixdpi2mm(pix,dpi) ((pix*25)/dpi)
+#define xsize pixdpi2mm(XPIX,DPI)
+#define ysize pixdpi2mm(YPIX,DPI)
+
+		.data
+
+/* Fixed header pattern */
+header:		.byte	0x00,0xff,0xff,0xff,0xff,0xff,0xff,0x00
+
+mfg_id:		.hword	swap16(mfgname2id(MFG_LNX1, MFG_LNX2, MFG_LNX3))
+
+prod_code:	.hword	0
+
+/* Serial number. 32 bits, little endian. */
+serial_number:	.long	SERIAL
+
+/* Week of manufacture */
+week:		.byte	WEEK
+
+/* Year of manufacture, less 1990. (1990-2245)
+   If week=255, it is the model year instead */
+year:		.byte	YEAR-1990
+
+version:	.byte	VERSION 	/* EDID version, usually 1 (for 1.3) */
+revision:	.byte	REVISION	/* EDID revision, usually 3 (for 1.3) */
+
+/* If Bit 7=1	Digital input. If set, the following bit definitions apply:
+     Bits 6-1	Reserved, must be 0
+     Bit 0	Signal is compatible with VESA DFP 1.x TMDS CRGB,
+		  1 pixel per clock, up to 8 bits per color, MSB aligned,
+   If Bit 7=0	Analog input. If clear, the following bit definitions apply:
+     Bits 6-5	Video white and sync levels, relative to blank
+		  00=+0.7/-0.3 V; 01=+0.714/-0.286 V;
+		  10=+1.0/-0.4 V; 11=+0.7/0 V
+   Bit 4	Blank-to-black setup (pedestal) expected
+   Bit 3	Separate sync supported
+   Bit 2	Composite sync (on HSync) supported
+   Bit 1	Sync on green supported
+   Bit 0	VSync pulse must be serrated when somposite or
+		  sync-on-green is used. */
+video_parms:	.byte	0x6d
+
+/* Maximum horizontal image size, in centimetres
+   (max 292 cm/115 in at 16:9 aspect ratio) */
+max_hor_size:	.byte	xsize/10
+
+/* Maximum vertical image size, in centimetres.
+   If either byte is 0, undefined (e.g. projector) */
+max_vert_size:	.byte	ysize/10
+
+/* Display gamma, minus 1, times 100 (range 1.00-3.5 */
+gamma:		.byte	120
+
+/* Bit 7	DPMS standby supported
+   Bit 6	DPMS suspend supported
+   Bit 5	DPMS active-off supported
+   Bits 4-3	Display type: 00=monochrome; 01=RGB colour;
+		  10=non-RGB multicolour; 11=undefined
+   Bit 2	Standard sRGB colour space. Bytes 25-34 must contain
+		  sRGB standard values.
+   Bit 1	Preferred timing mode specified in descriptor block 1.
+   Bit 0	GTF supported with default parameter values. */
+dsp_features:	.byte	0xea
+
+/* Chromaticity coordinates. */
+/* Red and green least-significant bits
+   Bits 7-6	Red x value least-significant 2 bits
+   Bits 5-4	Red y value least-significant 2 bits
+   Bits 3-2	Green x value lst-significant 2 bits
+   Bits 1-0	Green y value least-significant 2 bits */
+red_green_lsb:	.byte	0x5e
+
+/* Blue and white least-significant 2 bits */
+blue_white_lsb:	.byte	0xc0
+
+/* Red x value most significant 8 bits.
+   0-255 encodes 0-0.996 (255/256); 0-0.999 (1023/1024) with lsbits */
+red_x_msb:	.byte	0xa4
+
+/* Red y value most significant 8 bits */
+red_y_msb:	.byte	0x59
+
+/* Green x and y value most significant 8 bits */
+green_x_y_msb:	.byte	0x4a,0x98
+
+/* Blue x and y value most significant 8 bits */
+blue_x_y_msb:	.byte	0x25,0x20
+
+/* Default white point x and y value most significant 8 bits */
+white_x_y_msb:	.byte	0x50,0x54
+
+/* Established timings */
+/* Bit 7	720x400 @ 70 Hz
+   Bit 6	720x400 @ 88 Hz
+   Bit 5	640x480 @ 60 Hz
+   Bit 4	640x480 @ 67 Hz
+   Bit 3	640x480 @ 72 Hz
+   Bit 2	640x480 @ 75 Hz
+   Bit 1	800x600 @ 56 Hz
+   Bit 0	800x600 @ 60 Hz */
+estbl_timing1:	.byte	ESTABLISHED_TIMING1_BITS
+
+/* Bit 7	800x600 @ 72 Hz
+   Bit 6	800x600 @ 75 Hz
+   Bit 5	832x624 @ 75 Hz
+   Bit 4	1024x768 @ 87 Hz, interlaced (1024x768)
+   Bit 3	1024x768 @ 60 Hz
+   Bit 2	1024x768 @ 72 Hz
+   Bit 1	1024x768 @ 75 Hz
+   Bit 0	1280x1024 @ 75 Hz */
+estbl_timing2:	.byte	ESTABLISHED_TIMING2_BITS
+
+/* Bit 7	1152x870 @ 75 Hz (Apple Macintosh II)
+   Bits 6-0 	Other manufacturer-specific display mod */
+estbl_timing3:	.byte	ESTABLISHED_TIMING3_BITS
+
+/* Standard timing */
+/* X resolution, less 31, divided by 8 (256-2288 pixels) */
+std_xres:	.byte	(XPIX/8)-31
+/* Y resolution, X:Y pixel ratio
+   Bits 7-6	X:Y pixel ratio: 00=16:10; 01=4:3; 10=5:4; 11=16:9.
+   Bits 5-0	Vertical frequency, less 60 (60-123 Hz) */
+std_vres:	.byte	(XY_RATIO<<6)+VFREQ-60
+		.fill	7,2,0x0101	/* Unused */
+
+descriptor1:
+/* Pixel clock in 10 kHz units. (0.-655.35 MHz, little-endian) */
+clock:		.hword	CLOCK/10
+
+/* Horizontal active pixels 8 lsbits (0-4095) */
+x_act_lsb:	.byte	XPIX&0xff
+/* Horizontal blanking pixels 8 lsbits (0-4095)
+   End of active to start of next active. */
+x_blk_lsb:	.byte	XBLANK&0xff
+/* Bits 7-4 	Horizontal active pixels 4 msbits
+   Bits 3-0	Horizontal blanking pixels 4 msbits */
+x_msbs:		.byte	msbs2(XPIX,XBLANK)
+
+/* Vertical active lines 8 lsbits (0-4095) */
+y_act_lsb:	.byte	YPIX&0xff
+/* Vertical blanking lines 8 lsbits (0-4095) */
+y_blk_lsb:	.byte	YBLANK&0xff
+/* Bits 7-4 	Vertical active lines 4 msbits
+   Bits 3-0 	Vertical blanking lines 4 msbits */
+y_msbs:		.byte	msbs2(YPIX,YBLANK)
+
+/* Horizontal sync offset pixels 8 lsbits (0-1023) From blanking start */
+x_snc_off_lsb:	.byte	XOFFSET&0xff
+/* Horizontal sync pulse width pixels 8 lsbits (0-1023) */
+x_snc_pls_lsb:	.byte	XPULSE&0xff
+/* Bits 7-4 	Vertical sync offset lines 4 lsbits (0-63)
+   Bits 3-0 	Vertical sync pulse width lines 4 lsbits (0-63) */
+y_snc_lsb:	.byte	lsbs2(YOFFSET, YPULSE)
+/* Bits 7-6 	Horizontal sync offset pixels 2 msbits
+   Bits 5-4 	Horizontal sync pulse width pixels 2 msbits
+   Bits 3-2 	Vertical sync offset lines 2 msbits
+   Bits 1-0 	Vertical sync pulse width lines 2 msbits */
+xy_snc_msbs:	.byte	msbs4(XOFFSET,XPULSE,YOFFSET,YPULSE)
+
+/* Horizontal display size, mm, 8 lsbits (0-4095 mm, 161 in) */
+x_dsp_size:	.byte	xsize&0xff
+
+/* Vertical display size, mm, 8 lsbits (0-4095 mm, 161 in) */
+y_dsp_size:	.byte	ysize&0xff
+
+/* Bits 7-4 	Horizontal display size, mm, 4 msbits
+   Bits 3-0 	Vertical display size, mm, 4 msbits */
+dsp_size_mbsb:	.byte	msbs2(xsize,ysize)
+
+/* Horizontal border pixels (each side; total is twice this) */
+x_border:	.byte	0
+/* Vertical border lines (each side; total is twice this) */
+y_border:	.byte	0
+
+/* Bit 7 	Interlaced
+   Bits 6-5 	Stereo mode: 00=No stereo; other values depend on bit 0:
+   Bit 0=0: 01=Field sequential, sync=1 during right; 10=similar,
+     sync=1 during left; 11=4-way interleaved stereo
+   Bit 0=1 2-way interleaved stereo: 01=Right image on even lines;
+     10=Left image on even lines; 11=side-by-side
+   Bits 4-3 	Sync type: 00=Analog composite; 01=Bipolar analog composite;
+     10=Digital composite (on HSync); 11=Digital separate
+   Bit 2 	If digital separate: Vertical sync polarity (1=positive)
+   Other types: VSync serrated (HSync during VSync)
+   Bit 1 	If analog sync: Sync on all 3 RGB lines (else green only)
+   Digital: HSync polarity (1=positive)
+   Bit 0 	2-way line-interleaved stereo, if bits 4-3 are not 00. */
+features:	.byte	0x18+(VSYNC_POL<<2)+(HSYNC_POL<<1)
+
+descriptor2:	.byte	0,0	/* Not a detailed timing descriptor */
+		.byte	0	/* Must be zero */
+		.byte	0xff	/* Descriptor is monitor serial number (text) */
+		.byte	0	/* Must be zero */
+start1:		.ascii	"Linux #0"
+end1:		.byte	0x0a	/* End marker */
+		.fill	12-(end1-start1), 1, 0x20 /* Padded spaces */
+descriptor3:	.byte	0,0	/* Not a detailed timing descriptor */
+		.byte	0	/* Must be zero */
+		.byte	0xfd	/* Descriptor is monitor range limits */
+		.byte	0	/* Must be zero */
+start2:		.byte	VFREQ-1	/* Minimum vertical field rate (1-255 Hz) */
+		.byte	VFREQ+1	/* Maximum vertical field rate (1-255 Hz) */
+		.byte	(CLOCK/(XPIX+XBLANK))-1 /* Minimum horizontal line rate
+						    (1-255 kHz) */
+		.byte	(CLOCK/(XPIX+XBLANK))+1 /* Maximum horizontal line rate
+						    (1-255 kHz) */
+		.byte	(CLOCK/10000)+1	/* Maximum pixel clock rate, rounded up
+					   to 10 MHz multiple (10-2550 MHz) */
+		.byte	0	/* No extended timing information type */
+end2:		.byte	0x0a	/* End marker */
+		.fill	12-(end2-start2), 1, 0x20 /* Padded spaces */
+descriptor4:	.byte	0,0	/* Not a detailed timing descriptor */
+		.byte	0	/* Must be zero */
+		.byte	0xfc	/* Descriptor is text */
+		.byte	0	/* Must be zero */
+start3:		.ascii	TIMING_NAME
+end3:		.byte	0x0a	/* End marker */
+		.fill	12-(end3-start3), 1, 0x20 /* Padded spaces */
+extensions:	.byte	0	/* Number of extensions to follow */
+checksum:	.byte	CRC	/* Sum of all bytes must be 0 */
diff --git a/tools/edid/hex b/tools/edid/hex
new file mode 100644
index 000000000000..8873ebb618af
--- /dev/null
+++ b/tools/edid/hex
@@ -0,0 +1 @@
+"\t" 8/1 "0x%02x, " "\n"
diff --git a/tools/gpio/.gitignore b/tools/gpio/.gitignore
index a94c0e83b209..a00d604027a2 100644
--- a/tools/gpio/.gitignore
+++ b/tools/gpio/.gitignore
@@ -1,4 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
 gpio-event-mon
 gpio-hammer
+gpio-watch
 lsgpio
 include/linux/gpio.h
diff --git a/tools/gpio/Build b/tools/gpio/Build
index 4141f35837db..67c7b7f6a717 100644
--- a/tools/gpio/Build
+++ b/tools/gpio/Build
@@ -2,3 +2,4 @@ gpio-utils-y += gpio-utils.o
 lsgpio-y += lsgpio.o gpio-utils.o
 gpio-hammer-y += gpio-hammer.o gpio-utils.o
 gpio-event-mon-y += gpio-event-mon.o gpio-utils.o
+gpio-watch-y += gpio-watch.o
diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile
index 6080de58861f..440434027557 100644
--- a/tools/gpio/Makefile
+++ b/tools/gpio/Makefile
@@ -18,7 +18,7 @@ MAKEFLAGS += -r
 
 override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
 
-ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon
+ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon gpio-watch
 ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
 
 all: $(ALL_PROGRAMS)
@@ -35,7 +35,7 @@ $(OUTPUT)include/linux/gpio.h: ../../include/uapi/linux/gpio.h
 
 prepare: $(OUTPUT)include/linux/gpio.h
 
-GPIO_UTILS_IN := $(output)gpio-utils-in.o
+GPIO_UTILS_IN := $(OUTPUT)gpio-utils-in.o
 $(GPIO_UTILS_IN): prepare FORCE
 	$(Q)$(MAKE) $(build)=gpio-utils
 
@@ -66,6 +66,15 @@ $(GPIO_EVENT_MON_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o
 $(OUTPUT)gpio-event-mon: $(GPIO_EVENT_MON_IN)
 	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
 
+#
+# gpio-watch
+#
+GPIO_WATCH_IN := $(OUTPUT)gpio-watch-in.o
+$(GPIO_WATCH_IN): prepare FORCE
+	$(Q)$(MAKE) $(build)=gpio-watch
+$(OUTPUT)gpio-watch: $(GPIO_WATCH_IN)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
 clean:
 	rm -f $(ALL_PROGRAMS)
 	rm -f $(OUTPUT)include/linux/gpio.h
diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c
index 0e0060a6eb34..9fd926e8cb52 100644
--- a/tools/gpio/gpio-hammer.c
+++ b/tools/gpio/gpio-hammer.c
@@ -77,7 +77,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
 
 		fprintf(stdout, "[%c] ", swirr[j]);
 		j++;
-		if (j == sizeof(swirr)-1)
+		if (j == sizeof(swirr) - 1)
 			j = 0;
 
 		fprintf(stdout, "[");
@@ -135,7 +135,14 @@ int main(int argc, char **argv)
 			device_name = optarg;
 			break;
 		case 'o':
-			lines[i] = strtoul(optarg, NULL, 10);
+			/*
+			 * Avoid overflow. Do not immediately error, we want to
+			 * be able to accurately report on the amount of times
+			 * '-o' was given to give an accurate error message
+			 */
+			if (i < GPIOHANDLES_MAX)
+				lines[i] = strtoul(optarg, NULL, 10);
+
 			i++;
 			break;
 		case '?':
@@ -143,6 +150,14 @@ int main(int argc, char **argv)
 			return -1;
 		}
 	}
+
+	if (i >= GPIOHANDLES_MAX) {
+		fprintf(stderr,
+			"Only %d occurrences of '-o' are allowed, %d were found\n",
+			GPIOHANDLES_MAX, i + 1);
+		return -1;
+	}
+
 	nlines = i;
 
 	if (!device_name || !nlines) {
diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c
index 53470de6a502..06003789e7c7 100644
--- a/tools/gpio/gpio-utils.c
+++ b/tools/gpio/gpio-utils.c
@@ -17,7 +17,7 @@
 #include <linux/gpio.h>
 #include "gpio-utils.h"
 
-#define COMSUMER "gpio-utils"
+#define CONSUMER "gpio-utils"
 
 /**
  * doc: Operation of gpio
@@ -209,7 +209,7 @@ int gpiotools_gets(const char *device_name, unsigned int *lines,
 
 	ret = gpiotools_request_linehandle(device_name, lines, nlines,
 					   GPIOHANDLE_REQUEST_INPUT, data,
-					   COMSUMER);
+					   CONSUMER);
 	if (ret < 0)
 		return ret;
 
@@ -259,7 +259,7 @@ int gpiotools_sets(const char *device_name, unsigned int *lines,
 
 	ret = gpiotools_request_linehandle(device_name, lines, nlines,
 					   GPIOHANDLE_REQUEST_OUTPUT, data,
-					   COMSUMER);
+					   CONSUMER);
 	if (ret < 0)
 		return ret;
 
diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c
new file mode 100644
index 000000000000..5cea24fddfa7
--- /dev/null
+++ b/tools/gpio/gpio-watch.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * gpio-watch - monitor unrequested lines for property changes using the
+ *              character device
+ *
+ * Copyright (C) 2019 BayLibre SAS
+ * Author: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/gpio.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+int main(int argc, char **argv)
+{
+	struct gpioline_info_changed chg;
+	struct gpioline_info req;
+	struct pollfd pfd;
+	int fd, i, j, ret;
+	char *event, *end;
+	ssize_t rd;
+
+	if (argc < 3)
+		goto err_usage;
+
+	fd = open(argv[1], O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		perror("unable to open gpiochip");
+		return EXIT_FAILURE;
+	}
+
+	for (i = 0, j = 2; i < argc - 2; i++, j++) {
+		memset(&req, 0, sizeof(req));
+
+		req.line_offset = strtoul(argv[j], &end, 0);
+		if (*end != '\0')
+			goto err_usage;
+
+		ret = ioctl(fd, GPIO_GET_LINEINFO_WATCH_IOCTL, &req);
+		if (ret) {
+			perror("unable to set up line watch");
+			return EXIT_FAILURE;
+		}
+	}
+
+	pfd.fd = fd;
+	pfd.events = POLLIN | POLLPRI;
+
+	for (;;) {
+		ret = poll(&pfd, 1, 5000);
+		if (ret < 0) {
+			perror("error polling the linechanged fd");
+			return EXIT_FAILURE;
+		} else if (ret > 0) {
+			memset(&chg, 0, sizeof(chg));
+			rd = read(pfd.fd, &chg, sizeof(chg));
+			if (rd < 0 || rd != sizeof(chg)) {
+				if (rd != sizeof(chg))
+					errno = EIO;
+
+				perror("error reading line change event");
+				return EXIT_FAILURE;
+			}
+
+			switch (chg.event_type) {
+			case GPIOLINE_CHANGED_REQUESTED:
+				event = "requested";
+				break;
+			case GPIOLINE_CHANGED_RELEASED:
+				event = "released";
+				break;
+			case GPIOLINE_CHANGED_CONFIG:
+				event = "config changed";
+				break;
+			default:
+				fprintf(stderr,
+					"invalid event type received from the kernel\n");
+				return EXIT_FAILURE;
+			}
+
+			printf("line %u: %s at %llu\n",
+			       chg.info.line_offset, event, chg.timestamp);
+		}
+	}
+
+	return 0;
+
+err_usage:
+	printf("%s: <gpiochip> <line0> <line1> ...\n", argv[0]);
+	return EXIT_FAILURE;
+}
diff --git a/tools/iio/.gitignore b/tools/iio/.gitignore
index 3758202618bd..5bd6f4df98b7 100644
--- a/tools/iio/.gitignore
+++ b/tools/iio/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 iio_event_monitor
 iio_generic_buffer
 lsiio
diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h
index e734da3e5b33..67e01bbadbfe 100644
--- a/tools/include/linux/irqflags.h
+++ b/tools/include/linux/irqflags.h
@@ -2,12 +2,12 @@
 #ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
 #define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
 
-# define trace_hardirq_context(p)	0
-# define trace_softirq_context(p)	0
-# define trace_hardirqs_enabled(p)	0
-# define trace_softirqs_enabled(p)	0
-# define trace_hardirq_enter()		do { } while (0)
-# define trace_hardirq_exit()		do { } while (0)
+# define lockdep_hardirq_context(p)	0
+# define lockdep_softirq_context(p)	0
+# define lockdep_hardirqs_enabled(p)	0
+# define lockdep_softirqs_enabled(p)	0
+# define lockdep_hardirq_enter()	do { } while (0)
+# define lockdep_hardirq_exit()		do { } while (0)
 # define lockdep_softirq_enter()	do { } while (0)
 # define lockdep_softirq_exit()		do { } while (0)
 # define INIT_TRACE_IRQFLAGS
diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h
index ce3c5945a1c4..637189ec1ab9 100644
--- a/tools/include/uapi/asm/errno.h
+++ b/tools/include/uapi/asm/errno.h
@@ -1,18 +1,18 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #if defined(__i386__) || defined(__x86_64__)
-#include "../../arch/x86/include/uapi/asm/errno.h"
+#include "../../../arch/x86/include/uapi/asm/errno.h"
 #elif defined(__powerpc__)
-#include "../../arch/powerpc/include/uapi/asm/errno.h"
+#include "../../../arch/powerpc/include/uapi/asm/errno.h"
 #elif defined(__sparc__)
-#include "../../arch/sparc/include/uapi/asm/errno.h"
+#include "../../../arch/sparc/include/uapi/asm/errno.h"
 #elif defined(__alpha__)
-#include "../../arch/alpha/include/uapi/asm/errno.h"
+#include "../../../arch/alpha/include/uapi/asm/errno.h"
 #elif defined(__mips__)
-#include "../../arch/mips/include/uapi/asm/errno.h"
+#include "../../../arch/mips/include/uapi/asm/errno.h"
 #elif defined(__ia64__)
-#include "../../arch/ia64/include/uapi/asm/errno.h"
+#include "../../../arch/ia64/include/uapi/asm/errno.h"
 #elif defined(__xtensa__)
-#include "../../arch/xtensa/include/uapi/asm/errno.h"
+#include "../../../arch/xtensa/include/uapi/asm/errno.h"
 #else
 #include <asm-generic/errno.h>
 #endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 22f235260a3a..2e29a671d67e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -73,7 +73,7 @@ struct bpf_insn {
 /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
 struct bpf_lpm_trie_key {
 	__u32	prefixlen;	/* up to 32 for AF_INET, 128 for AF_INET6 */
-	__u8	data[0];	/* Arbitrary size */
+	__u8	data[];	/* Arbitrary size */
 };
 
 struct bpf_cgroup_storage_key {
@@ -111,6 +111,8 @@ enum bpf_cmd {
 	BPF_MAP_LOOKUP_AND_DELETE_BATCH,
 	BPF_MAP_UPDATE_BATCH,
 	BPF_MAP_DELETE_BATCH,
+	BPF_LINK_CREATE,
+	BPF_LINK_UPDATE,
 };
 
 enum bpf_map_type {
@@ -181,6 +183,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_TRACING,
 	BPF_PROG_TYPE_STRUCT_OPS,
 	BPF_PROG_TYPE_EXT,
+	BPF_PROG_TYPE_LSM,
 };
 
 enum bpf_attach_type {
@@ -210,6 +213,8 @@ enum bpf_attach_type {
 	BPF_TRACE_RAW_TP,
 	BPF_TRACE_FENTRY,
 	BPF_TRACE_FEXIT,
+	BPF_MODIFY_RETURN,
+	BPF_LSM_MAC,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -325,44 +330,46 @@ enum bpf_attach_type {
 #define BPF_PSEUDO_CALL		1
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
-#define BPF_ANY		0 /* create new element or update existing */
-#define BPF_NOEXIST	1 /* create new element if it didn't exist */
-#define BPF_EXIST	2 /* update existing element */
-#define BPF_F_LOCK	4 /* spin_lock-ed map_lookup/map_update */
+enum {
+	BPF_ANY		= 0, /* create new element or update existing */
+	BPF_NOEXIST	= 1, /* create new element if it didn't exist */
+	BPF_EXIST	= 2, /* update existing element */
+	BPF_F_LOCK	= 4, /* spin_lock-ed map_lookup/map_update */
+};
 
 /* flags for BPF_MAP_CREATE command */
-#define BPF_F_NO_PREALLOC	(1U << 0)
+enum {
+	BPF_F_NO_PREALLOC	= (1U << 0),
 /* Instead of having one common LRU list in the
  * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
  * which can scale and perform better.
  * Note, the LRU nodes (including free nodes) cannot be moved
  * across different LRU lists.
  */
-#define BPF_F_NO_COMMON_LRU	(1U << 1)
+	BPF_F_NO_COMMON_LRU	= (1U << 1),
 /* Specify numa node during map creation */
-#define BPF_F_NUMA_NODE		(1U << 2)
-
-#define BPF_OBJ_NAME_LEN 16U
+	BPF_F_NUMA_NODE		= (1U << 2),
 
 /* Flags for accessing BPF object from syscall side. */
-#define BPF_F_RDONLY		(1U << 3)
-#define BPF_F_WRONLY		(1U << 4)
+	BPF_F_RDONLY		= (1U << 3),
+	BPF_F_WRONLY		= (1U << 4),
 
 /* Flag for stack_map, store build_id+offset instead of pointer */
-#define BPF_F_STACK_BUILD_ID	(1U << 5)
+	BPF_F_STACK_BUILD_ID	= (1U << 5),
 
 /* Zero-initialize hash function seed. This should only be used for testing. */
-#define BPF_F_ZERO_SEED		(1U << 6)
+	BPF_F_ZERO_SEED		= (1U << 6),
 
 /* Flags for accessing BPF object from program side. */
-#define BPF_F_RDONLY_PROG	(1U << 7)
-#define BPF_F_WRONLY_PROG	(1U << 8)
+	BPF_F_RDONLY_PROG	= (1U << 7),
+	BPF_F_WRONLY_PROG	= (1U << 8),
 
 /* Clone map from listener for newly accepted socket */
-#define BPF_F_CLONE		(1U << 9)
+	BPF_F_CLONE		= (1U << 9),
 
 /* Enable memory-mapping BPF map */
-#define BPF_F_MMAPABLE		(1U << 10)
+	BPF_F_MMAPABLE		= (1U << 10),
+};
 
 /* Flags for BPF_PROG_QUERY. */
 
@@ -391,6 +398,8 @@ struct bpf_stack_build_id {
 	};
 };
 
+#define BPF_OBJ_NAME_LEN 16U
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
@@ -534,7 +543,7 @@ union bpf_attr {
 		__u32		prog_cnt;
 	} query;
 
-	struct {
+	struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
 		__u64 name;
 		__u32 prog_fd;
 	} raw_tracepoint;
@@ -562,6 +571,24 @@ union bpf_attr {
 		__u64		probe_offset;	/* output: probe_offset */
 		__u64		probe_addr;	/* output: probe_addr */
 	} task_fd_query;
+
+	struct { /* struct used by BPF_LINK_CREATE command */
+		__u32		prog_fd;	/* eBPF program to attach */
+		__u32		target_fd;	/* object to attach to */
+		__u32		attach_type;	/* attach type */
+		__u32		flags;		/* extra flags */
+	} link_create;
+
+	struct { /* struct used by BPF_LINK_UPDATE command */
+		__u32		link_fd;	/* link fd */
+		/* new program fd to update link with */
+		__u32		new_prog_fd;
+		__u32		flags;		/* extra flags */
+		/* expected link's program fd; is specified only if
+		 * BPF_F_REPLACE flag is set in flags */
+		__u32		old_prog_fd;
+	} link_update;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
@@ -2890,6 +2917,114 @@ union bpf_attr {
  *		Obtain the 64bit jiffies
  *	Return
  *		The 64 bit jiffies
+ *
+ * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
+ *	Description
+ *		For an eBPF program attached to a perf event, retrieve the
+ *		branch records (struct perf_branch_entry) associated to *ctx*
+ *		and store it in	the buffer pointed by *buf* up to size
+ *		*size* bytes.
+ *	Return
+ *		On success, number of bytes written to *buf*. On error, a
+ *		negative value.
+ *
+ *		The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
+ *		instead	return the number of bytes required to store all the
+ *		branch entries. If this flag is set, *buf* may be NULL.
+ *
+ *		**-EINVAL** if arguments invalid or **size** not a multiple
+ *		of sizeof(struct perf_branch_entry).
+ *
+ *		**-ENOENT** if architecture does not support branch records.
+ *
+ * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
+ *	Description
+ *		Returns 0 on success, values for *pid* and *tgid* as seen from the current
+ *		*namespace* will be returned in *nsdata*.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EINVAL** if dev and inum supplied don't match dev_t and inode number
+ *              with nsfs of current task, or if dev conversion to dev_t lost high bits.
+ *
+ *		**-ENOENT** if pidns does not exists for the current task.
+ *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *	Description
+ *		Write raw *data* blob into a special BPF perf event held by
+ *		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *		event must have the following attributes: **PERF_SAMPLE_RAW**
+ *		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *		The *flags* are used to indicate the index in *map* for which
+ *		the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *		to indicate that the index of the current CPU core should be
+ *		used.
+ *
+ *		The value to write, of *size*, is passed through eBPF stack and
+ *		pointed by *data*.
+ *
+ *		*ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ *		This helper is similar to **bpf_perf_eventoutput**\ () but
+ *		restricted to raw_tracepoint bpf programs.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_get_netns_cookie(void *ctx)
+ * 	Description
+ * 		Retrieve the cookie (generated by the kernel) of the network
+ * 		namespace the input *ctx* is associated with. The network
+ * 		namespace cookie remains stable for its lifetime and provides
+ * 		a global identifier that can be assumed unique. If *ctx* is
+ * 		NULL, then the helper returns the cookie for the initial
+ * 		network namespace. The cookie itself is very similar to that
+ * 		of bpf_get_socket_cookie() helper, but for network namespaces
+ * 		instead of sockets.
+ * 	Return
+ * 		A 8-byte long opaque number.
+ *
+ * u64 bpf_get_current_ancestor_cgroup_id(int ancestor_level)
+ * 	Description
+ * 		Return id of cgroup v2 that is ancestor of the cgroup associated
+ * 		with the current task at the *ancestor_level*. The root cgroup
+ * 		is at *ancestor_level* zero and each step down the hierarchy
+ * 		increments the level. If *ancestor_level* == level of cgroup
+ * 		associated with the current task, then return value will be the
+ * 		same as that of **bpf_get_current_cgroup_id**\ ().
+ *
+ * 		The helper is useful to implement policies based on cgroups
+ * 		that are upper in hierarchy than immediate cgroup associated
+ * 		with the current task.
+ *
+ * 		The format of returned id and helper limitations are same as in
+ * 		**bpf_get_current_cgroup_id**\ ().
+ * 	Return
+ * 		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ *	Description
+ *		Assign the *sk* to the *skb*. When combined with appropriate
+ *		routing configuration to receive the packet towards the socket,
+ *		will cause *skb* to be delivered to the specified socket.
+ *		Subsequent redirection of *skb* via  **bpf_redirect**\ (),
+ *		**bpf_clone_redirect**\ () or other methods outside of BPF may
+ *		interfere with successful delivery to the socket.
+ *
+ *		This operation is only valid from TC ingress path.
+ *
+ *		The *flags* argument must be zero.
+ *	Return
+ *		0 on success, or a negative errno in case of failure.
+ *
+ *		* **-EINVAL**		Unsupported flags specified.
+ *		* **-ENOENT**		Socket is unavailable for assignment.
+ *		* **-ENETUNREACH**	Socket is unreachable (wrong netns).
+ *		* **-EOPNOTSUPP**	Unsupported operation, for example a
+ *					call from outside of TC ingress.
+ *		* **-ESOCKTNOSUPPORT**	Socket type not supported (reuseport).
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3010,7 +3145,13 @@ union bpf_attr {
 	FN(probe_read_kernel_str),	\
 	FN(tcp_send_ack),		\
 	FN(send_signal_thread),		\
-	FN(jiffies64),
+	FN(jiffies64),			\
+	FN(read_branch_records),	\
+	FN(get_ns_current_pid_tgid),	\
+	FN(xdp_output),			\
+	FN(get_netns_cookie),		\
+	FN(get_current_ancestor_cgroup_id),	\
+	FN(sk_assign),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3025,69 +3166,100 @@ enum bpf_func_id {
 /* All flags used by eBPF helper functions, placed here. */
 
 /* BPF_FUNC_skb_store_bytes flags. */
-#define BPF_F_RECOMPUTE_CSUM		(1ULL << 0)
-#define BPF_F_INVALIDATE_HASH		(1ULL << 1)
+enum {
+	BPF_F_RECOMPUTE_CSUM		= (1ULL << 0),
+	BPF_F_INVALIDATE_HASH		= (1ULL << 1),
+};
 
 /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
  * First 4 bits are for passing the header field size.
  */
-#define BPF_F_HDR_FIELD_MASK		0xfULL
+enum {
+	BPF_F_HDR_FIELD_MASK		= 0xfULL,
+};
 
 /* BPF_FUNC_l4_csum_replace flags. */
-#define BPF_F_PSEUDO_HDR		(1ULL << 4)
-#define BPF_F_MARK_MANGLED_0		(1ULL << 5)
-#define BPF_F_MARK_ENFORCE		(1ULL << 6)
+enum {
+	BPF_F_PSEUDO_HDR		= (1ULL << 4),
+	BPF_F_MARK_MANGLED_0		= (1ULL << 5),
+	BPF_F_MARK_ENFORCE		= (1ULL << 6),
+};
 
 /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
-#define BPF_F_INGRESS			(1ULL << 0)
+enum {
+	BPF_F_INGRESS			= (1ULL << 0),
+};
 
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
-#define BPF_F_TUNINFO_IPV6		(1ULL << 0)
+enum {
+	BPF_F_TUNINFO_IPV6		= (1ULL << 0),
+};
 
 /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
-#define BPF_F_SKIP_FIELD_MASK		0xffULL
-#define BPF_F_USER_STACK		(1ULL << 8)
+enum {
+	BPF_F_SKIP_FIELD_MASK		= 0xffULL,
+	BPF_F_USER_STACK		= (1ULL << 8),
 /* flags used by BPF_FUNC_get_stackid only. */
-#define BPF_F_FAST_STACK_CMP		(1ULL << 9)
-#define BPF_F_REUSE_STACKID		(1ULL << 10)
+	BPF_F_FAST_STACK_CMP		= (1ULL << 9),
+	BPF_F_REUSE_STACKID		= (1ULL << 10),
 /* flags used by BPF_FUNC_get_stack only. */
-#define BPF_F_USER_BUILD_ID		(1ULL << 11)
+	BPF_F_USER_BUILD_ID		= (1ULL << 11),
+};
 
 /* BPF_FUNC_skb_set_tunnel_key flags. */
-#define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
-#define BPF_F_DONT_FRAGMENT		(1ULL << 2)
-#define BPF_F_SEQ_NUMBER		(1ULL << 3)
+enum {
+	BPF_F_ZERO_CSUM_TX		= (1ULL << 1),
+	BPF_F_DONT_FRAGMENT		= (1ULL << 2),
+	BPF_F_SEQ_NUMBER		= (1ULL << 3),
+};
 
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
  */
-#define BPF_F_INDEX_MASK		0xffffffffULL
-#define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
+enum {
+	BPF_F_INDEX_MASK		= 0xffffffffULL,
+	BPF_F_CURRENT_CPU		= BPF_F_INDEX_MASK,
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
-#define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
+	BPF_F_CTXLEN_MASK		= (0xfffffULL << 32),
+};
 
 /* Current network namespace */
-#define BPF_F_CURRENT_NETNS		(-1L)
+enum {
+	BPF_F_CURRENT_NETNS		= (-1L),
+};
 
 /* BPF_FUNC_skb_adjust_room flags. */
-#define BPF_F_ADJ_ROOM_FIXED_GSO	(1ULL << 0)
+enum {
+	BPF_F_ADJ_ROOM_FIXED_GSO	= (1ULL << 0),
+	BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	= (1ULL << 1),
+	BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	= (1ULL << 2),
+	BPF_F_ADJ_ROOM_ENCAP_L4_GRE	= (1ULL << 3),
+	BPF_F_ADJ_ROOM_ENCAP_L4_UDP	= (1ULL << 4),
+};
 
-#define BPF_ADJ_ROOM_ENCAP_L2_MASK	0xff
-#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT	56
+enum {
+	BPF_ADJ_ROOM_ENCAP_L2_MASK	= 0xff,
+	BPF_ADJ_ROOM_ENCAP_L2_SHIFT	= 56,
+};
 
-#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	(1ULL << 1)
-#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	(1ULL << 2)
-#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE	(1ULL << 3)
-#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP	(1ULL << 4)
 #define BPF_F_ADJ_ROOM_ENCAP_L2(len)	(((__u64)len & \
 					  BPF_ADJ_ROOM_ENCAP_L2_MASK) \
 					 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
 
 /* BPF_FUNC_sysctl_get_name flags. */
-#define BPF_F_SYSCTL_BASE_NAME		(1ULL << 0)
+enum {
+	BPF_F_SYSCTL_BASE_NAME		= (1ULL << 0),
+};
 
 /* BPF_FUNC_sk_storage_get flags */
-#define BPF_SK_STORAGE_GET_F_CREATE	(1ULL << 0)
+enum {
+	BPF_SK_STORAGE_GET_F_CREATE	= (1ULL << 0),
+};
+
+/* BPF_FUNC_read_branch_records flags. */
+enum {
+	BPF_F_GET_BRANCH_RECORDS_SIZE	= (1ULL << 0),
+};
 
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
@@ -3153,6 +3325,7 @@ struct __sk_buff {
 	__u32 wire_len;
 	__u32 gso_segs;
 	__bpf_md_ptr(struct bpf_sock *, sk);
+	__u32 gso_size;
 };
 
 struct bpf_tunnel_key {
@@ -3505,13 +3678,14 @@ struct bpf_sock_ops {
 };
 
 /* Definitions for bpf_sock_ops_cb_flags */
-#define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
-#define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
-#define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
-#define BPF_SOCK_OPS_RTT_CB_FLAG	(1<<3)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS       0xF		/* Mask of all currently
-							 * supported cb flags
-							 */
+enum {
+	BPF_SOCK_OPS_RTO_CB_FLAG	= (1<<0),
+	BPF_SOCK_OPS_RETRANS_CB_FLAG	= (1<<1),
+	BPF_SOCK_OPS_STATE_CB_FLAG	= (1<<2),
+	BPF_SOCK_OPS_RTT_CB_FLAG	= (1<<3),
+/* Mask of all currently supported cb flags */
+	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0xF,
+};
 
 /* List of known BPF sock_ops operators.
  * New entries can only be added at the end
@@ -3590,8 +3764,10 @@ enum {
 	BPF_TCP_MAX_STATES	/* Leave at the end! */
 };
 
-#define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
-#define TCP_BPF_SNDCWND_CLAMP	1002	/* Set sndcwnd_clamp */
+enum {
+	TCP_BPF_IW		= 1001,	/* Set TCP initial congestion window */
+	TCP_BPF_SNDCWND_CLAMP	= 1002,	/* Set sndcwnd_clamp */
+};
 
 struct bpf_perf_event_value {
 	__u64 counter;
@@ -3599,12 +3775,16 @@ struct bpf_perf_event_value {
 	__u64 running;
 };
 
-#define BPF_DEVCG_ACC_MKNOD	(1ULL << 0)
-#define BPF_DEVCG_ACC_READ	(1ULL << 1)
-#define BPF_DEVCG_ACC_WRITE	(1ULL << 2)
+enum {
+	BPF_DEVCG_ACC_MKNOD	= (1ULL << 0),
+	BPF_DEVCG_ACC_READ	= (1ULL << 1),
+	BPF_DEVCG_ACC_WRITE	= (1ULL << 2),
+};
 
-#define BPF_DEVCG_DEV_BLOCK	(1ULL << 0)
-#define BPF_DEVCG_DEV_CHAR	(1ULL << 1)
+enum {
+	BPF_DEVCG_DEV_BLOCK	= (1ULL << 0),
+	BPF_DEVCG_DEV_CHAR	= (1ULL << 1),
+};
 
 struct bpf_cgroup_dev_ctx {
 	/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
@@ -3620,8 +3800,10 @@ struct bpf_raw_tracepoint_args {
 /* DIRECT:  Skip the FIB rules and go to FIB table associated with device
  * OUTPUT:  Do lookup from egress perspective; default is ingress
  */
-#define BPF_FIB_LOOKUP_DIRECT  (1U << 0)
-#define BPF_FIB_LOOKUP_OUTPUT  (1U << 1)
+enum {
+	BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
+	BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
+};
 
 enum {
 	BPF_FIB_LKUP_RET_SUCCESS,      /* lookup successful */
@@ -3693,9 +3875,11 @@ enum bpf_task_fd_type {
 	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
 };
 
-#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG		(1U << 0)
-#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL		(1U << 1)
-#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP		(1U << 2)
+enum {
+	BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG		= (1U << 0),
+	BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL		= (1U << 1),
+	BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP		= (1U << 2),
+};
 
 struct bpf_flow_keys {
 	__u16	nhoff;
@@ -3761,4 +3945,8 @@ struct bpf_sockopt {
 	__s32	retval;
 };
 
+struct bpf_pidns_info {
+	__u32 pid;
+	__u32 tgid;
+};
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 024af2d1d0af..ca6665ea758a 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -463,6 +463,7 @@ enum {
 	IFLA_MACSEC_REPLAY_PROTECT,
 	IFLA_MACSEC_VALIDATION,
 	IFLA_MACSEC_PAD,
+	IFLA_MACSEC_OFFLOAD,
 	__IFLA_MACSEC_MAX,
 };
 
@@ -489,6 +490,7 @@ enum macsec_validation_type {
 enum macsec_offload {
 	MACSEC_OFFLOAD_OFF = 0,
 	MACSEC_OFFLOAD_PHY = 1,
+	MACSEC_OFFLOAD_MAC = 2,
 	__MACSEC_OFFLOAD_END,
 	MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1,
 };
@@ -960,11 +962,12 @@ enum {
 #define XDP_FLAGS_SKB_MODE		(1U << 1)
 #define XDP_FLAGS_DRV_MODE		(1U << 2)
 #define XDP_FLAGS_HW_MODE		(1U << 3)
+#define XDP_FLAGS_REPLACE		(1U << 4)
 #define XDP_FLAGS_MODES			(XDP_FLAGS_SKB_MODE | \
 					 XDP_FLAGS_DRV_MODE | \
 					 XDP_FLAGS_HW_MODE)
 #define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST | \
-					 XDP_FLAGS_MODES)
+					 XDP_FLAGS_MODES | XDP_FLAGS_REPLACE)
 
 /* These are stored into IFLA_XDP_ATTACHED on dump. */
 enum {
@@ -984,6 +987,7 @@ enum {
 	IFLA_XDP_DRV_PROG_ID,
 	IFLA_XDP_SKB_PROG_ID,
 	IFLA_XDP_HW_PROG_ID,
+	IFLA_XDP_EXPECTED_FD,
 	__IFLA_XDP_MAX,
 };
 
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 1521073b6348..8533bf07450f 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -74,6 +74,8 @@ enum {
 #define IPPROTO_UDPLITE		IPPROTO_UDPLITE
   IPPROTO_MPLS = 137,		/* MPLS in IP (RFC 4023)		*/
 #define IPPROTO_MPLS		IPPROTO_MPLS
+  IPPROTO_ETHERNET = 143,	/* Ethernet-within-IPv6 Encapsulation	*/
+#define IPPROTO_ETHERNET	IPPROTO_ETHERNET
   IPPROTO_RAW = 255,		/* Raw IP packets			*/
 #define IPPROTO_RAW		IPPROTO_RAW
   IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 377d794d3105..397cfd65b3fe 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -181,6 +181,8 @@ enum perf_branch_sample_type_shift {
 
 	PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT	= 16, /* save branch type */
 
+	PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT	= 17, /* save low level index of raw branch records */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
 
@@ -208,6 +210,8 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_TYPE_SAVE	=
 		1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
 
+	PERF_SAMPLE_BRANCH_HW_INDEX	= 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
@@ -853,7 +857,9 @@ enum perf_event_type {
 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
 	 *
 	 *	{ u64                   nr;
-	 *        { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+	 *	  { u64	hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
+	 *        { u64 from, to, flags } lbr[nr];
+	 *      } && PERF_SAMPLE_BRANCH_STACK
 	 *
 	 * 	{ u64			abi; # enum perf_sample_regs_abi
 	 * 	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h
index 91fa51a9c31d..91fa51a9c31d 100644
--- a/tools/testing/selftests/bpf/include/uapi/linux/types.h
+++ b/tools/include/uapi/linux/types.h
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 4cf93110c259..e83fc8e868f4 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -25,7 +25,7 @@ import sys
 import locale
 import os
 import time
-import optparse
+import argparse
 import ctypes
 import fcntl
 import resource
@@ -33,6 +33,8 @@ import struct
 import re
 import subprocess
 from collections import defaultdict, namedtuple
+from functools import reduce
+from datetime import datetime
 
 VMX_EXIT_REASONS = {
     'EXCEPTION_NMI':        0,
@@ -873,7 +875,7 @@ class Stats(object):
 
         if options.debugfs:
             providers.append(DebugfsProvider(options.pid, options.fields,
-                                             options.dbgfs_include_past))
+                                             options.debugfs_include_past))
         if options.tracepoints or not providers:
             providers.append(TracepointProvider(options.pid, options.fields))
 
@@ -974,15 +976,17 @@ DELAY_DEFAULT = 3.0
 MAX_GUEST_NAME_LEN = 48
 MAX_REGEX_LEN = 44
 SORT_DEFAULT = 0
+MIN_DELAY = 0.1
+MAX_DELAY = 25.5
 
 
 class Tui(object):
     """Instruments curses to draw a nice text ui."""
-    def __init__(self, stats):
+    def __init__(self, stats, opts):
         self.stats = stats
         self.screen = None
         self._delay_initial = 0.25
-        self._delay_regular = DELAY_DEFAULT
+        self._delay_regular = opts.set_delay
         self._sorting = SORT_DEFAULT
         self._display_guests = 0
 
@@ -1183,7 +1187,7 @@ class Tui(object):
 
         if not self._is_running_guest(self.stats.pid_filter):
             if self._gname:
-                try: # ...to identify the guest by name in case it's back
+                try:  # ...to identify the guest by name in case it's back
                     pids = self.get_pid_from_gname(self._gname)
                     if len(pids) == 1:
                         self._refresh_header(pids[0])
@@ -1282,7 +1286,8 @@ class Tui(object):
                '   p     filter by guest name/PID',
                '   q     quit',
                '   r     reset stats',
-               '   s     set update interval',
+               '   s     set delay between refreshs (value range: '
+               '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
                '   x     toggle reporting of stats for individual child trace'
                ' events',
                'Any other key refreshes statistics immediately')
@@ -1336,8 +1341,8 @@ class Tui(object):
         msg = ''
         while True:
             self.screen.erase()
-            self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' %
-                               DELAY_DEFAULT, curses.A_BOLD)
+            self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).'
+                               % DELAY_DEFAULT, curses.A_BOLD)
             self.screen.addstr(4, 0, msg)
             self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
                                self._delay_regular)
@@ -1348,11 +1353,9 @@ class Tui(object):
             try:
                 if len(val) > 0:
                     delay = float(val)
-                    if delay < 0.1:
-                        msg = '"' + str(val) + '": Value must be >=0.1'
-                        continue
-                    if delay > 25.5:
-                        msg = '"' + str(val) + '": Value must be <=25.5'
+                    err = is_delay_valid(delay)
+                    if err is not None:
+                        msg = err
                         continue
                 else:
                     delay = DELAY_DEFAULT
@@ -1488,33 +1491,64 @@ def batch(stats):
         pass
 
 
-def log(stats):
-    """Prints statistics as reiterating key block, multiple value blocks."""
-    keys = sorted(stats.get().keys())
-
-    def banner():
+class StdFormat(object):
+    def __init__(self, keys):
+        self._banner = ''
         for key in keys:
-            print(key.split(' ')[0], end=' ')
-        print()
+            self._banner += key.split(' ')[0] + ' '
 
-    def statline():
-        s = stats.get()
+    def get_banner(self):
+        return self._banner
+
+    @staticmethod
+    def get_statline(keys, s):
+        res = ''
         for key in keys:
-            print(' %9d' % s[key].delta, end=' ')
-        print()
+            res += ' %9d' % s[key].delta
+        return res
+
+
+class CSVFormat(object):
+    def __init__(self, keys):
+        self._banner = 'timestamp'
+        self._banner += reduce(lambda res, key: "{},{!s}".format(res,
+                               key.split(' ')[0]), keys, '')
+
+    def get_banner(self):
+        return self._banner
+
+    @staticmethod
+    def get_statline(keys, s):
+        return reduce(lambda res, key: "{},{!s}".format(res, s[key].delta),
+                      keys, '')
+
+
+def log(stats, opts, frmt, keys):
+    """Prints statistics as reiterating key block, multiple value blocks."""
     line = 0
     banner_repeat = 20
     while True:
         try:
-            time.sleep(1)
+            time.sleep(opts.set_delay)
             if line % banner_repeat == 0:
-                banner()
-            statline()
+                print(frmt.get_banner())
+            print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") +
+                  frmt.get_statline(keys, stats.get()))
             line += 1
         except KeyboardInterrupt:
             break
 
 
+def is_delay_valid(delay):
+    """Verify delay is in valid value range."""
+    msg = None
+    if delay < MIN_DELAY:
+        msg = '"' + str(delay) + '": Delay must be >=%s' % MIN_DELAY
+    if delay > MAX_DELAY:
+        msg = '"' + str(delay) + '": Delay must be <=%s' % MAX_DELAY
+    return msg
+
+
 def get_options():
     """Returns processed program arguments."""
     description_text = """
@@ -1545,89 +1579,85 @@ Interactive Commands:
    p     filter by PID
    q     quit
    r     reset stats
-   s     set update interval
+   s     set update interval (value range: 0.1-25.5 secs)
    x     toggle reporting of stats for individual child trace events
 Press any other key to refresh statistics immediately.
 """ % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING)
 
-    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
-        def format_description(self, description):
-            if description:
-                return description + "\n"
-            else:
-                return ""
-
-    def cb_guest_to_pid(option, opt, val, parser):
-        try:
-            pids = Tui.get_pid_from_gname(val)
-        except:
-            sys.exit('Error while searching for guest "{}". Use "-p" to '
-                     'specify a pid instead?'.format(val))
-        if len(pids) == 0:
-            sys.exit('Error: No guest by the name "{}" found'.format(val))
-        if len(pids) > 1:
-            sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
-                     'to specify the desired pid'.format(" ".join(pids)))
-        parser.values.pid = pids[0]
-
-    optparser = optparse.OptionParser(description=description_text,
-                                      formatter=PlainHelpFormatter())
-    optparser.add_option('-1', '--once', '--batch',
-                         action='store_true',
-                         default=False,
-                         dest='once',
-                         help='run in batch mode for one second',
-                         )
-    optparser.add_option('-i', '--debugfs-include-past',
-                         action='store_true',
-                         default=False,
-                         dest='dbgfs_include_past',
-                         help='include all available data on past events for '
-                              'debugfs',
-                         )
-    optparser.add_option('-l', '--log',
-                         action='store_true',
-                         default=False,
-                         dest='log',
-                         help='run in logging mode (like vmstat)',
-                         )
-    optparser.add_option('-t', '--tracepoints',
-                         action='store_true',
-                         default=False,
-                         dest='tracepoints',
-                         help='retrieve statistics from tracepoints',
-                         )
-    optparser.add_option('-d', '--debugfs',
-                         action='store_true',
-                         default=False,
-                         dest='debugfs',
-                         help='retrieve statistics from debugfs',
-                         )
-    optparser.add_option('-f', '--fields',
-                         action='store',
-                         default='',
-                         dest='fields',
-                         help='''fields to display (regex)
-                                 "-f help" for a list of available events''',
-                         )
-    optparser.add_option('-p', '--pid',
-                         action='store',
-                         default=0,
-                         type='int',
-                         dest='pid',
-                         help='restrict statistics to pid',
-                         )
-    optparser.add_option('-g', '--guest',
-                         action='callback',
-                         type='string',
-                         dest='pid',
-                         metavar='GUEST',
-                         help='restrict statistics to guest by name',
-                         callback=cb_guest_to_pid,
-                         )
-    options, unkn = optparser.parse_args(sys.argv)
-    if len(unkn) != 1:
-        sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
+    class Guest_to_pid(argparse.Action):
+        def __call__(self, parser, namespace, values, option_string=None):
+            try:
+                pids = Tui.get_pid_from_gname(values)
+            except:
+                sys.exit('Error while searching for guest "{}". Use "-p" to '
+                         'specify a pid instead?'.format(values))
+            if len(pids) == 0:
+                sys.exit('Error: No guest by the name "{}" found'
+                         .format(values))
+            if len(pids) > 1:
+                sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
+                         ' to specify the desired pid'.format(" ".join(pids)))
+            namespace.pid = pids[0]
+
+    argparser = argparse.ArgumentParser(description=description_text,
+                                        formatter_class=argparse
+                                        .RawTextHelpFormatter)
+    argparser.add_argument('-1', '--once', '--batch',
+                           action='store_true',
+                           default=False,
+                           help='run in batch mode for one second',
+                           )
+    argparser.add_argument('-c', '--csv',
+                           action='store_true',
+                           default=False,
+                           help='log in csv format - requires option -l/--log',
+                           )
+    argparser.add_argument('-d', '--debugfs',
+                           action='store_true',
+                           default=False,
+                           help='retrieve statistics from debugfs',
+                           )
+    argparser.add_argument('-f', '--fields',
+                           default='',
+                           help='''fields to display (regex)
+"-f help" for a list of available events''',
+                           )
+    argparser.add_argument('-g', '--guest',
+                           type=str,
+                           help='restrict statistics to guest by name',
+                           action=Guest_to_pid,
+                           )
+    argparser.add_argument('-i', '--debugfs-include-past',
+                           action='store_true',
+                           default=False,
+                           help='include all available data on past events for'
+                                ' debugfs',
+                           )
+    argparser.add_argument('-l', '--log',
+                           action='store_true',
+                           default=False,
+                           help='run in logging mode (like vmstat)',
+                           )
+    argparser.add_argument('-p', '--pid',
+                           type=int,
+                           default=0,
+                           help='restrict statistics to pid',
+                           )
+    argparser.add_argument('-s', '--set-delay',
+                           type=float,
+                           default=DELAY_DEFAULT,
+                           metavar='DELAY',
+                           help='set delay between refreshs (value range: '
+                                '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
+                           )
+    argparser.add_argument('-t', '--tracepoints',
+                           action='store_true',
+                           default=False,
+                           help='retrieve statistics from tracepoints',
+                           )
+    options = argparser.parse_args()
+    if options.csv and not options.log:
+        sys.exit('Error: Option -c/--csv requires -l/--log')
     try:
         # verify that we were passed a valid regex up front
         re.compile(options.fields)
@@ -1693,6 +1723,10 @@ def main():
         sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
         sys.exit('Specified pid does not exist.')
 
+    err = is_delay_valid(options.set_delay)
+    if err is not None:
+        sys.exit('Error: ' + err)
+
     stats = Stats(options)
 
     if options.fields == 'help':
@@ -1704,12 +1738,18 @@ def main():
         sys.exit(0)
 
     if options.log:
-        log(stats)
+        keys = sorted(stats.get().keys())
+        if options.csv:
+            frmt = CSVFormat(keys)
+        else:
+            frmt = StdFormat(keys)
+        log(stats, options, frmt, keys)
     elif not options.once:
-        with Tui(stats) as tui:
+        with Tui(stats, options) as tui:
             tui.show_stats()
     else:
         batch(stats)
 
+
 if __name__ == "__main__":
     main()
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index c057ba52364e..a97ded2aedad 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -49,7 +49,7 @@ INTERACTIVE COMMANDS
 
 *r*::	reset stats
 
-*s*::   set update interval
+*s*::   set delay between refreshs
 
 *x*::	toggle reporting of stats for child trace events
  ::     *Note*: The stats for the parents summarize the respective child trace
@@ -64,37 +64,45 @@ OPTIONS
 --batch::
 	run in batch mode for one second
 
--l::
---log::
-	run in logging mode (like vmstat)
-
--t::
---tracepoints::
-	retrieve statistics from tracepoints
+-c::
+--csv=<file>::
+        log in csv format - requires option -l/--log
 
 -d::
 --debugfs::
 	retrieve statistics from debugfs
 
+-f<fields>::
+--fields=<fields>::
+        fields to display (regex), "-f help" for a list of available events
+
+-g<guest>::
+--guest=<guest_name>::
+        limit statistics to one virtual machine (guest name)
+
+-h::
+--help::
+        show help message
+
 -i::
 --debugfs-include-past::
 	include all available data on past events for debugfs
 
+-l::
+--log::
+        run in logging mode (like vmstat)
+
 -p<pid>::
 --pid=<pid>::
 	limit statistics to one virtual machine (pid)
 
--g<guest>::
---guest=<guest_name>::
-	limit statistics to one virtual machine (guest name)
+-s::
+--set-delay::
+        set delay between refreshs (value range: 0.1-25.5 secs)
 
--f<fields>::
---fields=<fields>::
-	fields to display (regex), "-f help" for a list of available events
-
--h::
---help::
-	show help message
+-t::
+--tracepoints::
+        retrieve statistics from tracepoints
 
 SEE ALSO
 --------
diff --git a/tools/laptop/dslm/.gitignore b/tools/laptop/dslm/.gitignore
index 9fc984e64386..f7f1296b96ae 100644
--- a/tools/laptop/dslm/.gitignore
+++ b/tools/laptop/dslm/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 dslm
diff --git a/tools/leds/.gitignore b/tools/leds/.gitignore
index ac96d9f53dfc..06bd3ee1b7c9 100644
--- a/tools/leds/.gitignore
+++ b/tools/leds/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 uledmon
diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build
index f4ed9629ae85..0f75b28654de 100644
--- a/tools/lib/api/fs/Build
+++ b/tools/lib/api/fs/Build
@@ -1,2 +1,3 @@
 libapi-y += fs.o
 libapi-y += tracing_path.o
+libapi-y += cgroup.o
diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c
new file mode 100644
index 000000000000..889a6eb4aaca
--- /dev/null
+++ b/tools/lib/api/fs/cgroup.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/stringify.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "fs.h"
+
+int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
+{
+	FILE *fp;
+	char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
+	char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
+	char *token, *saved_ptr = NULL;
+
+	fp = fopen("/proc/mounts", "r");
+	if (!fp)
+		return -1;
+
+	/*
+	 * in order to handle split hierarchy, we need to scan /proc/mounts
+	 * and inspect every cgroupfs mount point to find one that has
+	 * perf_event subsystem
+	 */
+	path_v1[0] = '\0';
+	path_v2[0] = '\0';
+
+	while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
+				__stringify(PATH_MAX)"s %*d %*d\n",
+				mountpoint, type, tokens) == 3) {
+
+		if (!path_v1[0] && !strcmp(type, "cgroup")) {
+
+			token = strtok_r(tokens, ",", &saved_ptr);
+
+			while (token != NULL) {
+				if (subsys && !strcmp(token, subsys)) {
+					strcpy(path_v1, mountpoint);
+					break;
+				}
+				token = strtok_r(NULL, ",", &saved_ptr);
+			}
+		}
+
+		if (!path_v2[0] && !strcmp(type, "cgroup2"))
+			strcpy(path_v2, mountpoint);
+
+		if (path_v1[0] && path_v2[0])
+			break;
+	}
+	fclose(fp);
+
+	if (path_v1[0])
+		path = path_v1;
+	else if (path_v2[0])
+		path = path_v2;
+	else
+		return -1;
+
+	if (strlen(path) < maxlen) {
+		strcpy(buf, path);
+		return 0;
+	}
+	return -1;
+}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 92d03b8396b1..936edb95e1f3 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -28,6 +28,8 @@ FS(bpf_fs)
 #undef FS
 
 
+int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys);
+
 int filename__read_int(const char *filename, int *value);
 int filename__read_ull(const char *filename, unsigned long long *value);
 int filename__read_xll(const char *filename, unsigned long long *value);
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore
index e97c2ebcf447..8a81b3679d2b 100644
--- a/tools/lib/bpf/.gitignore
+++ b/tools/lib/bpf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 libbpf_version.h
 libbpf.pc
 FEATURE-DUMP.libbpf
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index c6dafe563176..5cc1b0785d18 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -235,7 +235,8 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = load_attr->prog_type;
 	attr.expected_attach_type = load_attr->expected_attach_type;
-	if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
+	if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
+	    attr.prog_type == BPF_PROG_TYPE_LSM) {
 		attr.attach_btf_id = load_attr->attach_btf_id;
 	} else if (attr.prog_type == BPF_PROG_TYPE_TRACING ||
 		   attr.prog_type == BPF_PROG_TYPE_EXT) {
@@ -584,6 +585,40 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
 	return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
 }
 
+int bpf_link_create(int prog_fd, int target_fd,
+		    enum bpf_attach_type attach_type,
+		    const struct bpf_link_create_opts *opts)
+{
+	union bpf_attr attr;
+
+	if (!OPTS_VALID(opts, bpf_link_create_opts))
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.link_create.prog_fd = prog_fd;
+	attr.link_create.target_fd = target_fd;
+	attr.link_create.attach_type = attach_type;
+
+	return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_link_update(int link_fd, int new_prog_fd,
+		    const struct bpf_link_update_opts *opts)
+{
+	union bpf_attr attr;
+
+	if (!OPTS_VALID(opts, bpf_link_update_opts))
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.link_update.link_fd = link_fd;
+	attr.link_update.new_prog_fd = new_prog_fd;
+	attr.link_update.flags = OPTS_GET(opts, flags, 0);
+	attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+
+	return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+}
+
 int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
 		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
 {
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index b976e77316cc..46d47afdd887 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -168,6 +168,25 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
 LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
 				enum bpf_attach_type type);
 
+struct bpf_link_create_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+};
+#define bpf_link_create_opts__last_field sz
+
+LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
+			       enum bpf_attach_type attach_type,
+			       const struct bpf_link_create_opts *opts);
+
+struct bpf_link_update_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u32 flags;	   /* extra flags */
+	__u32 old_prog_fd; /* expected old program FD */
+};
+#define bpf_link_update_opts__last_field old_prog_fd
+
+LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
+			       const struct bpf_link_update_opts *opts);
+
 struct bpf_prog_test_run_attr {
 	int prog_fd;
 	int repeat;
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index b0dafe8b4ebc..f3f3c3fb98cb 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -49,7 +49,8 @@
 
 #if defined(bpf_target_x86)
 
-#ifdef __KERNEL__
+#if defined(__KERNEL__) || defined(__VMLINUX_H__)
+
 #define PT_REGS_PARM1(x) ((x)->di)
 #define PT_REGS_PARM2(x) ((x)->si)
 #define PT_REGS_PARM3(x) ((x)->dx)
@@ -60,7 +61,20 @@
 #define PT_REGS_RC(x) ((x)->ax)
 #define PT_REGS_SP(x) ((x)->sp)
 #define PT_REGS_IP(x) ((x)->ip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx)
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx)
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip)
+
 #else
+
 #ifdef __i386__
 /* i386 kernel is built with -mregparm=3 */
 #define PT_REGS_PARM1(x) ((x)->eax)
@@ -73,7 +87,20 @@
 #define PT_REGS_RC(x) ((x)->eax)
 #define PT_REGS_SP(x) ((x)->esp)
 #define PT_REGS_IP(x) ((x)->eip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx)
+#define PT_REGS_PARM4_CORE(x) 0
+#define PT_REGS_PARM5_CORE(x) 0
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip)
+
 #else
+
 #define PT_REGS_PARM1(x) ((x)->rdi)
 #define PT_REGS_PARM2(x) ((x)->rsi)
 #define PT_REGS_PARM3(x) ((x)->rdx)
@@ -84,6 +111,18 @@
 #define PT_REGS_RC(x) ((x)->rax)
 #define PT_REGS_SP(x) ((x)->rsp)
 #define PT_REGS_IP(x) ((x)->rip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx)
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx)
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip)
+
 #endif
 #endif
 
@@ -104,6 +143,17 @@ struct pt_regs;
 #define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
 #define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
 
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), grps[14])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), pdw.addr)
+
 #elif defined(bpf_target_arm)
 
 #define PT_REGS_PARM1(x) ((x)->uregs[0])
@@ -117,6 +167,17 @@ struct pt_regs;
 #define PT_REGS_SP(x) ((x)->uregs[13])
 #define PT_REGS_IP(x) ((x)->uregs[12])
 
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12])
+
 #elif defined(bpf_target_arm64)
 
 /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
@@ -134,6 +195,17 @@ struct pt_regs;
 #define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
 #define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
 
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc)
+
 #elif defined(bpf_target_mips)
 
 #define PT_REGS_PARM1(x) ((x)->regs[4])
@@ -147,6 +219,17 @@ struct pt_regs;
 #define PT_REGS_SP(x) ((x)->regs[29])
 #define PT_REGS_IP(x) ((x)->cp0_epc)
 
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
+
 #elif defined(bpf_target_powerpc)
 
 #define PT_REGS_PARM1(x) ((x)->gpr[3])
@@ -158,6 +241,15 @@ struct pt_regs;
 #define PT_REGS_SP(x) ((x)->sp)
 #define PT_REGS_IP(x) ((x)->nip)
 
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip)
+
 #elif defined(bpf_target_sparc)
 
 #define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
@@ -169,11 +261,22 @@ struct pt_regs;
 #define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
 #define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
 
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP])
+
 /* Should this also be a bpf_target check for the sparc case? */
 #if defined(__arch64__)
 #define PT_REGS_IP(x) ((x)->tpc)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc)
 #else
 #define PT_REGS_IP(x) ((x)->pc)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc)
 #endif
 
 #endif
@@ -192,4 +295,122 @@ struct pt_regs;
 			  (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
 #endif
 
+#define ___bpf_concat(a, b) a ## b
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#define ___bpf_narg(...) \
+	___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#define ___bpf_empty(...) \
+	___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+
+#define ___bpf_ctx_cast0() ctx
+#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
+#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
+#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
+#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
+#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
+#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
+#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
+#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
+#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
+#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
+#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
+#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
+#define ___bpf_ctx_cast(args...) \
+	___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
+
+/*
+ * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
+ * similar kinds of BPF programs, that accept input arguments as a single
+ * pointer to untyped u64 array, where each u64 can actually be a typed
+ * pointer or integer of different size. Instead of requring user to write
+ * manual casts and work with array elements by index, BPF_PROG macro
+ * allows user to declare a list of named and typed input arguments in the
+ * same syntax as for normal C function. All the casting is hidden and
+ * performed transparently, while user code can just assume working with
+ * function arguments of specified type and name.
+ *
+ * Original raw context argument is preserved as well as 'ctx' argument.
+ * This is useful when using BPF helpers that expect original context
+ * as one of the parameters (e.g., for bpf_perf_event_output()).
+ */
+#define BPF_PROG(name, args...)						    \
+name(unsigned long long *ctx);						    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(unsigned long long *ctx, ##args);				    \
+typeof(name(0)) name(unsigned long long *ctx)				    \
+{									    \
+	_Pragma("GCC diagnostic push")					    \
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
+	return ____##name(___bpf_ctx_cast(args));			    \
+	_Pragma("GCC diagnostic pop")					    \
+}									    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(unsigned long long *ctx, ##args)
+
+struct pt_regs;
+
+#define ___bpf_kprobe_args0() ctx
+#define ___bpf_kprobe_args1(x) \
+	___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
+#define ___bpf_kprobe_args2(x, args...) \
+	___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
+#define ___bpf_kprobe_args3(x, args...) \
+	___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
+#define ___bpf_kprobe_args4(x, args...) \
+	___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
+#define ___bpf_kprobe_args5(x, args...) \
+	___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
+#define ___bpf_kprobe_args(args...) \
+	___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
+ * low-level way of getting kprobe input arguments from struct pt_regs, and
+ * provides a familiar typed and named function arguments syntax and
+ * semantics of accessing kprobe input paremeters.
+ *
+ * Original struct pt_regs* context is preserved as 'ctx' argument. This might
+ * be necessary when using BPF helpers like bpf_perf_event_output().
+ */
+#define BPF_KPROBE(name, args...)					    \
+name(struct pt_regs *ctx);						    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args);				    \
+typeof(name(0)) name(struct pt_regs *ctx)				    \
+{									    \
+	_Pragma("GCC diagnostic push")					    \
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
+	return ____##name(___bpf_kprobe_args(args));			    \
+	_Pragma("GCC diagnostic pop")					    \
+}									    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args)
+
+#define ___bpf_kretprobe_args0() ctx
+#define ___bpf_kretprobe_args1(x) \
+	___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx)
+#define ___bpf_kretprobe_args(args...) \
+	___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional
+ * return value (in addition to `struct pt_regs *ctx`), but no input
+ * arguments, because they will be clobbered by the time probed function
+ * returns.
+ */
+#define BPF_KRETPROBE(name, args...)					    \
+name(struct pt_regs *ctx);						    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args);				    \
+typeof(name(0)) name(struct pt_regs *ctx)				    \
+{									    \
+	_Pragma("GCC diagnostic push")					    \
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
+	return ____##name(___bpf_kretprobe_args(args));			    \
+	_Pragma("GCC diagnostic pop")					    \
+}									    \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+
 #endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 3d1c25fc97ae..bfef3d606b54 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -657,22 +657,32 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
 
 int btf__load(struct btf *btf)
 {
-	__u32 log_buf_size = BPF_LOG_BUF_SIZE;
+	__u32 log_buf_size = 0;
 	char *log_buf = NULL;
 	int err = 0;
 
 	if (btf->fd >= 0)
 		return -EEXIST;
 
-	log_buf = malloc(log_buf_size);
-	if (!log_buf)
-		return -ENOMEM;
+retry_load:
+	if (log_buf_size) {
+		log_buf = malloc(log_buf_size);
+		if (!log_buf)
+			return -ENOMEM;
 
-	*log_buf = 0;
+		*log_buf = 0;
+	}
 
 	btf->fd = bpf_load_btf(btf->data, btf->data_size,
 			       log_buf, log_buf_size, false);
 	if (btf->fd < 0) {
+		if (!log_buf || errno == ENOSPC) {
+			log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
+					   log_buf_size << 1);
+			free(log_buf);
+			goto retry_load;
+		}
+
 		err = -errno;
 		pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
 		if (*log_buf)
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index bd09ed1710f1..0c28ee82834b 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -916,13 +916,13 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
 			/* enumerators share namespace with typedef idents */
 			dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
 			if (dup_cnt > 1) {
-				btf_dump_printf(d, "\n%s%s___%zu = %d,",
+				btf_dump_printf(d, "\n%s%s___%zu = %u,",
 						pfx(lvl + 1), name, dup_cnt,
-						(__s32)v->val);
+						(__u32)v->val);
 			} else {
-				btf_dump_printf(d, "\n%s%s = %d,",
+				btf_dump_printf(d, "\n%s%s = %u,",
 						pfx(lvl + 1), name,
-						(__s32)v->val);
+						(__u32)v->val);
 			}
 		}
 		btf_dump_printf(d, "\n%s}", pfx(lvl));
@@ -1030,7 +1030,7 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
 	if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
 		return -EINVAL;
 
-	fname = OPTS_GET(opts, field_name, NULL);
+	fname = OPTS_GET(opts, field_name, "");
 	lvl = OPTS_GET(opts, indent_level, 0);
 	btf_dump_emit_type_decl(d, id, fname, lvl);
 	return 0;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7469c7dcc15e..ff9174282a8c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1845,7 +1845,6 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
  * type definition, while using only sizeof(void *) space in ELF data section.
  */
 static bool get_map_field_int(const char *map_name, const struct btf *btf,
-			      const struct btf_type *def,
 			      const struct btf_member *m, __u32 *res)
 {
 	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
@@ -1972,19 +1971,19 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			return -EINVAL;
 		}
 		if (strcmp(name, "type") == 0) {
-			if (!get_map_field_int(map_name, obj->btf, def, m,
+			if (!get_map_field_int(map_name, obj->btf, m,
 					       &map->def.type))
 				return -EINVAL;
 			pr_debug("map '%s': found type = %u.\n",
 				 map_name, map->def.type);
 		} else if (strcmp(name, "max_entries") == 0) {
-			if (!get_map_field_int(map_name, obj->btf, def, m,
+			if (!get_map_field_int(map_name, obj->btf, m,
 					       &map->def.max_entries))
 				return -EINVAL;
 			pr_debug("map '%s': found max_entries = %u.\n",
 				 map_name, map->def.max_entries);
 		} else if (strcmp(name, "map_flags") == 0) {
-			if (!get_map_field_int(map_name, obj->btf, def, m,
+			if (!get_map_field_int(map_name, obj->btf, m,
 					       &map->def.map_flags))
 				return -EINVAL;
 			pr_debug("map '%s': found map_flags = %u.\n",
@@ -1992,8 +1991,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 		} else if (strcmp(name, "key_size") == 0) {
 			__u32 sz;
 
-			if (!get_map_field_int(map_name, obj->btf, def, m,
-					       &sz))
+			if (!get_map_field_int(map_name, obj->btf, m, &sz))
 				return -EINVAL;
 			pr_debug("map '%s': found key_size = %u.\n",
 				 map_name, sz);
@@ -2035,8 +2033,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 		} else if (strcmp(name, "value_size") == 0) {
 			__u32 sz;
 
-			if (!get_map_field_int(map_name, obj->btf, def, m,
-					       &sz))
+			if (!get_map_field_int(map_name, obj->btf, m, &sz))
 				return -EINVAL;
 			pr_debug("map '%s': found value_size = %u.\n",
 				 map_name, sz);
@@ -2079,8 +2076,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			__u32 val;
 			int err;
 
-			if (!get_map_field_int(map_name, obj->btf, def, m,
-					       &val))
+			if (!get_map_field_int(map_name, obj->btf, m, &val))
 				return -EINVAL;
 			pr_debug("map '%s': found pinning = %u.\n",
 				 map_name, val);
@@ -2284,11 +2280,16 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
 	}
 }
 
-static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
+static bool libbpf_needs_btf(const struct bpf_object *obj)
 {
 	return obj->efile.btf_maps_shndx >= 0 ||
-		obj->efile.st_ops_shndx >= 0 ||
-		obj->nr_extern > 0;
+	       obj->efile.st_ops_shndx >= 0 ||
+	       obj->nr_extern > 0;
+}
+
+static bool kernel_needs_btf(const struct bpf_object *obj)
+{
+	return obj->efile.st_ops_shndx >= 0;
 }
 
 static int bpf_object__init_btf(struct bpf_object *obj,
@@ -2324,7 +2325,7 @@ static int bpf_object__init_btf(struct bpf_object *obj,
 		}
 	}
 out:
-	if (err && bpf_object__is_btf_mandatory(obj)) {
+	if (err && libbpf_needs_btf(obj)) {
 		pr_warn("BTF is required, but is missing or corrupted.\n");
 		return err;
 	}
@@ -2348,7 +2349,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
 	btf_ext__free(obj->btf_ext);
 	obj->btf_ext = NULL;
 
-	if (bpf_object__is_btf_mandatory(obj)) {
+	if (libbpf_needs_btf(obj)) {
 		pr_warn("BTF is required, but is missing or corrupted.\n");
 		return -ENOENT;
 	}
@@ -2357,7 +2358,8 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
 
 static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
 {
-	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
+	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
+	    prog->type == BPF_PROG_TYPE_LSM)
 		return true;
 
 	/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
@@ -2412,7 +2414,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 			obj->btf_ext = NULL;
 		}
 
-		if (bpf_object__is_btf_mandatory(obj))
+		if (kernel_needs_btf(obj))
 			return err;
 	}
 	return 0;
@@ -3868,6 +3870,10 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
 		if (str_is_empty(targ_name))
 			continue;
 
+		t = skip_mods_and_typedefs(targ_btf, i, NULL);
+		if (!btf_is_composite(t) && !btf_is_array(t))
+			continue;
+
 		targ_essent_len = bpf_core_essential_name_len(targ_name);
 		if (targ_essent_len != local_essent_len)
 			continue;
@@ -4846,8 +4852,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 {
 	struct bpf_load_program_attr load_attr;
 	char *cp, errmsg[STRERR_BUFSIZE];
-	int log_buf_size = BPF_LOG_BUF_SIZE;
-	char *log_buf;
+	size_t log_buf_size = 0;
+	char *log_buf = NULL;
 	int btf_fd, ret;
 
 	if (!insns || !insns_cnt)
@@ -4861,7 +4867,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.insns = insns;
 	load_attr.insns_cnt = insns_cnt;
 	load_attr.license = license;
-	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
+	    prog->type == BPF_PROG_TYPE_LSM) {
 		load_attr.attach_btf_id = prog->attach_btf_id;
 	} else if (prog->type == BPF_PROG_TYPE_TRACING ||
 		   prog->type == BPF_PROG_TYPE_EXT) {
@@ -4887,22 +4894,28 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.prog_flags = prog->prog_flags;
 
 retry_load:
-	log_buf = malloc(log_buf_size);
-	if (!log_buf)
-		pr_warn("Alloc log buffer for bpf loader error, continue without log\n");
+	if (log_buf_size) {
+		log_buf = malloc(log_buf_size);
+		if (!log_buf)
+			return -ENOMEM;
+
+		*log_buf = 0;
+	}
 
 	ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
 
 	if (ret >= 0) {
-		if (load_attr.log_level)
+		if (log_buf && load_attr.log_level)
 			pr_debug("verifier log:\n%s", log_buf);
 		*pfd = ret;
 		ret = 0;
 		goto out;
 	}
 
-	if (errno == ENOSPC) {
-		log_buf_size <<= 1;
+	if (!log_buf || errno == ENOSPC) {
+		log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
+				   log_buf_size << 1);
+
 		free(log_buf);
 		goto retry_load;
 	}
@@ -4945,8 +4958,9 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 {
 	int err = 0, fd, i, btf_id;
 
-	if (prog->type == BPF_PROG_TYPE_TRACING ||
-	    prog->type == BPF_PROG_TYPE_EXT) {
+	if ((prog->type == BPF_PROG_TYPE_TRACING ||
+	     prog->type == BPF_PROG_TYPE_LSM ||
+	     prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
 		btf_id = libbpf_find_attach_btf_id(prog);
 		if (btf_id <= 0)
 			return btf_id;
@@ -6185,6 +6199,7 @@ bool bpf_program__is_##NAME(const struct bpf_program *prog)	\
 }								\
 
 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
+BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
@@ -6251,6 +6266,8 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
 				      struct bpf_program *prog);
 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
 				     struct bpf_program *prog);
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
+				   struct bpf_program *prog);
 
 struct bpf_sec_def {
 	const char *sec;
@@ -6290,6 +6307,10 @@ static const struct bpf_sec_def section_defs[] = {
 		.expected_attach_type = BPF_TRACE_FENTRY,
 		.is_attach_btf = true,
 		.attach_fn = attach_trace),
+	SEC_DEF("fmod_ret/", TRACING,
+		.expected_attach_type = BPF_MODIFY_RETURN,
+		.is_attach_btf = true,
+		.attach_fn = attach_trace),
 	SEC_DEF("fexit/", TRACING,
 		.expected_attach_type = BPF_TRACE_FEXIT,
 		.is_attach_btf = true,
@@ -6297,6 +6318,10 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("freplace/", EXT,
 		.is_attach_btf = true,
 		.attach_fn = attach_trace),
+	SEC_DEF("lsm/", LSM,
+		.is_attach_btf = true,
+		.expected_attach_type = BPF_LSM_MAC,
+		.attach_fn = attach_lsm),
 	BPF_PROG_SEC("xdp",			BPF_PROG_TYPE_XDP),
 	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
@@ -6559,6 +6584,7 @@ invalid_prog:
 }
 
 #define BTF_TRACE_PREFIX "btf_trace_"
+#define BTF_LSM_PREFIX "bpf_lsm_"
 #define BTF_MAX_NAME_SIZE 128
 
 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
@@ -6586,9 +6612,15 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
 	if (attach_type == BPF_TRACE_RAW_TP)
 		err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
 					      BTF_KIND_TYPEDEF);
+	else if (attach_type == BPF_LSM_MAC)
+		err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
+					      BTF_KIND_FUNC);
 	else
 		err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
 
+	if (err <= 0)
+		pr_warn("%s is not found in vmlinux BTF\n", name);
+
 	return err;
 }
 
@@ -6661,8 +6693,6 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog)
 			err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
 						    name + section_defs[i].len,
 						    attach_type);
-		if (err <= 0)
-			pr_warn("%s is not found in vmlinux BTF\n", name);
 		return err;
 	}
 	pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
@@ -6742,6 +6772,17 @@ void *bpf_map__priv(const struct bpf_map *map)
 	return map ? map->priv : ERR_PTR(-EINVAL);
 }
 
+int bpf_map__set_initial_value(struct bpf_map *map,
+			       const void *data, size_t size)
+{
+	if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
+	    size != map->def.value_size || map->fd >= 0)
+		return -EINVAL;
+
+	memcpy(map->mmaped, data, size);
+	return 0;
+}
+
 bool bpf_map__is_offload_neutral(const struct bpf_map *map)
 {
 	return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
@@ -6932,9 +6973,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 struct bpf_link {
 	int (*detach)(struct bpf_link *link);
 	int (*destroy)(struct bpf_link *link);
+	char *pin_path;		/* NULL, if not pinned */
+	int fd;			/* hook FD, -1 if not applicable */
 	bool disconnected;
 };
 
+/* Replace link's underlying BPF program with the new one */
+int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
+{
+	return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+}
+
 /* Release "ownership" of underlying BPF resource (typically, BPF program
  * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
  * link, when destructed through bpf_link__destroy() call won't attempt to
@@ -6961,26 +7010,109 @@ int bpf_link__destroy(struct bpf_link *link)
 		err = link->detach(link);
 	if (link->destroy)
 		link->destroy(link);
+	if (link->pin_path)
+		free(link->pin_path);
 	free(link);
 
 	return err;
 }
 
-struct bpf_link_fd {
-	struct bpf_link link; /* has to be at the top of struct */
-	int fd; /* hook FD */
-};
+int bpf_link__fd(const struct bpf_link *link)
+{
+	return link->fd;
+}
+
+const char *bpf_link__pin_path(const struct bpf_link *link)
+{
+	return link->pin_path;
+}
+
+static int bpf_link__detach_fd(struct bpf_link *link)
+{
+	return close(link->fd);
+}
+
+struct bpf_link *bpf_link__open(const char *path)
+{
+	struct bpf_link *link;
+	int fd;
+
+	fd = bpf_obj_get(path);
+	if (fd < 0) {
+		fd = -errno;
+		pr_warn("failed to open link at %s: %d\n", path, fd);
+		return ERR_PTR(fd);
+	}
+
+	link = calloc(1, sizeof(*link));
+	if (!link) {
+		close(fd);
+		return ERR_PTR(-ENOMEM);
+	}
+	link->detach = &bpf_link__detach_fd;
+	link->fd = fd;
+
+	link->pin_path = strdup(path);
+	if (!link->pin_path) {
+		bpf_link__destroy(link);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return link;
+}
+
+int bpf_link__pin(struct bpf_link *link, const char *path)
+{
+	int err;
+
+	if (link->pin_path)
+		return -EBUSY;
+	err = make_parent_dir(path);
+	if (err)
+		return err;
+	err = check_path(path);
+	if (err)
+		return err;
+
+	link->pin_path = strdup(path);
+	if (!link->pin_path)
+		return -ENOMEM;
+
+	if (bpf_obj_pin(link->fd, link->pin_path)) {
+		err = -errno;
+		zfree(&link->pin_path);
+		return err;
+	}
+
+	pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
+	return 0;
+}
+
+int bpf_link__unpin(struct bpf_link *link)
+{
+	int err;
+
+	if (!link->pin_path)
+		return -EINVAL;
+
+	err = unlink(link->pin_path);
+	if (err != 0)
+		return -errno;
+
+	pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
+	zfree(&link->pin_path);
+	return 0;
+}
 
 static int bpf_link__detach_perf_event(struct bpf_link *link)
 {
-	struct bpf_link_fd *l = (void *)link;
 	int err;
 
-	err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0);
+	err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
 	if (err)
 		err = -errno;
 
-	close(l->fd);
+	close(link->fd);
 	return err;
 }
 
@@ -6988,7 +7120,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
 						int pfd)
 {
 	char errmsg[STRERR_BUFSIZE];
-	struct bpf_link_fd *link;
+	struct bpf_link *link;
 	int prog_fd, err;
 
 	if (pfd < 0) {
@@ -7006,7 +7138,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
 	link = calloc(1, sizeof(*link));
 	if (!link)
 		return ERR_PTR(-ENOMEM);
-	link->link.detach = &bpf_link__detach_perf_event;
+	link->detach = &bpf_link__detach_perf_event;
 	link->fd = pfd;
 
 	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
@@ -7025,7 +7157,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
 			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 		return ERR_PTR(err);
 	}
-	return (struct bpf_link *)link;
+	return link;
 }
 
 /*
@@ -7313,18 +7445,11 @@ out:
 	return link;
 }
 
-static int bpf_link__detach_fd(struct bpf_link *link)
-{
-	struct bpf_link_fd *l = (void *)link;
-
-	return close(l->fd);
-}
-
 struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
 						    const char *tp_name)
 {
 	char errmsg[STRERR_BUFSIZE];
-	struct bpf_link_fd *link;
+	struct bpf_link *link;
 	int prog_fd, pfd;
 
 	prog_fd = bpf_program__fd(prog);
@@ -7337,7 +7462,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
 	link = calloc(1, sizeof(*link));
 	if (!link)
 		return ERR_PTR(-ENOMEM);
-	link->link.detach = &bpf_link__detach_fd;
+	link->detach = &bpf_link__detach_fd;
 
 	pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
 	if (pfd < 0) {
@@ -7349,7 +7474,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
 		return ERR_PTR(pfd);
 	}
 	link->fd = pfd;
-	return (struct bpf_link *)link;
+	return link;
 }
 
 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
@@ -7360,10 +7485,11 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
 	return bpf_program__attach_raw_tracepoint(prog, tp_name);
 }
 
-struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+/* Common logic for all BPF program types that attach to a btf_id */
+static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
 {
 	char errmsg[STRERR_BUFSIZE];
-	struct bpf_link_fd *link;
+	struct bpf_link *link;
 	int prog_fd, pfd;
 
 	prog_fd = bpf_program__fd(prog);
@@ -7376,13 +7502,13 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
 	link = calloc(1, sizeof(*link));
 	if (!link)
 		return ERR_PTR(-ENOMEM);
-	link->link.detach = &bpf_link__detach_fd;
+	link->detach = &bpf_link__detach_fd;
 
 	pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
 	if (pfd < 0) {
 		pfd = -errno;
 		free(link);
-		pr_warn("program '%s': failed to attach to trace: %s\n",
+		pr_warn("program '%s': failed to attach: %s\n",
 			bpf_program__title(prog, false),
 			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
 		return ERR_PTR(pfd);
@@ -7391,12 +7517,68 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
 	return (struct bpf_link *)link;
 }
 
+struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+{
+	return bpf_program__attach_btf_id(prog);
+}
+
+struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
+{
+	return bpf_program__attach_btf_id(prog);
+}
+
 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
 				     struct bpf_program *prog)
 {
 	return bpf_program__attach_trace(prog);
 }
 
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
+				   struct bpf_program *prog)
+{
+	return bpf_program__attach_lsm(prog);
+}
+
+struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+{
+	const struct bpf_sec_def *sec_def;
+	enum bpf_attach_type attach_type;
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int prog_fd, link_fd;
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("program '%s': can't attach before loaded\n",
+			bpf_program__title(prog, false));
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = calloc(1, sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->detach = &bpf_link__detach_fd;
+
+	attach_type = bpf_program__get_expected_attach_type(prog);
+	if (!attach_type) {
+		sec_def = find_sec_def(bpf_program__title(prog, false));
+		if (sec_def)
+			attach_type = sec_def->attach_type;
+	}
+	link_fd = bpf_link_create(prog_fd, cgroup_fd, attach_type, NULL);
+	if (link_fd < 0) {
+		link_fd = -errno;
+		free(link);
+		pr_warn("program '%s': failed to attach to cgroup: %s\n",
+			bpf_program__title(prog, false),
+			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(link_fd);
+	}
+	link->fd = link_fd;
+	return link;
+}
+
 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
 {
 	const struct bpf_sec_def *sec_def;
@@ -7410,10 +7592,9 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog)
 
 static int bpf_link__detach_struct_ops(struct bpf_link *link)
 {
-	struct bpf_link_fd *l = (void *)link;
 	__u32 zero = 0;
 
-	if (bpf_map_delete_elem(l->fd, &zero))
+	if (bpf_map_delete_elem(link->fd, &zero))
 		return -errno;
 
 	return 0;
@@ -7422,7 +7603,7 @@ static int bpf_link__detach_struct_ops(struct bpf_link *link)
 struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
 {
 	struct bpf_struct_ops *st_ops;
-	struct bpf_link_fd *link;
+	struct bpf_link *link;
 	__u32 i, zero = 0;
 	int err;
 
@@ -7454,10 +7635,10 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
 		return ERR_PTR(err);
 	}
 
-	link->link.detach = bpf_link__detach_struct_ops;
+	link->detach = bpf_link__detach_struct_ops;
 	link->fd = map->fd;
 
-	return (struct bpf_link *)link;
+	return link;
 }
 
 enum bpf_perf_event_ret
@@ -8138,6 +8319,31 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
 	}
 }
 
+int bpf_program__set_attach_target(struct bpf_program *prog,
+				   int attach_prog_fd,
+				   const char *attach_func_name)
+{
+	int btf_id;
+
+	if (!prog || attach_prog_fd < 0 || !attach_func_name)
+		return -EINVAL;
+
+	if (attach_prog_fd)
+		btf_id = libbpf_find_prog_btf_id(attach_func_name,
+						 attach_prog_fd);
+	else
+		btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
+					       attach_func_name,
+					       prog->expected_attach_type);
+
+	if (btf_id < 0)
+		return btf_id;
+
+	prog->attach_btf_id = btf_id;
+	prog->attach_prog_fd = attach_prog_fd;
+	return 0;
+}
+
 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
 {
 	int err = 0, n, len, start, end = -1;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3fe12c9d1f92..44df1d3e7287 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -219,6 +219,13 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
 
 struct bpf_link;
 
+LIBBPF_API struct bpf_link *bpf_link__open(const char *path);
+LIBBPF_API int bpf_link__fd(const struct bpf_link *link);
+LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link);
+LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
+LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
+LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
+					struct bpf_program *prog);
 LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
 LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
 
@@ -240,11 +247,17 @@ bpf_program__attach_tracepoint(struct bpf_program *prog,
 LIBBPF_API struct bpf_link *
 bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
 				   const char *tp_name);
-
 LIBBPF_API struct bpf_link *
 bpf_program__attach_trace(struct bpf_program *prog);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_lsm(struct bpf_program *prog);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
+
 struct bpf_map;
+
 LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
+
 struct bpf_insn;
 
 /*
@@ -316,6 +329,7 @@ LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
@@ -334,10 +348,15 @@ LIBBPF_API void
 bpf_program__set_expected_attach_type(struct bpf_program *prog,
 				      enum bpf_attach_type type);
 
+LIBBPF_API int
+bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
+			       const char *attach_func_name);
+
 LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
@@ -398,6 +417,8 @@ typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
 LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
 				 bpf_map_clear_priv_t clear_priv);
 LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
+					  const void *data, size_t size);
 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
 LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
 LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
@@ -435,7 +456,15 @@ struct xdp_link_info {
 	__u8 attach_mode;
 };
 
+struct bpf_xdp_set_link_opts {
+	size_t sz;
+	__u32 old_fd;
+};
+#define bpf_xdp_set_link_opts__last_field old_fd
+
 LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
+					const struct bpf_xdp_set_link_opts *opts);
 LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
 LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
 				     size_t info_size, __u32 flags);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index b035122142bb..bb8831605b25 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -235,3 +235,22 @@ LIBBPF_0.0.7 {
 		btf__align_of;
 		libbpf_find_kernel_btf;
 } LIBBPF_0.0.6;
+
+LIBBPF_0.0.8 {
+	global:
+		bpf_link__fd;
+		bpf_link__open;
+		bpf_link__pin;
+		bpf_link__pin_path;
+		bpf_link__unpin;
+		bpf_link__update_program;
+		bpf_link_create;
+		bpf_link_update;
+		bpf_map__set_initial_value;
+		bpf_program__attach_cgroup;
+		bpf_program__attach_lsm;
+		bpf_program__is_lsm;
+		bpf_program__set_attach_target;
+		bpf_program__set_lsm;
+		bpf_set_link_xdp_fd_opts;
+} LIBBPF_0.0.7;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index b782ebef6ac9..2c92059c0c90 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -108,6 +108,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_TRACING:
 	case BPF_PROG_TYPE_STRUCT_OPS:
 	case BPF_PROG_TYPE_EXT:
+	case BPF_PROG_TYPE_LSM:
 	default:
 		break;
 	}
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 431bd25c6cdb..18b5319025e1 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -132,7 +132,8 @@ done:
 	return ret;
 }
 
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
+					 __u32 flags)
 {
 	int sock, seq = 0, ret;
 	struct nlattr *nla, *nla_xdp;
@@ -178,6 +179,14 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
 		nla->nla_len += nla_xdp->nla_len;
 	}
 
+	if (flags & XDP_FLAGS_REPLACE) {
+		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+		nla_xdp->nla_type = IFLA_XDP_EXPECTED_FD;
+		nla_xdp->nla_len = NLA_HDRLEN + sizeof(old_fd);
+		memcpy((char *)nla_xdp + NLA_HDRLEN, &old_fd, sizeof(old_fd));
+		nla->nla_len += nla_xdp->nla_len;
+	}
+
 	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
 
 	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
@@ -191,6 +200,29 @@ cleanup:
 	return ret;
 }
 
+int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
+			     const struct bpf_xdp_set_link_opts *opts)
+{
+	int old_fd = -1;
+
+	if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
+		return -EINVAL;
+
+	if (OPTS_HAS(opts, old_fd)) {
+		old_fd = OPTS_GET(opts, old_fd, -1);
+		flags |= XDP_FLAGS_REPLACE;
+	}
+
+	return __bpf_set_link_xdp_fd_replace(ifindex, fd,
+					     old_fd,
+					     flags);
+}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+	return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+}
+
 static int __dump_link_nlmsg(struct nlmsghdr *nlh,
 			     libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
 {
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 9807903f121e..f7f4efb70a4c 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -280,7 +280,11 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
 	fill->consumer = map + off.fr.consumer;
 	fill->flags = map + off.fr.flags;
 	fill->ring = map + off.fr.desc;
-	fill->cached_cons = umem->config.fill_size;
+	fill->cached_prod = *fill->producer;
+	/* cached_cons is "size" bigger than the real consumer pointer
+	 * See xsk_prod_nb_free
+	 */
+	fill->cached_cons = *fill->consumer + umem->config.fill_size;
 
 	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
 		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
@@ -297,6 +301,8 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
 	comp->consumer = map + off.cr.consumer;
 	comp->flags = map + off.cr.flags;
 	comp->ring = map + off.cr.desc;
+	comp->cached_prod = *comp->producer;
+	comp->cached_cons = *comp->consumer;
 
 	*umem_ptr = umem;
 	return 0;
@@ -672,6 +678,8 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 		rx->consumer = rx_map + off.rx.consumer;
 		rx->flags = rx_map + off.rx.flags;
 		rx->ring = rx_map + off.rx.desc;
+		rx->cached_prod = *rx->producer;
+		rx->cached_cons = *rx->consumer;
 	}
 	xsk->rx = rx;
 
@@ -691,7 +699,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 		tx->consumer = tx_map + off.tx.consumer;
 		tx->flags = tx_map + off.tx.flags;
 		tx->ring = tx_map + off.tx.desc;
-		tx->cached_cons = xsk->config.tx_size;
+		tx->cached_prod = *tx->producer;
+		/* cached_cons is r->size bigger than the real consumer pointer
+		 * See xsk_prod_nb_free
+		 */
+		tx->cached_cons = *tx->consumer + xsk->config.tx_size;
 	}
 	xsk->tx = tx;
 
diff --git a/tools/lib/lockdep/.gitignore b/tools/lib/lockdep/.gitignore
index cc0e7a9f99e3..6c308ac4388c 100644
--- a/tools/lib/lockdep/.gitignore
+++ b/tools/lib/lockdep/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 liblockdep.so.*
diff --git a/tools/lib/perf/Documentation/examples/counting.c b/tools/lib/perf/Documentation/examples/counting.c
new file mode 100644
index 000000000000..6085693571ef
--- /dev/null
+++ b/tools/lib/perf/Documentation/examples/counting.c
@@ -0,0 +1,83 @@
+#include <linux/perf_event.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <perf/mmap.h>
+#include <perf/core.h>
+#include <perf/event.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static int libperf_print(enum libperf_print_level level,
+                         const char *fmt, va_list ap)
+{
+	return vfprintf(stderr, fmt, ap);
+}
+
+int main(int argc, char **argv)
+{
+	int count = 100000, err = 0;
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_thread_map *threads;
+	struct perf_counts_values counts;
+
+	struct perf_event_attr attr1 = {
+		.type        = PERF_TYPE_SOFTWARE,
+		.config      = PERF_COUNT_SW_CPU_CLOCK,
+		.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
+		.disabled    = 1,
+	};
+	struct perf_event_attr attr2 = {
+		.type        = PERF_TYPE_SOFTWARE,
+		.config      = PERF_COUNT_SW_TASK_CLOCK,
+		.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
+		.disabled    = 1,
+	};
+
+	libperf_init(libperf_print);
+	threads = perf_thread_map__new_dummy();
+	if (!threads) {
+		fprintf(stderr, "failed to create threads\n");
+		return -1;
+	}
+	perf_thread_map__set_pid(threads, 0, 0);
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		fprintf(stderr, "failed to create evlist\n");
+		goto out_threads;
+	}
+	evsel = perf_evsel__new(&attr1);
+	if (!evsel) {
+		fprintf(stderr, "failed to create evsel1\n");
+		goto out_evlist;
+	}
+	perf_evlist__add(evlist, evsel);
+	evsel = perf_evsel__new(&attr2);
+	if (!evsel) {
+		fprintf(stderr, "failed to create evsel2\n");
+		goto out_evlist;
+	}
+	perf_evlist__add(evlist, evsel);
+	perf_evlist__set_maps(evlist, NULL, threads);
+	err = perf_evlist__open(evlist);
+	if (err) {
+		fprintf(stderr, "failed to open evsel\n");
+		goto out_evlist;
+	}
+	perf_evlist__enable(evlist);
+	while (count--);
+	perf_evlist__disable(evlist);
+	perf_evlist__for_each_evsel(evlist, evsel) {
+		perf_evsel__read(evsel, 0, 0, &counts);
+		fprintf(stdout, "count %llu, enabled %llu, run %llu\n",
+				counts.val, counts.ena, counts.run);
+	}
+	perf_evlist__close(evlist);
+out_evlist:
+	perf_evlist__delete(evlist);
+out_threads:
+	perf_thread_map__put(threads);
+	return err;
+}
diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore
index 9e9f25fb1922..7123c70b9ebc 100644
--- a/tools/lib/traceevent/.gitignore
+++ b/tools/lib/traceevent/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 TRACEEVENT-CFLAGS
 libtraceevent-dynamic-list
 libtraceevent.so.*
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index beaa8b8c08ff..e1bd2a93c6db 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -5541,7 +5541,7 @@ static void print_event_time(struct tep_handle *tep, struct trace_seq *s,
 	if (p10 > 1 && p10 < time)
 		trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10);
 	else
-		trace_seq_printf(s, "%12llu\n", time);
+		trace_seq_printf(s, "%12llu", time);
 }
 
 struct print_event_type {
diff --git a/tools/memory-model/.gitignore b/tools/memory-model/.gitignore
index b1d34c52f3c3..cf4cd66d8fbf 100644
--- a/tools/memory-model/.gitignore
+++ b/tools/memory-model/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 litmus
diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore
index 6e2ddc54152f..c492a1ddad91 100644
--- a/tools/memory-model/litmus-tests/.gitignore
+++ b/tools/memory-model/litmus-tests/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 *.litmus.out
diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
index 914cff12899b..45cefda24c7b 100644
--- a/tools/objtool/.gitignore
+++ b/tools/objtool/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 arch/x86/lib/inat-tables.c
 objtool
 fixdep
diff --git a/tools/objtool/Build b/tools/objtool/Build
index 8dc4f0848362..66f44f5cd2a6 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -11,6 +11,7 @@ objtool-y += objtool.o
 objtool-y += libstring.o
 objtool-y += libctype.o
 objtool-y += str_error_r.o
+objtool-y += librbtree.o
 
 CFLAGS += -I$(srctree)/tools/lib
 
@@ -25,3 +26,7 @@ $(OUTPUT)libctype.o: ../lib/ctype.c FORCE
 $(OUTPUT)str_error_r.o: ../lib/str_error_r.c FORCE
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)librbtree.o: ../lib/rbtree.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index c807984a03c1..10fbe75ab43d 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -17,7 +17,7 @@
 #include "builtin.h"
 #include "check.h"
 
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats;
 
 static const char * const check_usage[] = {
 	"objtool check [<options>] file.o",
@@ -31,6 +31,7 @@ const struct option check_options[] = {
 	OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
 	OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
 	OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
+	OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
 	OPT_END(),
 };
 
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index a32736f8d2a4..0b907902ee79 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -8,7 +8,7 @@
 #include <subcmd/parse-options.h>
 
 extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats;
 
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 4768d91c6d68..8dd01f986fbb 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -72,22 +72,22 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
 	return find_insn(file, func->cfunc->sec, func->cfunc->offset);
 }
 
-#define func_for_each_insn_all(file, func, insn)			\
+#define func_for_each_insn(file, func, insn)				\
 	for (insn = find_insn(file, func->sec, func->offset);		\
 	     insn;							\
 	     insn = next_insn_same_func(file, insn))
 
-#define func_for_each_insn(file, func, insn)				\
-	for (insn = find_insn(file, func->sec, func->offset);		\
+#define sym_for_each_insn(file, sym, insn)				\
+	for (insn = find_insn(file, sym->sec, sym->offset);		\
 	     insn && &insn->list != &file->insn_list &&			\
-		insn->sec == func->sec &&				\
-		insn->offset < func->offset + func->len;		\
+		insn->sec == sym->sec &&				\
+		insn->offset < sym->offset + sym->len;			\
 	     insn = list_next_entry(insn, list))
 
-#define func_for_each_insn_continue_reverse(file, func, insn)		\
+#define sym_for_each_insn_continue_reverse(file, sym, insn)		\
 	for (insn = list_prev_entry(insn, list);			\
 	     &insn->list != &file->insn_list &&				\
-		insn->sec == func->sec && insn->offset >= func->offset;	\
+		insn->sec == sym->sec && insn->offset >= sym->offset;	\
 	     insn = list_prev_entry(insn, list))
 
 #define sec_for_each_insn_from(file, insn)				\
@@ -97,14 +97,19 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
 	for (insn = next_insn_same_sec(file, insn); insn;		\
 	     insn = next_insn_same_sec(file, insn))
 
+static bool is_static_jump(struct instruction *insn)
+{
+	return insn->type == INSN_JUMP_CONDITIONAL ||
+	       insn->type == INSN_JUMP_UNCONDITIONAL;
+}
+
 static bool is_sibling_call(struct instruction *insn)
 {
 	/* An indirect jump is either a sibling call or a jump to a table. */
 	if (insn->type == INSN_JUMP_DYNAMIC)
 		return list_empty(&insn->alts);
 
-	if (insn->type != INSN_JUMP_CONDITIONAL &&
-	    insn->type != INSN_JUMP_UNCONDITIONAL)
+	if (!is_static_jump(insn))
 		return false;
 
 	/* add_jump_destinations() sets insn->call_dest for sibling calls. */
@@ -165,7 +170,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
 	if (!insn->func)
 		return false;
 
-	func_for_each_insn_all(file, func, insn) {
+	func_for_each_insn(file, func, insn) {
 		empty = false;
 
 		if (insn->type == INSN_RETURN)
@@ -180,7 +185,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
 	 * case, the function's dead-end status depends on whether the target
 	 * of the sibling call returns.
 	 */
-	func_for_each_insn_all(file, func, insn) {
+	func_for_each_insn(file, func, insn) {
 		if (is_sibling_call(insn)) {
 			struct instruction *dest = insn->jump_dest;
 
@@ -234,6 +239,7 @@ static int decode_instructions(struct objtool_file *file)
 	struct symbol *func;
 	unsigned long offset;
 	struct instruction *insn;
+	unsigned long nr_insns = 0;
 	int ret;
 
 	for_each_sec(file, sec) {
@@ -269,6 +275,7 @@ static int decode_instructions(struct objtool_file *file)
 
 			hash_add(file->insn_hash, &insn->hash, insn->offset);
 			list_add_tail(&insn->list, &file->insn_list);
+			nr_insns++;
 		}
 
 		list_for_each_entry(func, &sec->symbol_list, list) {
@@ -281,11 +288,14 @@ static int decode_instructions(struct objtool_file *file)
 				return -1;
 			}
 
-			func_for_each_insn(file, func, insn)
+			sym_for_each_insn(file, func, insn)
 				insn->func = func;
 		}
 	}
 
+	if (stats)
+		printf("nr_insns: %lu\n", nr_insns);
+
 	return 0;
 
 err:
@@ -415,8 +425,8 @@ static void add_ignores(struct objtool_file *file)
 			break;
 
 		case STT_SECTION:
-			func = find_symbol_by_offset(rela->sym->sec, rela->addend);
-			if (!func || func->type != STT_FUNC)
+			func = find_func_by_offset(rela->sym->sec, rela->addend);
+			if (!func)
 				continue;
 			break;
 
@@ -425,7 +435,7 @@ static void add_ignores(struct objtool_file *file)
 			continue;
 		}
 
-		func_for_each_insn_all(file, func, insn)
+		func_for_each_insn(file, func, insn)
 			insn->ignore = true;
 	}
 }
@@ -478,6 +488,7 @@ static const char *uaccess_safe_builtin[] = {
 	"__sanitizer_cov_trace_cmp2",
 	"__sanitizer_cov_trace_cmp4",
 	"__sanitizer_cov_trace_cmp8",
+	"__sanitizer_cov_trace_switch",
 	/* UBSAN */
 	"ubsan_type_mismatch_common",
 	"__ubsan_handle_type_mismatch",
@@ -553,15 +564,14 @@ static int add_jump_destinations(struct objtool_file *file)
 	unsigned long dest_off;
 
 	for_each_insn(file, insn) {
-		if (insn->type != INSN_JUMP_CONDITIONAL &&
-		    insn->type != INSN_JUMP_UNCONDITIONAL)
+		if (!is_static_jump(insn))
 			continue;
 
 		if (insn->ignore || insn->offset == FAKE_JUMP_OFFSET)
 			continue;
 
-		rela = find_rela_by_dest_range(insn->sec, insn->offset,
-					       insn->len);
+		rela = find_rela_by_dest_range(file->elf, insn->sec,
+					       insn->offset, insn->len);
 		if (!rela) {
 			dest_sec = insn->sec;
 			dest_off = insn->offset + insn->len + insn->immediate;
@@ -657,14 +667,18 @@ static int add_call_destinations(struct objtool_file *file)
 		if (insn->type != INSN_CALL)
 			continue;
 
-		rela = find_rela_by_dest_range(insn->sec, insn->offset,
-					       insn->len);
+		rela = find_rela_by_dest_range(file->elf, insn->sec,
+					       insn->offset, insn->len);
 		if (!rela) {
 			dest_off = insn->offset + insn->len + insn->immediate;
-			insn->call_dest = find_symbol_by_offset(insn->sec,
-								dest_off);
+			insn->call_dest = find_func_by_offset(insn->sec, dest_off);
+			if (!insn->call_dest)
+				insn->call_dest = find_symbol_by_offset(insn->sec, dest_off);
 
-			if (!insn->call_dest && !insn->ignore) {
+			if (insn->ignore)
+				continue;
+
+			if (!insn->call_dest) {
 				WARN_FUNC("unsupported intra-function call",
 					  insn->sec, insn->offset);
 				if (retpoline)
@@ -672,11 +686,16 @@ static int add_call_destinations(struct objtool_file *file)
 				return -1;
 			}
 
+			if (insn->func && insn->call_dest->type != STT_FUNC) {
+				WARN_FUNC("unsupported call to non-function",
+					  insn->sec, insn->offset);
+				return -1;
+			}
+
 		} else if (rela->sym->type == STT_SECTION) {
-			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
-								rela->addend+4);
-			if (!insn->call_dest ||
-			    insn->call_dest->type != STT_FUNC) {
+			insn->call_dest = find_func_by_offset(rela->sym->sec,
+							      rela->addend+4);
+			if (!insn->call_dest) {
 				WARN_FUNC("can't find call dest symbol at %s+0x%x",
 					  insn->sec, insn->offset,
 					  rela->sym->sec->name,
@@ -764,8 +783,28 @@ static int handle_group_alt(struct objtool_file *file,
 		insn->ignore = orig_insn->ignore_alts;
 		insn->func = orig_insn->func;
 
-		if (insn->type != INSN_JUMP_CONDITIONAL &&
-		    insn->type != INSN_JUMP_UNCONDITIONAL)
+		/*
+		 * Since alternative replacement code is copy/pasted by the
+		 * kernel after applying relocations, generally such code can't
+		 * have relative-address relocation references to outside the
+		 * .altinstr_replacement section, unless the arch's
+		 * alternatives code can adjust the relative offsets
+		 * accordingly.
+		 *
+		 * The x86 alternatives code adjusts the offsets only when it
+		 * encounters a branch instruction at the very beginning of the
+		 * replacement group.
+		 */
+		if ((insn->offset != special_alt->new_off ||
+		    (insn->type != INSN_CALL && !is_static_jump(insn))) &&
+		    find_rela_by_dest_range(file->elf, insn->sec, insn->offset, insn->len)) {
+
+			WARN_FUNC("unsupported relocation in alternatives section",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		if (!is_static_jump(insn))
 			continue;
 
 		if (!insn->immediate)
@@ -1001,7 +1040,7 @@ static struct rela *find_jump_table(struct objtool_file *file,
 				      struct instruction *insn)
 {
 	struct rela *text_rela, *table_rela;
-	struct instruction *orig_insn = insn;
+	struct instruction *dest_insn, *orig_insn = insn;
 	struct section *table_sec;
 	unsigned long table_offset;
 
@@ -1028,8 +1067,8 @@ static struct rela *find_jump_table(struct objtool_file *file,
 		    break;
 
 		/* look for a relocation which references .rodata */
-		text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
-						    insn->len);
+		text_rela = find_rela_by_dest_range(file->elf, insn->sec,
+						    insn->offset, insn->len);
 		if (!text_rela || text_rela->sym->type != STT_SECTION ||
 		    !text_rela->sym->sec->rodata)
 			continue;
@@ -1053,10 +1092,17 @@ static struct rela *find_jump_table(struct objtool_file *file,
 		    strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
 			continue;
 
-		/* Each table entry has a rela associated with it. */
-		table_rela = find_rela_by_dest(table_sec, table_offset);
+		/*
+		 * Each table entry has a rela associated with it.  The rela
+		 * should reference text in the same function as the original
+		 * instruction.
+		 */
+		table_rela = find_rela_by_dest(file->elf, table_sec, table_offset);
 		if (!table_rela)
 			continue;
+		dest_insn = find_insn(file, table_rela->sym->sec, table_rela->addend);
+		if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func)
+			continue;
 
 		/*
 		 * Use of RIP-relative switch jumps is quite rare, and
@@ -1082,7 +1128,7 @@ static void mark_func_jump_tables(struct objtool_file *file,
 	struct instruction *insn, *last = NULL;
 	struct rela *rela;
 
-	func_for_each_insn_all(file, func, insn) {
+	func_for_each_insn(file, func, insn) {
 		if (!last)
 			last = insn;
 
@@ -1117,7 +1163,7 @@ static int add_func_jump_tables(struct objtool_file *file,
 	struct instruction *insn;
 	int ret;
 
-	func_for_each_insn_all(file, func, insn) {
+	func_for_each_insn(file, func, insn) {
 		if (!insn->jump_table)
 			continue;
 
@@ -1187,7 +1233,7 @@ static int read_unwind_hints(struct objtool_file *file)
 	for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) {
 		hint = (struct unwind_hint *)sec->data->d_buf + i;
 
-		rela = find_rela_by_dest(sec, i * sizeof(*hint));
+		rela = find_rela_by_dest(file->elf, sec, i * sizeof(*hint));
 		if (!rela) {
 			WARN("can't find rela for unwind_hints[%d]", i);
 			return -1;
@@ -1935,6 +1981,41 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st
 	return validate_call(insn, state);
 }
 
+static int validate_return(struct symbol *func, struct instruction *insn, struct insn_state *state)
+{
+	if (state->uaccess && !func_uaccess_safe(func)) {
+		WARN_FUNC("return with UACCESS enabled",
+			  insn->sec, insn->offset);
+		return 1;
+	}
+
+	if (!state->uaccess && func_uaccess_safe(func)) {
+		WARN_FUNC("return with UACCESS disabled from a UACCESS-safe function",
+			  insn->sec, insn->offset);
+		return 1;
+	}
+
+	if (state->df) {
+		WARN_FUNC("return with DF set",
+			  insn->sec, insn->offset);
+		return 1;
+	}
+
+	if (func && has_modified_stack_frame(state)) {
+		WARN_FUNC("return with modified stack frame",
+			  insn->sec, insn->offset);
+		return 1;
+	}
+
+	if (state->bp_scratch) {
+		WARN("%s uses BP as a scratch register",
+		     func->name);
+		return 1;
+	}
+
+	return 0;
+}
+
 /*
  * Follow the branch starting at the given instruction, and recursively follow
  * any other branches (jumps).  Meanwhile, track the frame pointer state at
@@ -1989,7 +2070,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 
 				i = insn;
 				save_insn = NULL;
-				func_for_each_insn_continue_reverse(file, func, i) {
+				sym_for_each_insn_continue_reverse(file, func, i) {
 					if (i->save) {
 						save_insn = i;
 						break;
@@ -2050,34 +2131,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 		switch (insn->type) {
 
 		case INSN_RETURN:
-			if (state.uaccess && !func_uaccess_safe(func)) {
-				WARN_FUNC("return with UACCESS enabled", sec, insn->offset);
-				return 1;
-			}
-
-			if (!state.uaccess && func_uaccess_safe(func)) {
-				WARN_FUNC("return with UACCESS disabled from a UACCESS-safe function", sec, insn->offset);
-				return 1;
-			}
-
-			if (state.df) {
-				WARN_FUNC("return with DF set", sec, insn->offset);
-				return 1;
-			}
-
-			if (func && has_modified_stack_frame(&state)) {
-				WARN_FUNC("return with modified stack frame",
-					  sec, insn->offset);
-				return 1;
-			}
-
-			if (state.bp_scratch) {
-				WARN("%s uses BP as a scratch register",
-				     func->name);
-				return 1;
-			}
-
-			return 0;
+			return validate_return(func, insn, &state);
 
 		case INSN_CALL:
 		case INSN_CALL_DYNAMIC:
@@ -2342,9 +2396,8 @@ static bool ignore_unreachable_insn(struct instruction *insn)
 	return false;
 }
 
-static int validate_functions(struct objtool_file *file)
+static int validate_section(struct objtool_file *file, struct section *sec)
 {
-	struct section *sec;
 	struct symbol *func;
 	struct instruction *insn;
 	struct insn_state state;
@@ -2357,36 +2410,45 @@ static int validate_functions(struct objtool_file *file)
 	       CFI_NUM_REGS * sizeof(struct cfi_reg));
 	state.stack_size = initial_func_cfi.cfa.offset;
 
-	for_each_sec(file, sec) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
+	list_for_each_entry(func, &sec->symbol_list, list) {
+		if (func->type != STT_FUNC)
+			continue;
 
-			if (!func->len) {
-				WARN("%s() is missing an ELF size annotation",
-				     func->name);
-				warnings++;
-			}
+		if (!func->len) {
+			WARN("%s() is missing an ELF size annotation",
+			     func->name);
+			warnings++;
+		}
 
-			if (func->pfunc != func || func->alias != func)
-				continue;
+		if (func->pfunc != func || func->alias != func)
+			continue;
 
-			insn = find_insn(file, sec, func->offset);
-			if (!insn || insn->ignore || insn->visited)
-				continue;
+		insn = find_insn(file, sec, func->offset);
+		if (!insn || insn->ignore || insn->visited)
+			continue;
 
-			state.uaccess = func->uaccess_safe;
+		state.uaccess = func->uaccess_safe;
 
-			ret = validate_branch(file, func, insn, state);
-			if (ret && backtrace)
-				BT_FUNC("<=== (func)", insn);
-			warnings += ret;
-		}
+		ret = validate_branch(file, func, insn, state);
+		if (ret && backtrace)
+			BT_FUNC("<=== (func)", insn);
+		warnings += ret;
 	}
 
 	return warnings;
 }
 
+static int validate_functions(struct objtool_file *file)
+{
+	struct section *sec;
+	int warnings = 0;
+
+	for_each_sec(file, sec)
+		warnings += validate_section(file, sec);
+
+	return warnings;
+}
+
 static int validate_reachable_instructions(struct objtool_file *file)
 {
 	struct instruction *insn;
@@ -2405,23 +2467,6 @@ static int validate_reachable_instructions(struct objtool_file *file)
 	return 0;
 }
 
-static void cleanup(struct objtool_file *file)
-{
-	struct instruction *insn, *tmpinsn;
-	struct alternative *alt, *tmpalt;
-
-	list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
-		list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
-			list_del(&alt->list);
-			free(alt);
-		}
-		list_del(&insn->list);
-		hash_del(&insn->hash);
-		free(insn);
-	}
-	elf_close(file->elf);
-}
-
 static struct objtool_file file;
 
 int check(const char *_objname, bool orc)
@@ -2489,10 +2534,14 @@ int check(const char *_objname, bool orc)
 	}
 
 out:
-	cleanup(&file);
+	if (ret < 0) {
+		/*
+		 *  Fatal error.  The binary is corrupt or otherwise broken in
+		 *  some way, or objtool itself is broken.  Fail the kernel
+		 *  build.
+		 */
+		return ret;
+	}
 
-	/* ignore warnings for now until we get all the code cleaned up */
-	if (ret || warnings)
-		return 0;
 	return 0;
 }
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 6d875ca6fce0..f0ce8ffe7135 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -50,7 +50,7 @@ struct instruction {
 struct objtool_file {
 	struct elf *elf;
 	struct list_head insn_list;
-	DECLARE_HASHTABLE(insn_hash, 16);
+	DECLARE_HASHTABLE(insn_hash, 20);
 	bool ignore_unreachables, c_file, hints, rodata;
 };
 
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index edba4745f25a..09ddc8f1def3 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -15,17 +15,107 @@
 #include <string.h>
 #include <unistd.h>
 #include <errno.h>
+#include "builtin.h"
 
 #include "elf.h"
 #include "warn.h"
 
 #define MAX_NAME_LEN 128
 
+static inline u32 str_hash(const char *str)
+{
+	return jhash(str, strlen(str), 0);
+}
+
+static void rb_add(struct rb_root *tree, struct rb_node *node,
+		   int (*cmp)(struct rb_node *, const struct rb_node *))
+{
+	struct rb_node **link = &tree->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		parent = *link;
+		if (cmp(node, parent) < 0)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	rb_link_node(node, parent, link);
+	rb_insert_color(node, tree);
+}
+
+static struct rb_node *rb_find_first(struct rb_root *tree, const void *key,
+			       int (*cmp)(const void *key, const struct rb_node *))
+{
+	struct rb_node *node = tree->rb_node;
+	struct rb_node *match = NULL;
+
+	while (node) {
+		int c = cmp(key, node);
+		if (c <= 0) {
+			if (!c)
+				match = node;
+			node = node->rb_left;
+		} else if (c > 0) {
+			node = node->rb_right;
+		}
+	}
+
+	return match;
+}
+
+static struct rb_node *rb_next_match(struct rb_node *node, const void *key,
+				    int (*cmp)(const void *key, const struct rb_node *))
+{
+	node = rb_next(node);
+	if (node && cmp(key, node))
+		node = NULL;
+	return node;
+}
+
+#define rb_for_each(tree, node, key, cmp) \
+	for ((node) = rb_find_first((tree), (key), (cmp)); \
+	     (node); (node) = rb_next_match((node), (key), (cmp)))
+
+static int symbol_to_offset(struct rb_node *a, const struct rb_node *b)
+{
+	struct symbol *sa = rb_entry(a, struct symbol, node);
+	struct symbol *sb = rb_entry(b, struct symbol, node);
+
+	if (sa->offset < sb->offset)
+		return -1;
+	if (sa->offset > sb->offset)
+		return 1;
+
+	if (sa->len < sb->len)
+		return -1;
+	if (sa->len > sb->len)
+		return 1;
+
+	sa->alias = sb;
+
+	return 0;
+}
+
+static int symbol_by_offset(const void *key, const struct rb_node *node)
+{
+	const struct symbol *s = rb_entry(node, struct symbol, node);
+	const unsigned long *o = key;
+
+	if (*o < s->offset)
+		return -1;
+	if (*o > s->offset + s->len)
+		return 1;
+
+	return 0;
+}
+
 struct section *find_section_by_name(struct elf *elf, const char *name)
 {
 	struct section *sec;
 
-	list_for_each_entry(sec, &elf->sections, list)
+	hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
 		if (!strcmp(sec->name, name))
 			return sec;
 
@@ -37,7 +127,7 @@ static struct section *find_section_by_index(struct elf *elf,
 {
 	struct section *sec;
 
-	list_for_each_entry(sec, &elf->sections, list)
+	hash_for_each_possible(elf->section_hash, sec, hash, idx)
 		if (sec->idx == idx)
 			return sec;
 
@@ -46,88 +136,116 @@ static struct section *find_section_by_index(struct elf *elf,
 
 static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
 {
-	struct section *sec;
 	struct symbol *sym;
 
-	list_for_each_entry(sec, &elf->sections, list)
-		hash_for_each_possible(sec->symbol_hash, sym, hash, idx)
-			if (sym->idx == idx)
-				return sym;
+	hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
+		if (sym->idx == idx)
+			return sym;
 
 	return NULL;
 }
 
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
 {
-	struct symbol *sym;
+	struct rb_node *node;
 
-	list_for_each_entry(sym, &sec->symbol_list, list)
-		if (sym->type != STT_SECTION &&
-		    sym->offset == offset)
-			return sym;
+	rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) {
+		struct symbol *s = rb_entry(node, struct symbol, node);
+
+		if (s->offset == offset && s->type != STT_SECTION)
+			return s;
+	}
 
 	return NULL;
 }
 
-struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
+struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
 {
-	struct section *sec;
-	struct symbol *sym;
+	struct rb_node *node;
 
-	list_for_each_entry(sec, &elf->sections, list)
-		list_for_each_entry(sym, &sec->symbol_list, list)
-			if (!strcmp(sym->name, name))
-				return sym;
+	rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) {
+		struct symbol *s = rb_entry(node, struct symbol, node);
+
+		if (s->offset == offset && s->type == STT_FUNC)
+			return s;
+	}
 
 	return NULL;
 }
 
 struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
 {
-	struct symbol *sym;
+	struct rb_node *node;
 
-	list_for_each_entry(sym, &sec->symbol_list, list)
-		if (sym->type != STT_SECTION &&
-		    offset >= sym->offset && offset < sym->offset + sym->len)
-			return sym;
+	rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) {
+		struct symbol *s = rb_entry(node, struct symbol, node);
+
+		if (s->type != STT_SECTION)
+			return s;
+	}
 
 	return NULL;
 }
 
-struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
-				     unsigned int len)
+struct symbol *find_func_containing(struct section *sec, unsigned long offset)
 {
-	struct rela *rela;
-	unsigned long o;
+	struct rb_node *node;
 
-	if (!sec->rela)
-		return NULL;
+	rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) {
+		struct symbol *s = rb_entry(node, struct symbol, node);
 
-	for (o = offset; o < offset + len; o++)
-		hash_for_each_possible(sec->rela->rela_hash, rela, hash, o)
-			if (rela->offset == o)
-				return rela;
+		if (s->type == STT_FUNC)
+			return s;
+	}
 
 	return NULL;
 }
 
-struct rela *find_rela_by_dest(struct section *sec, unsigned long offset)
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
 {
-	return find_rela_by_dest_range(sec, offset, 1);
+	struct symbol *sym;
+
+	hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
+		if (!strcmp(sym->name, name))
+			return sym;
+
+	return NULL;
 }
 
-struct symbol *find_containing_func(struct section *sec, unsigned long offset)
+struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec,
+				     unsigned long offset, unsigned int len)
 {
-	struct symbol *func;
+	struct rela *rela, *r = NULL;
+	unsigned long o;
+
+	if (!sec->rela)
+		return NULL;
 
-	list_for_each_entry(func, &sec->symbol_list, list)
-		if (func->type == STT_FUNC && offset >= func->offset &&
-		    offset < func->offset + func->len)
-			return func;
+	sec = sec->rela;
+
+	for_offset_range(o, offset, offset + len) {
+		hash_for_each_possible(elf->rela_hash, rela, hash,
+				       sec_offset_hash(sec, o)) {
+			if (rela->sec != sec)
+				continue;
+
+			if (rela->offset >= offset && rela->offset < offset + len) {
+				if (!r || rela->offset < r->offset)
+					r = rela;
+			}
+		}
+		if (r)
+			return r;
+	}
 
 	return NULL;
 }
 
+struct rela *find_rela_by_dest(struct elf *elf, struct section *sec, unsigned long offset)
+{
+	return find_rela_by_dest_range(elf, sec, offset, 1);
+}
+
 static int read_sections(struct elf *elf)
 {
 	Elf_Scn *s = NULL;
@@ -155,10 +273,6 @@ static int read_sections(struct elf *elf)
 
 		INIT_LIST_HEAD(&sec->symbol_list);
 		INIT_LIST_HEAD(&sec->rela_list);
-		hash_init(sec->rela_hash);
-		hash_init(sec->symbol_hash);
-
-		list_add_tail(&sec->list, &elf->sections);
 
 		s = elf_getscn(elf->elf, i);
 		if (!s) {
@@ -193,8 +307,15 @@ static int read_sections(struct elf *elf)
 			}
 		}
 		sec->len = sec->sh.sh_size;
+
+		list_add_tail(&sec->list, &elf->sections);
+		hash_add(elf->section_hash, &sec->hash, sec->idx);
+		hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
 	}
 
+	if (stats)
+		printf("nr_sections: %lu\n", (unsigned long)sections_nr);
+
 	/* sanity check, one more call to elf_nextscn() should return NULL */
 	if (elf_nextscn(elf->elf, s)) {
 		WARN("section entry mismatch");
@@ -207,8 +328,9 @@ static int read_sections(struct elf *elf)
 static int read_symbols(struct elf *elf)
 {
 	struct section *symtab, *sec;
-	struct symbol *sym, *pfunc, *alias;
-	struct list_head *entry, *tmp;
+	struct symbol *sym, *pfunc;
+	struct list_head *entry;
+	struct rb_node *pnode;
 	int symbols_nr, i;
 	char *coldstr;
 
@@ -227,7 +349,7 @@ static int read_symbols(struct elf *elf)
 			return -1;
 		}
 		memset(sym, 0, sizeof(*sym));
-		alias = sym;
+		sym->alias = sym;
 
 		sym->idx = i;
 
@@ -265,33 +387,20 @@ static int read_symbols(struct elf *elf)
 		sym->offset = sym->sym.st_value;
 		sym->len = sym->sym.st_size;
 
-		/* sorted insert into a per-section list */
-		entry = &sym->sec->symbol_list;
-		list_for_each_prev(tmp, &sym->sec->symbol_list) {
-			struct symbol *s;
-
-			s = list_entry(tmp, struct symbol, list);
-
-			if (sym->offset > s->offset) {
-				entry = tmp;
-				break;
-			}
-
-			if (sym->offset == s->offset) {
-				if (sym->len && sym->len == s->len && alias == sym)
-					alias = s;
-
-				if (sym->len >= s->len) {
-					entry = tmp;
-					break;
-				}
-			}
-		}
-		sym->alias = alias;
+		rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset);
+		pnode = rb_prev(&sym->node);
+		if (pnode)
+			entry = &rb_entry(pnode, struct symbol, node)->list;
+		else
+			entry = &sym->sec->symbol_list;
 		list_add(&sym->list, entry);
-		hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx);
+		hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+		hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
 	}
 
+	if (stats)
+		printf("nr_symbols: %lu\n", (unsigned long)symbols_nr);
+
 	/* Create parent/child links for any cold subfunctions */
 	list_for_each_entry(sec, &elf->sections, list) {
 		list_for_each_entry(sym, &sec->symbol_list, list) {
@@ -353,6 +462,7 @@ static int read_relas(struct elf *elf)
 	struct rela *rela;
 	int i;
 	unsigned int symndx;
+	unsigned long nr_rela, max_rela = 0, tot_rela = 0;
 
 	list_for_each_entry(sec, &elf->sections, list) {
 		if (sec->sh.sh_type != SHT_RELA)
@@ -367,6 +477,7 @@ static int read_relas(struct elf *elf)
 
 		sec->base->rela = sec;
 
+		nr_rela = 0;
 		for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) {
 			rela = malloc(sizeof(*rela));
 			if (!rela) {
@@ -393,9 +504,16 @@ static int read_relas(struct elf *elf)
 			}
 
 			list_add_tail(&rela->list, &sec->rela_list);
-			hash_add(sec->rela_hash, &rela->hash, rela->offset);
-
+			hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+			nr_rela++;
 		}
+		max_rela = max(max_rela, nr_rela);
+		tot_rela += nr_rela;
+	}
+
+	if (stats) {
+		printf("max_rela: %lu\n", max_rela);
+		printf("tot_rela: %lu\n", tot_rela);
 	}
 
 	return 0;
@@ -415,6 +533,11 @@ struct elf *elf_read(const char *name, int flags)
 	}
 	memset(elf, 0, sizeof(*elf));
 
+	hash_init(elf->symbol_hash);
+	hash_init(elf->symbol_name_hash);
+	hash_init(elf->section_hash);
+	hash_init(elf->section_name_hash);
+	hash_init(elf->rela_hash);
 	INIT_LIST_HEAD(&elf->sections);
 
 	elf->fd = open(name, flags);
@@ -475,10 +598,6 @@ struct section *elf_create_section(struct elf *elf, const char *name,
 
 	INIT_LIST_HEAD(&sec->symbol_list);
 	INIT_LIST_HEAD(&sec->rela_list);
-	hash_init(sec->rela_hash);
-	hash_init(sec->symbol_hash);
-
-	list_add_tail(&sec->list, &elf->sections);
 
 	s = elf_newscn(elf->elf);
 	if (!s) {
@@ -556,6 +675,10 @@ struct section *elf_create_section(struct elf *elf, const char *name,
 	shstrtab->len += strlen(name) + 1;
 	shstrtab->changed = true;
 
+	list_add_tail(&sec->list, &elf->sections);
+	hash_add(elf->section_hash, &sec->hash, sec->idx);
+	hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+
 	return sec;
 }
 
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 44150204db4d..ebbb10c61e24 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -10,6 +10,8 @@
 #include <gelf.h>
 #include <linux/list.h>
 #include <linux/hashtable.h>
+#include <linux/rbtree.h>
+#include <linux/jhash.h>
 
 #ifdef LIBELF_USE_DEPRECATED
 # define elf_getshdrnum    elf_getshnum
@@ -25,11 +27,12 @@
 
 struct section {
 	struct list_head list;
+	struct hlist_node hash;
+	struct hlist_node name_hash;
 	GElf_Shdr sh;
+	struct rb_root symbol_tree;
 	struct list_head symbol_list;
-	DECLARE_HASHTABLE(symbol_hash, 8);
 	struct list_head rela_list;
-	DECLARE_HASHTABLE(rela_hash, 16);
 	struct section *base, *rela;
 	struct symbol *sym;
 	Elf_Data *data;
@@ -41,7 +44,9 @@ struct section {
 
 struct symbol {
 	struct list_head list;
+	struct rb_node node;
 	struct hlist_node hash;
+	struct hlist_node name_hash;
 	GElf_Sym sym;
 	struct section *sec;
 	char *name;
@@ -71,19 +76,51 @@ struct elf {
 	int fd;
 	char *name;
 	struct list_head sections;
-	DECLARE_HASHTABLE(rela_hash, 16);
+	DECLARE_HASHTABLE(symbol_hash, 20);
+	DECLARE_HASHTABLE(symbol_name_hash, 20);
+	DECLARE_HASHTABLE(section_hash, 16);
+	DECLARE_HASHTABLE(section_name_hash, 16);
+	DECLARE_HASHTABLE(rela_hash, 20);
 };
 
+#define OFFSET_STRIDE_BITS	4
+#define OFFSET_STRIDE		(1UL << OFFSET_STRIDE_BITS)
+#define OFFSET_STRIDE_MASK	(~(OFFSET_STRIDE - 1))
+
+#define for_offset_range(_offset, _start, _end)		\
+	for (_offset = ((_start) & OFFSET_STRIDE_MASK);	\
+	     _offset <= ((_end) & OFFSET_STRIDE_MASK);	\
+	     _offset += OFFSET_STRIDE)
+
+static inline u32 sec_offset_hash(struct section *sec, unsigned long offset)
+{
+	u32 ol, oh, idx = sec->idx;
+
+	offset &= OFFSET_STRIDE_MASK;
+
+	ol = offset;
+	oh = offset >> 32;
+
+	__jhash_mix(ol, oh, idx);
+
+	return ol;
+}
+
+static inline u32 rela_hash(struct rela *rela)
+{
+	return sec_offset_hash(rela->sec, rela->offset);
+}
 
 struct elf *elf_read(const char *name, int flags);
 struct section *find_section_by_name(struct elf *elf, const char *name);
+struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
 struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
-struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
-struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
-				     unsigned int len);
-struct symbol *find_containing_func(struct section *sec, unsigned long offset);
+struct rela *find_rela_by_dest(struct elf *elf, struct section *sec, unsigned long offset);
+struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec,
+				     unsigned long offset, unsigned int len);
+struct symbol *find_func_containing(struct section *sec, unsigned long offset);
 struct section *elf_create_section(struct elf *elf, const char *name, size_t
 				   entsize, int nr);
 struct section *elf_create_rela_section(struct elf *elf, struct section *base);
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 27a4112848c2..41e4a2754da4 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -81,7 +81,7 @@ int create_orc(struct objtool_file *file)
 	return 0;
 }
 
-static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
+static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relasec,
 				unsigned int idx, struct section *insn_sec,
 				unsigned long insn_off, struct orc_entry *o)
 {
@@ -109,9 +109,10 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
 	rela->addend = insn_off;
 	rela->type = R_X86_64_PC32;
 	rela->offset = idx * sizeof(int);
+	rela->sec = ip_relasec;
 
 	list_add_tail(&rela->list, &ip_relasec->rela_list);
-	hash_add(ip_relasec->rela_hash, &rela->hash, rela->offset);
+	hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
 
 	return 0;
 }
@@ -182,7 +183,7 @@ int create_orc_sections(struct objtool_file *file)
 			if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
 						 sizeof(struct orc_entry))) {
 
-				if (create_orc_entry(u_sec, ip_relasec, idx,
+				if (create_orc_entry(file->elf, u_sec, ip_relasec, idx,
 						     insn->sec, insn->offset,
 						     &insn->orc))
 					return -1;
@@ -194,7 +195,7 @@ int create_orc_sections(struct objtool_file *file)
 
 		/* section terminator */
 		if (prev_insn) {
-			if (create_orc_entry(u_sec, ip_relasec, idx,
+			if (create_orc_entry(file->elf, u_sec, ip_relasec, idx,
 					     prev_insn->sec,
 					     prev_insn->offset + prev_insn->len,
 					     &empty))
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index fdbaa611146d..e74e0189de22 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -118,7 +118,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
 		}
 	}
 
-	orig_rela = find_rela_by_dest(sec, offset + entry->orig);
+	orig_rela = find_rela_by_dest(elf, sec, offset + entry->orig);
 	if (!orig_rela) {
 		WARN_FUNC("can't find orig rela", sec, offset + entry->orig);
 		return -1;
@@ -133,7 +133,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
 	alt->orig_off = orig_rela->addend;
 
 	if (!entry->group || alt->new_len) {
-		new_rela = find_rela_by_dest(sec, offset + entry->new);
+		new_rela = find_rela_by_dest(elf, sec, offset + entry->new);
 		if (!new_rela) {
 			WARN_FUNC("can't find new rela",
 				  sec, offset + entry->new);
diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h
index cbb0a02b7480..7799f60de80a 100644
--- a/tools/objtool/warn.h
+++ b/tools/objtool/warn.h
@@ -21,7 +21,7 @@ static inline char *offstr(struct section *sec, unsigned long offset)
 	char *name, *str;
 	unsigned long name_off;
 
-	func = find_containing_func(sec, offset);
+	func = find_func_containing(sec, offset);
 	if (func) {
 		name = func->name;
 		name_off = offset - func->offset;
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
index 32b7c6f9043d..0a1344c45213 100644
--- a/tools/pci/pcitest.c
+++ b/tools/pci/pcitest.c
@@ -30,14 +30,17 @@ struct pci_test {
 	int		irqtype;
 	bool		set_irqtype;
 	bool		get_irqtype;
+	bool		clear_irq;
 	bool		read;
 	bool		write;
 	bool		copy;
 	unsigned long	size;
+	bool		use_dma;
 };
 
 static int run_test(struct pci_test *test)
 {
+	struct pci_endpoint_test_xfer_param param;
 	int ret = -EINVAL;
 	int fd;
 
@@ -74,6 +77,15 @@ static int run_test(struct pci_test *test)
 			fprintf(stdout, "%s\n", irq[ret]);
 	}
 
+	if (test->clear_irq) {
+		ret = ioctl(fd, PCITEST_CLEAR_IRQ);
+		fprintf(stdout, "CLEAR IRQ:\t\t");
+		if (ret < 0)
+			fprintf(stdout, "FAILED\n");
+		else
+			fprintf(stdout, "%s\n", result[ret]);
+	}
+
 	if (test->legacyirq) {
 		ret = ioctl(fd, PCITEST_LEGACY_IRQ, 0);
 		fprintf(stdout, "LEGACY IRQ:\t");
@@ -102,7 +114,10 @@ static int run_test(struct pci_test *test)
 	}
 
 	if (test->write) {
-		ret = ioctl(fd, PCITEST_WRITE, test->size);
+		param.size = test->size;
+		if (test->use_dma)
+			param.flags = PCITEST_FLAGS_USE_DMA;
+		ret = ioctl(fd, PCITEST_WRITE, &param);
 		fprintf(stdout, "WRITE (%7ld bytes):\t\t", test->size);
 		if (ret < 0)
 			fprintf(stdout, "TEST FAILED\n");
@@ -111,7 +126,10 @@ static int run_test(struct pci_test *test)
 	}
 
 	if (test->read) {
-		ret = ioctl(fd, PCITEST_READ, test->size);
+		param.size = test->size;
+		if (test->use_dma)
+			param.flags = PCITEST_FLAGS_USE_DMA;
+		ret = ioctl(fd, PCITEST_READ, &param);
 		fprintf(stdout, "READ (%7ld bytes):\t\t", test->size);
 		if (ret < 0)
 			fprintf(stdout, "TEST FAILED\n");
@@ -120,7 +138,10 @@ static int run_test(struct pci_test *test)
 	}
 
 	if (test->copy) {
-		ret = ioctl(fd, PCITEST_COPY, test->size);
+		param.size = test->size;
+		if (test->use_dma)
+			param.flags = PCITEST_FLAGS_USE_DMA;
+		ret = ioctl(fd, PCITEST_COPY, &param);
 		fprintf(stdout, "COPY (%7ld bytes):\t\t", test->size);
 		if (ret < 0)
 			fprintf(stdout, "TEST FAILED\n");
@@ -153,7 +174,7 @@ int main(int argc, char **argv)
 	/* set default endpoint device */
 	test->device = "/dev/pci-endpoint-test.0";
 
-	while ((c = getopt(argc, argv, "D:b:m:x:i:Ilhrwcs:")) != EOF)
+	while ((c = getopt(argc, argv, "D:b:m:x:i:deIlhrwcs:")) != EOF)
 	switch (c) {
 	case 'D':
 		test->device = optarg;
@@ -194,9 +215,15 @@ int main(int argc, char **argv)
 	case 'c':
 		test->copy = true;
 		continue;
+	case 'e':
+		test->clear_irq = true;
+		continue;
 	case 's':
 		test->size = strtoul(optarg, NULL, 0);
 		continue;
+	case 'd':
+		test->use_dma = true;
+		continue;
 	case 'h':
 	default:
 usage:
@@ -208,7 +235,9 @@ usage:
 			"\t-m <msi num>		MSI test (msi number between 1..32)\n"
 			"\t-x <msix num>	\tMSI-X test (msix number between 1..2048)\n"
 			"\t-i <irq type>	\tSet IRQ type (0 - Legacy, 1 - MSI, 2 - MSI-X)\n"
+			"\t-e			Clear IRQ\n"
 			"\t-I			Get current IRQ type configured\n"
+			"\t-d			Use DMA\n"
 			"\t-l			Legacy IRQ test\n"
 			"\t-r			Read buffer test\n"
 			"\t-w			Write buffer test\n"
diff --git a/tools/pcmcia/.gitignore b/tools/pcmcia/.gitignore
index 53d081336757..94cb97b77f06 100644
--- a/tools/pcmcia/.gitignore
+++ b/tools/pcmcia/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 crc32hash
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index bf1252dc2cb0..f3f84781fd74 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 PERF-CFLAGS
 PERF-GUI-VARS
 PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index adc5a7e44b98..31824d5269cc 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -295,7 +295,10 @@ $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
 $(OUTPUT)%.xml : %.txt
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
 	$(ASCIIDOC) -b docbook -d manpage \
-		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \
+		-aperf_date=$(shell git log -1 --pretty="format:%cd" \
+				--date=short $<) \
+		-o $@+ $< && \
 	mv $@+ $@
 
 XSLT = docbook.xsl
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 2cf2d9e9d0da..fd9241a1b987 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -1,991 +1 @@
-Intel Processor Trace
-=====================
-
-Overview
-========
-
-Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
-collects information about software execution such as control flow, execution
-modes and timings and formats it into highly compressed binary packets.
-Technical details are documented in the Intel 64 and IA-32 Architectures
-Software Developer Manuals, Chapter 36 Intel Processor Trace.
-
-Intel PT is first supported in Intel Core M and 5th generation Intel Core
-processors that are based on the Intel micro-architecture code name Broadwell.
-
-Trace data is collected by 'perf record' and stored within the perf.data file.
-See below for options to 'perf record'.
-
-Trace data must be 'decoded' which involves walking the object code and matching
-the trace data packets. For example a TNT packet only tells whether a
-conditional branch was taken or not taken, so to make use of that packet the
-decoder must know precisely which instruction was being executed.
-
-Decoding is done on-the-fly.  The decoder outputs samples in the same format as
-samples output by perf hardware events, for example as though the "instructions"
-or "branches" events had been recorded.  Presently 3 tools support this:
-'perf script', 'perf report' and 'perf inject'.  See below for more information
-on using those tools.
-
-The main distinguishing feature of Intel PT is that the decoder can determine
-the exact flow of software execution.  Intel PT can be used to understand why
-and how did software get to a certain point, or behave a certain way.  The
-software does not have to be recompiled, so Intel PT works with debug or release
-builds, however the executed images are needed - which makes use in JIT-compiled
-environments, or with self-modified code, a challenge.  Also symbols need to be
-provided to make sense of addresses.
-
-A limitation of Intel PT is that it produces huge amounts of trace data
-(hundreds of megabytes per second per core) which takes a long time to decode,
-for example two or three orders of magnitude longer than it took to collect.
-Another limitation is the performance impact of tracing, something that will
-vary depending on the use-case and architecture.
-
-
-Quickstart
-==========
-
-It is important to start small.  That is because it is easy to capture vastly
-more data than can possibly be processed.
-
-The simplest thing to do with Intel PT is userspace profiling of small programs.
-Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
-
-	perf record -e intel_pt//u ls
-
-And profiled with 'perf report' e.g.
-
-	perf report
-
-To also trace kernel space presents a problem, namely kernel self-modifying
-code.  A fairly good kernel image is available in /proc/kcore but to get an
-accurate image a copy of /proc/kcore needs to be made under the same conditions
-as the data capture.  A script perf-with-kcore can do that, but beware that the
-script makes use of 'sudo' to copy /proc/kcore.  If you have perf installed
-locally from the source tree you can do:
-
-	~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
-
-which will create a directory named 'pt_ls' and put the perf.data file and
-copies of /proc/kcore, /proc/kallsyms and /proc/modules into it.  Then to use
-'perf report' becomes:
-
-	~/libexec/perf-core/perf-with-kcore report pt_ls
-
-Because samples are synthesized after-the-fact, the sampling period can be
-selected for reporting. e.g. sample every microsecond
-
-	~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
-
-See the sections below for more information about the --itrace option.
-
-Beware the smaller the period, the more samples that are produced, and the
-longer it takes to process them.
-
-Also note that the coarseness of Intel PT timing information will start to
-distort the statistical value of the sampling as the sampling period becomes
-smaller.
-
-To represent software control flow, "branches" samples are produced.  By default
-a branch sample is synthesized for every single branch.  To get an idea what
-data is available you can use the 'perf script' tool with all itrace sampling
-options, which will list all the samples.
-
-	perf record -e intel_pt//u ls
-	perf script --itrace=ibxwpe
-
-An interesting field that is not printed by default is 'flags' which can be
-displayed as follows:
-
-	perf script --itrace=ibxwpe -F+flags
-
-The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
-system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
-in transaction, respectively.
-
-Another interesting field that is not printed by default is 'ipc' which can be
-displayed as follows:
-
-	perf script --itrace=be -F+ipc
-
-There are two ways that instructions-per-cycle (IPC) can be calculated depending
-on the recording.
-
-If the 'cyc' config term (see config terms section below) was used, then IPC is
-calculated using the cycle count from CYC packets, otherwise MTC packets are
-used - refer to the 'mtc' config term.  When MTC is used, however, the values
-are less accurate because the timing is less accurate.
-
-Because Intel PT does not update the cycle count on every branch or instruction,
-the values will often be zero.  When there are values, they will be the number
-of instructions and number of cycles since the last update, and thus represent
-the average IPC since the last IPC for that event type.  Note IPC for "branches"
-events is calculated separately from IPC for "instructions" events.
-
-Also note that the IPC instruction count may or may not include the current
-instruction.  If the cycle count is associated with an asynchronous branch
-(e.g. page fault or interrupt), then the instruction count does not include the
-current instruction, otherwise it does.  That is consistent with whether or not
-that instruction has retired when the cycle count is updated.
-
-Another note, in the case of "branches" events, non-taken branches are not
-presently sampled, so IPC values for them do not appear e.g. a CYC packet with a
-TNT packet that starts with a non-taken branch.  To see every possible IPC
-value, "instructions" events can be used e.g. --itrace=i0ns
-
-While it is possible to create scripts to analyze the data, an alternative
-approach is available to export the data to a sqlite or postgresql database.
-Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
-and to script exported-sql-viewer.py for an example of using the database.
-
-There is also script intel-pt-events.py which provides an example of how to
-unpack the raw data for power events and PTWRITE.
-
-As mentioned above, it is easy to capture too much data.  One way to limit the
-data captured is to use 'snapshot' mode which is explained further below.
-Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
-
-Another problem that will be experienced is decoder errors.  They can be caused
-by inability to access the executed image, self-modified or JIT-ed code, or the
-inability to match side-band information (such as context switches and mmaps)
-which results in the decoder not knowing what code was executed.
-
-There is also the problem of perf not being able to copy the data fast enough,
-resulting in data lost because the buffer was full.  See 'Buffer handling' below
-for more details.
-
-
-perf record
-===========
-
-new event
----------
-
-The Intel PT kernel driver creates a new PMU for Intel PT.  PMU events are
-selected by providing the PMU name followed by the "config" separated by slashes.
-An enhancement has been made to allow default "config" e.g. the option
-
-	-e intel_pt//
-
-will use a default config value.  Currently that is the same as
-
-	-e intel_pt/tsc,noretcomp=0/
-
-which is the same as
-
-	-e intel_pt/tsc=1,noretcomp=0/
-
-Note there are now new config terms - see section 'config terms' further below.
-
-The config terms are listed in /sys/devices/intel_pt/format.  They are bit
-fields within the config member of the struct perf_event_attr which is
-passed to the kernel by the perf_event_open system call.  They correspond to bit
-fields in the IA32_RTIT_CTL MSR.  Here is a list of them and their definitions:
-
-	$ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
-	/sys/bus/event_source/devices/intel_pt/format/cyc:config:1
-	/sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
-	/sys/bus/event_source/devices/intel_pt/format/mtc:config:9
-	/sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
-	/sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
-	/sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
-	/sys/bus/event_source/devices/intel_pt/format/tsc:config:10
-
-Note that the default config must be overridden for each term i.e.
-
-	-e intel_pt/noretcomp=0/
-
-is the same as:
-
-	-e intel_pt/tsc=1,noretcomp=0/
-
-So, to disable TSC packets use:
-
-	-e intel_pt/tsc=0/
-
-It is also possible to specify the config value explicitly:
-
-	-e intel_pt/config=0x400/
-
-Note that, as with all events, the event is suffixed with event modifiers:
-
-	u	userspace
-	k	kernel
-	h	hypervisor
-	G	guest
-	H	host
-	p	precise ip
-
-'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
-'p' is also not relevant to Intel PT.  So only options 'u' and 'k' are
-meaningful for Intel PT.
-
-perf_event_attr is displayed if the -vv option is used e.g.
-
-	------------------------------------------------------------
-	perf_event_attr:
-	type                             6
-	size                             112
-	config                           0x400
-	{ sample_period, sample_freq }   1
-	sample_type                      IP|TID|TIME|CPU|IDENTIFIER
-	read_format                      ID
-	disabled                         1
-	inherit                          1
-	exclude_kernel                   1
-	exclude_hv                       1
-	enable_on_exec                   1
-	sample_id_all                    1
-	------------------------------------------------------------
-	sys_perf_event_open: pid 31104  cpu 0  group_fd -1  flags 0x8
-	sys_perf_event_open: pid 31104  cpu 1  group_fd -1  flags 0x8
-	sys_perf_event_open: pid 31104  cpu 2  group_fd -1  flags 0x8
-	sys_perf_event_open: pid 31104  cpu 3  group_fd -1  flags 0x8
-	------------------------------------------------------------
-
-
-config terms
-------------
-
-The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
-Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
-Some of the features are reflect in new config terms.  All the config terms are
-described below.
-
-tsc		Always supported.  Produces TSC timestamp packets to provide
-		timing information.  In some cases it is possible to decode
-		without timing information, for example a per-thread context
-		that does not overlap executable memory maps.
-
-		The default config selects tsc (i.e. tsc=1).
-
-noretcomp	Always supported.  Disables "return compression" so a TIP packet
-		is produced when a function returns.  Causes more packets to be
-		produced but might make decoding more reliable.
-
-		The default config does not select noretcomp (i.e. noretcomp=0).
-
-psb_period	Allows the frequency of PSB packets to be specified.
-
-		The PSB packet is a synchronization packet that provides a
-		starting point for decoding or recovery from errors.
-
-		Support for psb_period is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
-
-		which contains "1" if the feature is supported and "0"
-		otherwise.
-
-		Valid values are given by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/psb_periods
-
-		which contains a hexadecimal value, the bits of which represent
-		valid values e.g. bit 2 set means value 2 is valid.
-
-		The psb_period value is converted to the approximate number of
-		trace bytes between PSB packets as:
-
-			2 ^ (value + 11)
-
-		e.g. value 3 means 16KiB bytes between PSBs
-
-		If an invalid value is entered, the error message
-		will give a list of valid values e.g.
-
-			$ perf record -e intel_pt/psb_period=15/u uname
-			Invalid psb_period for intel_pt. Valid values are: 0-5
-
-		If MTC packets are selected, the default config selects a value
-		of 3 (i.e. psb_period=3) or the nearest lower value that is
-		supported (0 is always supported).  Otherwise the default is 0.
-
-		If decoding is expected to be reliable and the buffer is large
-		then a large PSB period can be used.
-
-		Because a TSC packet is produced with PSB, the PSB period can
-		also affect the granularity to timing information in the absence
-		of MTC or CYC.
-
-mtc		Produces MTC timing packets.
-
-		MTC packets provide finer grain timestamp information than TSC
-		packets.  MTC packets record time using the hardware crystal
-		clock (CTC) which is related to TSC packets using a TMA packet.
-
-		Support for this feature is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/mtc
-
-		which contains "1" if the feature is supported and
-		"0" otherwise.
-
-		The frequency of MTC packets can also be specified - see
-		mtc_period below.
-
-mtc_period	Specifies how frequently MTC packets are produced - see mtc
-		above for how to determine if MTC packets are supported.
-
-		Valid values are given by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/mtc_periods
-
-		which contains a hexadecimal value, the bits of which represent
-		valid values e.g. bit 2 set means value 2 is valid.
-
-		The mtc_period value is converted to the MTC frequency as:
-
-			CTC-frequency / (2 ^ value)
-
-		e.g. value 3 means one eighth of CTC-frequency
-
-		Where CTC is the hardware crystal clock, the frequency of which
-		can be related to TSC via values provided in cpuid leaf 0x15.
-
-		If an invalid value is entered, the error message
-		will give a list of valid values e.g.
-
-			$ perf record -e intel_pt/mtc_period=15/u uname
-			Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
-
-		The default value is 3 or the nearest lower value
-		that is supported (0 is always supported).
-
-cyc		Produces CYC timing packets.
-
-		CYC packets provide even finer grain timestamp information than
-		MTC and TSC packets.  A CYC packet contains the number of CPU
-		cycles since the last CYC packet. Unlike MTC and TSC packets,
-		CYC packets are only sent when another packet is also sent.
-
-		Support for this feature is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
-
-		which contains "1" if the feature is supported and
-		"0" otherwise.
-
-		The number of CYC packets produced can be reduced by specifying
-		a threshold - see cyc_thresh below.
-
-cyc_thresh	Specifies how frequently CYC packets are produced - see cyc
-		above for how to determine if CYC packets are supported.
-
-		Valid cyc_thresh values are given by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
-
-		which contains a hexadecimal value, the bits of which represent
-		valid values e.g. bit 2 set means value 2 is valid.
-
-		The cyc_thresh value represents the minimum number of CPU cycles
-		that must have passed before a CYC packet can be sent.  The
-		number of CPU cycles is:
-
-			2 ^ (value - 1)
-
-		e.g. value 4 means 8 CPU cycles must pass before a CYC packet
-		can be sent.  Note a CYC packet is still only sent when another
-		packet is sent, not at, e.g. every 8 CPU cycles.
-
-		If an invalid value is entered, the error message
-		will give a list of valid values e.g.
-
-			$ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
-			Invalid cyc_thresh for intel_pt. Valid values are: 0-12
-
-		CYC packets are not requested by default.
-
-pt		Specifies pass-through which enables the 'branch' config term.
-
-		The default config selects 'pt' if it is available, so a user will
-		never need to specify this term.
-
-branch		Enable branch tracing.  Branch tracing is enabled by default so to
-		disable branch tracing use 'branch=0'.
-
-		The default config selects 'branch' if it is available.
-
-ptw		Enable PTWRITE packets which are produced when a ptwrite instruction
-		is executed.
-
-		Support for this feature is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/ptwrite
-
-		which contains "1" if the feature is supported and
-		"0" otherwise.
-
-fup_on_ptw	Enable a FUP packet to follow the PTWRITE packet.  The FUP packet
-		provides the address of the ptwrite instruction.  In the absence of
-		fup_on_ptw, the decoder will use the address of the previous branch
-		if branch tracing is enabled, otherwise the address will be zero.
-		Note that fup_on_ptw will work even when branch tracing is disabled.
-
-pwr_evt		Enable power events.  The power events provide information about
-		changes to the CPU C-state.
-
-		Support for this feature is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
-
-		which contains "1" if the feature is supported and
-		"0" otherwise.
-
-
-AUX area sampling option
-------------------------
-
-To select Intel PT "sampling" the AUX area sampling option can be used:
-
-	--aux-sample
-
-Optionally it can be followed by the sample size in bytes e.g.
-
-	--aux-sample=8192
-
-In addition, the Intel PT event to sample must be defined e.g.
-
-	-e intel_pt//u
-
-Samples on other events will be created containing Intel PT data e.g. the
-following will create Intel PT samples on the branch-misses event, note the
-events must be grouped using {}:
-
-	perf record --aux-sample -e '{intel_pt//u,branch-misses:u}'
-
-An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to
-events.  In this case, the grouping is implied e.g.
-
-	perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u
-
-is the same as:
-
-	perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}'
-
-but allows for also using an address filter e.g.:
-
-	perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls
-
-It is important to select a sample size that is big enough to contain at least
-one PSB packet.  If not a warning will be displayed:
-
-	Intel PT sample size (%zu) may be too small for PSB period (%zu)
-
-The calculation used for that is: if sample_size <= psb_period + 256 display the
-warning.  When sampling is used, psb_period defaults to 0 (2KiB).
-
-The default sample size is 4KiB.
-
-The sample size is passed in aux_sample_size in struct perf_event_attr.  The
-sample size is limited by the maximum event size which is 64KiB.  It is
-difficult to know how big the event might be without the trace sample attached,
-but the tool validates that the sample size is not greater than 60KiB.
-
-
-new snapshot option
--------------------
-
-The difference between full trace and snapshot from the kernel's perspective is
-that in full trace we don't overwrite trace data that the user hasn't collected
-yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
-the trace run and overwrite older data in the buffer so that whenever something
-interesting happens, we can stop it and grab a snapshot of what was going on
-around that interesting moment.
-
-To select snapshot mode a new option has been added:
-
-	-S
-
-Optionally it can be followed by the snapshot size e.g.
-
-	-S0x100000
-
-The default snapshot size is the auxtrace mmap size.  If neither auxtrace mmap size
-nor snapshot size is specified, then the default is 4MiB for privileged users
-(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
-If an unprivileged user does not specify mmap pages, the mmap pages will be
-reduced as described in the 'new auxtrace mmap size option' section below.
-
-The snapshot size is displayed if the option -vv is used e.g.
-
-	Intel PT snapshot size: %zu
-
-
-new auxtrace mmap size option
----------------------------
-
-Intel PT buffer size is specified by an addition to the -m option e.g.
-
-	-m,16
-
-selects a buffer size of 16 pages i.e. 64KiB.
-
-Note that the existing functionality of -m is unchanged.  The auxtrace mmap size
-is specified by the optional addition of a comma and the value.
-
-The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
-(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
-If an unprivileged user does not specify mmap pages, the mmap pages will be
-reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
-user is likely to get an error as they exceed their mlock limit (Max locked
-memory as shown in /proc/self/limits).  Note that perf does not count the first
-512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
-against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
-their mlock limit (which defaults to 64KiB but is not multiplied by the number
-of cpus).
-
-In full-trace mode, powers of two are allowed for buffer size, with a minimum
-size of 2 pages.  In snapshot mode or sampling mode, it is the same but the
-minimum size is 1 page.
-
-The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
-
-	mmap length 528384
-	auxtrace mmap length 4198400
-
-
-Intel PT modes of operation
----------------------------
-
-Intel PT can be used in 2 modes:
-	full-trace mode
-	sample mode
-	snapshot mode
-
-Full-trace mode traces continuously e.g.
-
-	perf record -e intel_pt//u uname
-
-Sample mode attaches a Intel PT sample to other events e.g.
-
-	perf record --aux-sample -e intel_pt//u -e branch-misses:u
-
-Snapshot mode captures the available data when a signal is sent e.g.
-
-	perf record -v -e intel_pt//u -S ./loopy 1000000000 &
-	[1] 11435
-	kill -USR2 11435
-	Recording AUX area tracing snapshot
-
-Note that the signal sent is SIGUSR2.
-Note that "Recording AUX area tracing snapshot" is displayed because the -v
-option is used.
-
-The 2 modes cannot be used together.
-
-
-Buffer handling
----------------
-
-There may be buffer limitations (i.e. single ToPa entry) which means that actual
-buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER).  In order to
-provide other sizes, and in particular an arbitrarily large size, multiple
-buffers are logically concatenated.  However an interrupt must be used to switch
-between buffers.  That has two potential problems:
-	a) the interrupt may not be handled in time so that the current buffer
-	becomes full and some trace data is lost.
-	b) the interrupts may slow the system and affect the performance
-	results.
-
-If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
-which the tools report as an error.
-
-In full-trace mode, the driver waits for data to be copied out before allowing
-the (logical) buffer to wrap-around.  If data is not copied out quickly enough,
-again 'truncated' is set in the PERF_RECORD_AUX event.  If the driver has to
-wait, the intel_pt event gets disabled.  Because it is difficult to know when
-that happens, perf tools always re-enable the intel_pt event after copying out
-data.
-
-
-Intel PT and build ids
-----------------------
-
-By default "perf record" post-processes the event stream to find all build ids
-for executables for all addresses sampled.  Deliberately, Intel PT is not
-decoded for that purpose (it would take too long).  Instead the build ids for
-all executables encountered (due to mmap, comm or task events) are included
-in the perf.data file.
-
-To see buildids included in the perf.data file use the command:
-
-	perf buildid-list
-
-If the perf.data file contains Intel PT data, that is the same as:
-
-	perf buildid-list --with-hits
-
-
-Snapshot mode and event disabling
----------------------------------
-
-In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
-namely PERF_EVENT_IOC_DISABLE.  However doing that can also disable the
-collection of side-band information.  In order to prevent that,  a dummy
-software event has been introduced that permits tracking events (like mmaps) to
-continue to be recorded while intel_pt is disabled.  That is important to ensure
-there is complete side-band information to allow the decoding of subsequent
-snapshots.
-
-A test has been created for that.  To find the test:
-
-	perf test list
-	...
-	23: Test using a dummy software event to keep tracking
-
-To run the test:
-
-	perf test 23
-	23: Test using a dummy software event to keep tracking     : Ok
-
-
-perf record modes (nothing new here)
-------------------------------------
-
-perf record essentially operates in one of three modes:
-	per thread
-	per cpu
-	workload only
-
-"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
-workload).
-"per cpu" is selected by -C or -a.
-"workload only" mode is selected by not using the other options but providing a
-command to run (i.e. the workload).
-
-In per-thread mode an exact list of threads is traced.  There is no inheritance.
-Each thread has its own event buffer.
-
-In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
-option, or processes selected with -p or -u) are traced.  Each cpu has its own
-buffer. Inheritance is allowed.
-
-In workload-only mode, the workload is traced but with per-cpu buffers.
-Inheritance is allowed.  Note that you can now trace a workload in per-thread
-mode by using the --per-thread option.
-
-
-Privileged vs non-privileged users
-----------------------------------
-
-Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
-have memory limits imposed upon them.  That affects what buffer sizes they can
-have as outlined above.
-
-The v4.2 kernel introduced support for a context switch metadata event,
-PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
-are scheduled out and in, just not by whom, which is left for the
-PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
-which in turn requires CAP_SYS_ADMIN.
-
-Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
-switches") commit, that introduces these metadata events for further info.
-
-When working with kernels < v4.2, the following considerations must be taken,
-as the sched:sched_switch tracepoints will be used to receive such information:
-
-Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
-not permitted to use tracepoints which means there is insufficient side-band
-information to decode Intel PT in per-cpu mode, and potentially workload-only
-mode too if the workload creates new processes.
-
-Note also, that to use tracepoints, read-access to debugfs is required.  So if
-debugfs is not mounted or the user does not have read-access, it will again not
-be possible to decode Intel PT in per-cpu mode.
-
-
-sched_switch tracepoint
------------------------
-
-The sched_switch tracepoint is used to provide side-band data for Intel PT
-decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
-available.
-
-The sched_switch events are automatically added. e.g. the second event shown
-below:
-
-	$ perf record -vv -e intel_pt//u uname
-	------------------------------------------------------------
-	perf_event_attr:
-	type                             6
-	size                             112
-	config                           0x400
-	{ sample_period, sample_freq }   1
-	sample_type                      IP|TID|TIME|CPU|IDENTIFIER
-	read_format                      ID
-	disabled                         1
-	inherit                          1
-	exclude_kernel                   1
-	exclude_hv                       1
-	enable_on_exec                   1
-	sample_id_all                    1
-	------------------------------------------------------------
-	sys_perf_event_open: pid 31104  cpu 0  group_fd -1  flags 0x8
-	sys_perf_event_open: pid 31104  cpu 1  group_fd -1  flags 0x8
-	sys_perf_event_open: pid 31104  cpu 2  group_fd -1  flags 0x8
-	sys_perf_event_open: pid 31104  cpu 3  group_fd -1  flags 0x8
-	------------------------------------------------------------
-	perf_event_attr:
-	type                             2
-	size                             112
-	config                           0x108
-	{ sample_period, sample_freq }   1
-	sample_type                      IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
-	read_format                      ID
-	inherit                          1
-	sample_id_all                    1
-	exclude_guest                    1
-	------------------------------------------------------------
-	sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8
-	sys_perf_event_open: pid -1  cpu 1  group_fd -1  flags 0x8
-	sys_perf_event_open: pid -1  cpu 2  group_fd -1  flags 0x8
-	sys_perf_event_open: pid -1  cpu 3  group_fd -1  flags 0x8
-	------------------------------------------------------------
-	perf_event_attr:
-	type                             1
-	size                             112
-	config                           0x9
-	{ sample_period, sample_freq }   1
-	sample_type                      IP|TID|TIME|IDENTIFIER
-	read_format                      ID
-	disabled                         1
-	inherit                          1
-	exclude_kernel                   1
-	exclude_hv                       1
-	mmap                             1
-	comm                             1
-	enable_on_exec                   1
-	task                             1
-	sample_id_all                    1
-	mmap2                            1
-	comm_exec                        1
-	------------------------------------------------------------
-	sys_perf_event_open: pid 31104  cpu 0  group_fd -1  flags 0x8
-	sys_perf_event_open: pid 31104  cpu 1  group_fd -1  flags 0x8
-	sys_perf_event_open: pid 31104  cpu 2  group_fd -1  flags 0x8
-	sys_perf_event_open: pid 31104  cpu 3  group_fd -1  flags 0x8
-	mmap size 528384B
-	AUX area mmap length 4194304
-	perf event ring buffer mmapped per cpu
-	Synthesizing auxtrace information
-	Linux
-	[ perf record: Woken up 1 times to write data ]
-	[ perf record: Captured and wrote 0.042 MB perf.data ]
-
-Note, the sched_switch event is only added if the user is permitted to use it
-and only in per-cpu mode.
-
-Note also, the sched_switch event is only added if TSC packets are requested.
-That is because, in the absence of timing information, the sched_switch events
-cannot be matched against the Intel PT trace.
-
-
-perf script
-===========
-
-By default, perf script will decode trace data found in the perf.data file.
-This can be further controlled by new option --itrace.
-
-
-New --itrace option
--------------------
-
-Having no option is the same as
-
-	--itrace
-
-which, in turn, is the same as
-
-	--itrace=cepwx
-
-The letters are:
-
-	i	synthesize "instructions" events
-	b	synthesize "branches" events
-	x	synthesize "transactions" events
-	w	synthesize "ptwrite" events
-	p	synthesize "power" events
-	c	synthesize branches events (calls only)
-	r	synthesize branches events (returns only)
-	e	synthesize tracing error events
-	d	create a debug log
-	g	synthesize a call chain (use with i or x)
-	l	synthesize last branch entries (use with i or x)
-	s	skip initial number of events
-
-"Instructions" events look like they were recorded by "perf record -e
-instructions".
-
-"Branches" events look like they were recorded by "perf record -e branches". "c"
-and "r" can be combined to get calls and returns.
-
-"Transactions" events correspond to the start or end of transactions. The
-'flags' field can be used in perf script to determine whether the event is a
-tranasaction start, commit or abort.
-
-Note that "instructions", "branches" and "transactions" events depend on code
-flow packets which can be disabled by using the config term "branch=0".  Refer
-to the config terms section above.
-
-"ptwrite" events record the payload of the ptwrite instruction and whether
-"fup_on_ptw" was used.  "ptwrite" events depend on PTWRITE packets which are
-recorded only if the "ptw" config term was used.  Refer to the config terms
-section above.  perf script "synth" field displays "ptwrite" information like
-this: "ip: 0 payload: 0x123456789abcdef0"  where "ip" is 1 if "fup_on_ptw" was
-used.
-
-"Power" events correspond to power event packets and CBR (core-to-bus ratio)
-packets.  While CBR packets are always recorded when tracing is enabled, power
-event packets are recorded only if the "pwr_evt" config term was used.  Refer to
-the config terms section above.  The power events record information about
-C-state changes, whereas CBR is indicative of CPU frequency.  perf script
-"event,synth" fields display information like this:
-	cbr:  cbr: 22 freq: 2189 MHz (200%)
-	mwait:  hints: 0x60 extensions: 0x1
-	pwre:  hw: 0 cstate: 2 sub-cstate: 0
-	exstop:  ip: 1
-	pwrx:  deepest cstate: 2 last cstate: 2 wake reason: 0x4
-Where:
-	"cbr" includes the frequency and the percentage of maximum non-turbo
-	"mwait" shows mwait hints and extensions
-	"pwre" shows C-state transitions (to a C-state deeper than C0) and
-	whether	initiated by hardware
-	"exstop" indicates execution stopped and whether the IP was recorded
-	exactly,
-	"pwrx" indicates return to C0
-For more details refer to the Intel 64 and IA-32 Architectures Software
-Developer Manuals.
-
-Error events show where the decoder lost the trace.  Error events
-are quite important.  Users must know if what they are seeing is a complete
-picture or not.
-
-The "d" option will cause the creation of a file "intel_pt.log" containing all
-decoded packets and instructions.  Note that this option slows down the decoder
-and that the resulting file may be very large.
-
-In addition, the period of the "instructions" event can be specified. e.g.
-
-	--itrace=i10us
-
-sets the period to 10us i.e. one  instruction sample is synthesized for each 10
-microseconds of trace.  Alternatives to "us" are "ms" (milliseconds),
-"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
-
-"ms", "us" and "ns" are converted to TSC ticks.
-
-The timing information included with Intel PT does not give the time of every
-instruction.  Consequently, for the purpose of sampling, the decoder estimates
-the time since the last timing packet based on 1 tick per instruction.  The time
-on the sample is *not* adjusted and reflects the last known value of TSC.
-
-For Intel PT, the default period is 100us.
-
-Setting it to a zero period means "as often as possible".
-
-In the case of Intel PT that is the same as a period of 1 and a unit of
-'instructions' (i.e. --itrace=i1i).
-
-Also the call chain size (default 16, max. 1024) for instructions or
-transactions events can be specified. e.g.
-
-	--itrace=ig32
-	--itrace=xg32
-
-Also the number of last branch entries (default 64, max. 1024) for instructions or
-transactions events can be specified. e.g.
-
-       --itrace=il10
-       --itrace=xl10
-
-Note that last branch entries are cleared for each sample, so there is no overlap
-from one sample to the next.
-
-To disable trace decoding entirely, use the option --no-itrace.
-
-It is also possible to skip events generated (instructions, branches, transactions)
-at the beginning. This is useful to ignore initialization code.
-
-	--itrace=i0nss1000000
-
-skips the first million instructions.
-
-dump option
------------
-
-perf script has an option (-D) to "dump" the events i.e. display the binary
-data.
-
-When -D is used, Intel PT packets are displayed.  The packet decoder does not
-pay attention to PSB packets, but just decodes the bytes - so the packets seen
-by the actual decoder may not be identical in places where the data is corrupt.
-One example of that would be when the buffer-switching interrupt has been too
-slow, and the buffer has been filled completely.  In that case, the last packet
-in the buffer might be truncated and immediately followed by a PSB as the trace
-continues in the next buffer.
-
-To disable the display of Intel PT packets, combine the -D option with
---no-itrace.
-
-
-perf report
-===========
-
-By default, perf report will decode trace data found in the perf.data file.
-This can be further controlled by new option --itrace exactly the same as
-perf script, with the exception that the default is --itrace=igxe.
-
-
-perf inject
-===========
-
-perf inject also accepts the --itrace option in which case tracing data is
-removed and replaced with the synthesized events. e.g.
-
-	perf inject --itrace -i perf.data -o perf.data.new
-
-Below is an example of using Intel PT with autofdo.  It requires autofdo
-(https://github.com/google/autofdo) and gcc version 5.  The bubble
-sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
-amended to take the number of elements as a parameter.
-
-	$ gcc-5 -O3 sort.c -o sort_optimized
-	$ ./sort_optimized 30000
-	Bubble sorting array of 30000 elements
-	2254 ms
-
-	$ cat ~/.perfconfig
-	[intel-pt]
-		mispred-all = on
-
-	$ perf record -e intel_pt//u ./sort 3000
-	Bubble sorting array of 3000 elements
-	58 ms
-	[ perf record: Woken up 2 times to write data ]
-	[ perf record: Captured and wrote 3.939 MB perf.data ]
-	$ perf inject -i perf.data -o inj --itrace=i100usle --strip
-	$ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
-	$ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
-	$ ./sort_autofdo 30000
-	Bubble sorting array of 30000 elements
-	2155 ms
-
-Note there is currently no advantage to using Intel PT instead of LBR, but
-that may change in the future if greater use is made of the data.
-
-
-PEBS via Intel PT
-=================
-
-Some hardware has the feature to redirect PEBS records to the Intel PT trace.
-Recording is selected by using the aux-output config term e.g.
-
-	perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname
-
-Note that currently, software only supports redirecting at most one PEBS event.
-
-To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g.
-
-	perf script --itrace=oe
+Documentation for support for Intel Processor Trace within perf tools' has moved to file perf-intel-pt.txt
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c4dd23c4b478..8ead55593984 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -239,7 +239,6 @@ buildid.*::
 		set buildid.dir to /dev/null. The default is $HOME/.debug
 
 annotate.*::
-	These options work only for TUI.
 	These are in control of addresses, jump function, source code
 	in lines of assembly code from a specific program.
 
@@ -269,6 +268,8 @@ annotate.*::
 		│        mov    (%rdi),%rdx
 		│              return n;
 
+		This option works with tui, stdio2 browsers.
+
         annotate.use_offset::
 		Basing on a first address of a loaded function, offset can be used.
 		Instead of using original addresses of assembly code,
@@ -287,6 +288,8 @@ annotate.*::
 
 		             368:│  mov    0x8(%r14),%rdi
 
+		This option works with tui, stdio2 browsers.
+
 	annotate.jump_arrows::
 		There can be jump instruction among assembly code.
 		Depending on a boolean value of jump_arrows,
@@ -306,6 +309,8 @@ annotate.*::
 		│1330:   mov    %r15,%r10
 		│1333:   cmp    %r15,%r14
 
+		This option works with tui browser.
+
         annotate.show_linenr::
 		When showing source code if this option is 'true',
 		line numbers are printed as below.
@@ -325,6 +330,8 @@ annotate.*::
 		│                     array++;
 		│             }
 
+		This option works with tui, stdio2 browsers.
+
         annotate.show_nr_jumps::
 		Let's see a part of assembly code.
 
@@ -335,6 +342,8 @@ annotate.*::
 
 		│1 1382:   movb   $0x1,-0x270(%rbp)
 
+		This option works with tui, stdio2 browsers.
+
         annotate.show_total_period::
 		To compare two records on an instruction base, with this option
 		provided, display total number of samples that belong to a line
@@ -348,11 +357,30 @@ annotate.*::
 
 		99.93 │      mov    %eax,%eax
 
+		This option works with tui, stdio2, stdio browsers.
+
+	annotate.show_nr_samples::
+		By default perf annotate shows percentage of samples. This option
+		can be used to print absolute number of samples. Ex, when set as
+		false:
+
+		Percent│
+		 74.03 │      mov    %fs:0x28,%rax
+
+		When set as true:
+
+		Samples│
+		     6 │      mov    %fs:0x28,%rax
+
+		This option works with tui, stdio2, stdio browsers.
+
 	annotate.offset_level::
 		Default is '1', meaning just jump targets will have offsets show right beside
 		the instruction. When set to '2' 'call' instructions will also have its offsets
 		shown, 3 or higher will show offsets for all instructions.
 
+		This option works with tui, stdio2 browsers.
+
 hist.*::
 	hist.percentage::
 		This option control the way to calculate overhead of filtered entries -
@@ -490,6 +518,12 @@ top.*::
 		column by default.
 		The default is 'true'.
 
+	top.call-graph::
+		This is identical to 'call-graph.record-mode', except it is
+		applicable only for 'top' subcommand. This option ONLY setup
+		the unwind method. To enable 'perf top' to actually use it,
+		the command line option -g must be specified.
+
 man.*::
 	man.viewer::
 		This option can assign a tool to view manual pages when 'help'
@@ -517,6 +551,16 @@ record.*::
 		But if this option is 'no-cache', it will not update the build-id cache.
 		'skip' skips post-processing and does not update the cache.
 
+	record.call-graph::
+		This is identical to 'call-graph.record-mode', except it is
+		applicable only for 'record' subcommand. This option ONLY setup
+		the unwind method. To enable 'perf record' to actually use it,
+		the command line option -g must be specified.
+
+	record.aio::
+		Use 'n' control blocks in asynchronous (Posix AIO) trace writing
+		mode ('n' default: 1, max: 4).
+
 diff.*::
 	diff.order::
 		This option sets the number of columns to sort the result.
@@ -566,6 +610,11 @@ trace.*::
 		"libbeauty", the default, to use the same argument beautifiers used in the
 		strace-like sys_enter+sys_exit lines.
 
+ftrace.*::
+	ftrace.tracer::
+		Can be used to select the default tracer. Possible values are
+		'function' and 'function_graph'.
+
 llvm.*::
 	llvm.clang-path::
 		Path to clang. If omit, search it from $PATH.
@@ -610,6 +659,29 @@ scripts.*::
 	The script gets the same options passed as a full perf script,
 	in particular -i perfdata file, --cpu, --tid
 
+convert.*::
+
+	convert.queue-size::
+		Limit the size of ordered_events queue, so we could control
+		allocation size of perf data files without proper finished
+		round events.
+
+intel-pt.*::
+
+	intel-pt.cache-divisor::
+
+	intel-pt.mispred-all::
+		If set, Intel PT decoder will set the mispred flag on all
+		branches.
+
+auxtrace.*::
+
+	auxtrace.dumpdir::
+		s390 only. The directory to save the auxiliary trace buffer
+		can be changed using this option. Ex, auxtrace.dumpdir=/tmp.
+		If the directory does not exist or has the wrong file type,
+		the current directory is used.
+
 SEE ALSO
 --------
 linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index a64d6588470e..70969ea73e01 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -66,4 +66,5 @@ include::itrace.txt[]
 
 SEE ALSO
 --------
-linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
+linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1],
+linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
new file mode 100644
index 000000000000..456fdcbf26ac
--- /dev/null
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -0,0 +1,1007 @@
+perf-intel-pt(1)
+================
+
+NAME
+----
+perf-intel-pt - Support for Intel Processor Trace within perf tools
+
+SYNOPSIS
+--------
+[verse]
+'perf record' -e intel_pt//
+
+DESCRIPTION
+-----------
+
+Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
+collects information about software execution such as control flow, execution
+modes and timings and formats it into highly compressed binary packets.
+Technical details are documented in the Intel 64 and IA-32 Architectures
+Software Developer Manuals, Chapter 36 Intel Processor Trace.
+
+Intel PT is first supported in Intel Core M and 5th generation Intel Core
+processors that are based on the Intel micro-architecture code name Broadwell.
+
+Trace data is collected by 'perf record' and stored within the perf.data file.
+See below for options to 'perf record'.
+
+Trace data must be 'decoded' which involves walking the object code and matching
+the trace data packets. For example a TNT packet only tells whether a
+conditional branch was taken or not taken, so to make use of that packet the
+decoder must know precisely which instruction was being executed.
+
+Decoding is done on-the-fly.  The decoder outputs samples in the same format as
+samples output by perf hardware events, for example as though the "instructions"
+or "branches" events had been recorded.  Presently 3 tools support this:
+'perf script', 'perf report' and 'perf inject'.  See below for more information
+on using those tools.
+
+The main distinguishing feature of Intel PT is that the decoder can determine
+the exact flow of software execution.  Intel PT can be used to understand why
+and how did software get to a certain point, or behave a certain way.  The
+software does not have to be recompiled, so Intel PT works with debug or release
+builds, however the executed images are needed - which makes use in JIT-compiled
+environments, or with self-modified code, a challenge.  Also symbols need to be
+provided to make sense of addresses.
+
+A limitation of Intel PT is that it produces huge amounts of trace data
+(hundreds of megabytes per second per core) which takes a long time to decode,
+for example two or three orders of magnitude longer than it took to collect.
+Another limitation is the performance impact of tracing, something that will
+vary depending on the use-case and architecture.
+
+
+Quickstart
+----------
+
+It is important to start small.  That is because it is easy to capture vastly
+more data than can possibly be processed.
+
+The simplest thing to do with Intel PT is userspace profiling of small programs.
+Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
+
+	perf record -e intel_pt//u ls
+
+And profiled with 'perf report' e.g.
+
+	perf report
+
+To also trace kernel space presents a problem, namely kernel self-modifying
+code.  A fairly good kernel image is available in /proc/kcore but to get an
+accurate image a copy of /proc/kcore needs to be made under the same conditions
+as the data capture.  A script perf-with-kcore can do that, but beware that the
+script makes use of 'sudo' to copy /proc/kcore.  If you have perf installed
+locally from the source tree you can do:
+
+	~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
+
+which will create a directory named 'pt_ls' and put the perf.data file and
+copies of /proc/kcore, /proc/kallsyms and /proc/modules into it.  Then to use
+'perf report' becomes:
+
+	~/libexec/perf-core/perf-with-kcore report pt_ls
+
+Because samples are synthesized after-the-fact, the sampling period can be
+selected for reporting. e.g. sample every microsecond
+
+	~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
+
+See the sections below for more information about the --itrace option.
+
+Beware the smaller the period, the more samples that are produced, and the
+longer it takes to process them.
+
+Also note that the coarseness of Intel PT timing information will start to
+distort the statistical value of the sampling as the sampling period becomes
+smaller.
+
+To represent software control flow, "branches" samples are produced.  By default
+a branch sample is synthesized for every single branch.  To get an idea what
+data is available you can use the 'perf script' tool with all itrace sampling
+options, which will list all the samples.
+
+	perf record -e intel_pt//u ls
+	perf script --itrace=ibxwpe
+
+An interesting field that is not printed by default is 'flags' which can be
+displayed as follows:
+
+	perf script --itrace=ibxwpe -F+flags
+
+The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
+system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
+in transaction, respectively.
+
+Another interesting field that is not printed by default is 'ipc' which can be
+displayed as follows:
+
+	perf script --itrace=be -F+ipc
+
+There are two ways that instructions-per-cycle (IPC) can be calculated depending
+on the recording.
+
+If the 'cyc' config term (see config terms section below) was used, then IPC is
+calculated using the cycle count from CYC packets, otherwise MTC packets are
+used - refer to the 'mtc' config term.  When MTC is used, however, the values
+are less accurate because the timing is less accurate.
+
+Because Intel PT does not update the cycle count on every branch or instruction,
+the values will often be zero.  When there are values, they will be the number
+of instructions and number of cycles since the last update, and thus represent
+the average IPC since the last IPC for that event type.  Note IPC for "branches"
+events is calculated separately from IPC for "instructions" events.
+
+Also note that the IPC instruction count may or may not include the current
+instruction.  If the cycle count is associated with an asynchronous branch
+(e.g. page fault or interrupt), then the instruction count does not include the
+current instruction, otherwise it does.  That is consistent with whether or not
+that instruction has retired when the cycle count is updated.
+
+Another note, in the case of "branches" events, non-taken branches are not
+presently sampled, so IPC values for them do not appear e.g. a CYC packet with a
+TNT packet that starts with a non-taken branch.  To see every possible IPC
+value, "instructions" events can be used e.g. --itrace=i0ns
+
+While it is possible to create scripts to analyze the data, an alternative
+approach is available to export the data to a sqlite or postgresql database.
+Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
+and to script exported-sql-viewer.py for an example of using the database.
+
+There is also script intel-pt-events.py which provides an example of how to
+unpack the raw data for power events and PTWRITE.
+
+As mentioned above, it is easy to capture too much data.  One way to limit the
+data captured is to use 'snapshot' mode which is explained further below.
+Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
+
+Another problem that will be experienced is decoder errors.  They can be caused
+by inability to access the executed image, self-modified or JIT-ed code, or the
+inability to match side-band information (such as context switches and mmaps)
+which results in the decoder not knowing what code was executed.
+
+There is also the problem of perf not being able to copy the data fast enough,
+resulting in data lost because the buffer was full.  See 'Buffer handling' below
+for more details.
+
+
+perf record
+-----------
+
+new event
+~~~~~~~~~
+
+The Intel PT kernel driver creates a new PMU for Intel PT.  PMU events are
+selected by providing the PMU name followed by the "config" separated by slashes.
+An enhancement has been made to allow default "config" e.g. the option
+
+	-e intel_pt//
+
+will use a default config value.  Currently that is the same as
+
+	-e intel_pt/tsc,noretcomp=0/
+
+which is the same as
+
+	-e intel_pt/tsc=1,noretcomp=0/
+
+Note there are now new config terms - see section 'config terms' further below.
+
+The config terms are listed in /sys/devices/intel_pt/format.  They are bit
+fields within the config member of the struct perf_event_attr which is
+passed to the kernel by the perf_event_open system call.  They correspond to bit
+fields in the IA32_RTIT_CTL MSR.  Here is a list of them and their definitions:
+
+	$ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
+	/sys/bus/event_source/devices/intel_pt/format/cyc:config:1
+	/sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
+	/sys/bus/event_source/devices/intel_pt/format/mtc:config:9
+	/sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
+	/sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
+	/sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
+	/sys/bus/event_source/devices/intel_pt/format/tsc:config:10
+
+Note that the default config must be overridden for each term i.e.
+
+	-e intel_pt/noretcomp=0/
+
+is the same as:
+
+	-e intel_pt/tsc=1,noretcomp=0/
+
+So, to disable TSC packets use:
+
+	-e intel_pt/tsc=0/
+
+It is also possible to specify the config value explicitly:
+
+	-e intel_pt/config=0x400/
+
+Note that, as with all events, the event is suffixed with event modifiers:
+
+	u	userspace
+	k	kernel
+	h	hypervisor
+	G	guest
+	H	host
+	p	precise ip
+
+'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
+'p' is also not relevant to Intel PT.  So only options 'u' and 'k' are
+meaningful for Intel PT.
+
+perf_event_attr is displayed if the -vv option is used e.g.
+
+	------------------------------------------------------------
+	perf_event_attr:
+	type                             6
+	size                             112
+	config                           0x400
+	{ sample_period, sample_freq }   1
+	sample_type                      IP|TID|TIME|CPU|IDENTIFIER
+	read_format                      ID
+	disabled                         1
+	inherit                          1
+	exclude_kernel                   1
+	exclude_hv                       1
+	enable_on_exec                   1
+	sample_id_all                    1
+	------------------------------------------------------------
+	sys_perf_event_open: pid 31104  cpu 0  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 1  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 2  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 3  group_fd -1  flags 0x8
+	------------------------------------------------------------
+
+
+config terms
+~~~~~~~~~~~~
+
+The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
+Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
+Some of the features are reflect in new config terms.  All the config terms are
+described below.
+
+tsc		Always supported.  Produces TSC timestamp packets to provide
+		timing information.  In some cases it is possible to decode
+		without timing information, for example a per-thread context
+		that does not overlap executable memory maps.
+
+		The default config selects tsc (i.e. tsc=1).
+
+noretcomp	Always supported.  Disables "return compression" so a TIP packet
+		is produced when a function returns.  Causes more packets to be
+		produced but might make decoding more reliable.
+
+		The default config does not select noretcomp (i.e. noretcomp=0).
+
+psb_period	Allows the frequency of PSB packets to be specified.
+
+		The PSB packet is a synchronization packet that provides a
+		starting point for decoding or recovery from errors.
+
+		Support for psb_period is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+		which contains "1" if the feature is supported and "0"
+		otherwise.
+
+		Valid values are given by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/psb_periods
+
+		which contains a hexadecimal value, the bits of which represent
+		valid values e.g. bit 2 set means value 2 is valid.
+
+		The psb_period value is converted to the approximate number of
+		trace bytes between PSB packets as:
+
+			2 ^ (value + 11)
+
+		e.g. value 3 means 16KiB bytes between PSBs
+
+		If an invalid value is entered, the error message
+		will give a list of valid values e.g.
+
+			$ perf record -e intel_pt/psb_period=15/u uname
+			Invalid psb_period for intel_pt. Valid values are: 0-5
+
+		If MTC packets are selected, the default config selects a value
+		of 3 (i.e. psb_period=3) or the nearest lower value that is
+		supported (0 is always supported).  Otherwise the default is 0.
+
+		If decoding is expected to be reliable and the buffer is large
+		then a large PSB period can be used.
+
+		Because a TSC packet is produced with PSB, the PSB period can
+		also affect the granularity to timing information in the absence
+		of MTC or CYC.
+
+mtc		Produces MTC timing packets.
+
+		MTC packets provide finer grain timestamp information than TSC
+		packets.  MTC packets record time using the hardware crystal
+		clock (CTC) which is related to TSC packets using a TMA packet.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/mtc
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
+		The frequency of MTC packets can also be specified - see
+		mtc_period below.
+
+mtc_period	Specifies how frequently MTC packets are produced - see mtc
+		above for how to determine if MTC packets are supported.
+
+		Valid values are given by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/mtc_periods
+
+		which contains a hexadecimal value, the bits of which represent
+		valid values e.g. bit 2 set means value 2 is valid.
+
+		The mtc_period value is converted to the MTC frequency as:
+
+			CTC-frequency / (2 ^ value)
+
+		e.g. value 3 means one eighth of CTC-frequency
+
+		Where CTC is the hardware crystal clock, the frequency of which
+		can be related to TSC via values provided in cpuid leaf 0x15.
+
+		If an invalid value is entered, the error message
+		will give a list of valid values e.g.
+
+			$ perf record -e intel_pt/mtc_period=15/u uname
+			Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
+
+		The default value is 3 or the nearest lower value
+		that is supported (0 is always supported).
+
+cyc		Produces CYC timing packets.
+
+		CYC packets provide even finer grain timestamp information than
+		MTC and TSC packets.  A CYC packet contains the number of CPU
+		cycles since the last CYC packet. Unlike MTC and TSC packets,
+		CYC packets are only sent when another packet is also sent.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
+		The number of CYC packets produced can be reduced by specifying
+		a threshold - see cyc_thresh below.
+
+cyc_thresh	Specifies how frequently CYC packets are produced - see cyc
+		above for how to determine if CYC packets are supported.
+
+		Valid cyc_thresh values are given by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
+
+		which contains a hexadecimal value, the bits of which represent
+		valid values e.g. bit 2 set means value 2 is valid.
+
+		The cyc_thresh value represents the minimum number of CPU cycles
+		that must have passed before a CYC packet can be sent.  The
+		number of CPU cycles is:
+
+			2 ^ (value - 1)
+
+		e.g. value 4 means 8 CPU cycles must pass before a CYC packet
+		can be sent.  Note a CYC packet is still only sent when another
+		packet is sent, not at, e.g. every 8 CPU cycles.
+
+		If an invalid value is entered, the error message
+		will give a list of valid values e.g.
+
+			$ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
+			Invalid cyc_thresh for intel_pt. Valid values are: 0-12
+
+		CYC packets are not requested by default.
+
+pt		Specifies pass-through which enables the 'branch' config term.
+
+		The default config selects 'pt' if it is available, so a user will
+		never need to specify this term.
+
+branch		Enable branch tracing.  Branch tracing is enabled by default so to
+		disable branch tracing use 'branch=0'.
+
+		The default config selects 'branch' if it is available.
+
+ptw		Enable PTWRITE packets which are produced when a ptwrite instruction
+		is executed.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/ptwrite
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
+fup_on_ptw	Enable a FUP packet to follow the PTWRITE packet.  The FUP packet
+		provides the address of the ptwrite instruction.  In the absence of
+		fup_on_ptw, the decoder will use the address of the previous branch
+		if branch tracing is enabled, otherwise the address will be zero.
+		Note that fup_on_ptw will work even when branch tracing is disabled.
+
+pwr_evt		Enable power events.  The power events provide information about
+		changes to the CPU C-state.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
+
+AUX area sampling option
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+To select Intel PT "sampling" the AUX area sampling option can be used:
+
+	--aux-sample
+
+Optionally it can be followed by the sample size in bytes e.g.
+
+	--aux-sample=8192
+
+In addition, the Intel PT event to sample must be defined e.g.
+
+	-e intel_pt//u
+
+Samples on other events will be created containing Intel PT data e.g. the
+following will create Intel PT samples on the branch-misses event, note the
+events must be grouped using {}:
+
+	perf record --aux-sample -e '{intel_pt//u,branch-misses:u}'
+
+An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to
+events.  In this case, the grouping is implied e.g.
+
+	perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u
+
+is the same as:
+
+	perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}'
+
+but allows for also using an address filter e.g.:
+
+	perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls
+
+It is important to select a sample size that is big enough to contain at least
+one PSB packet.  If not a warning will be displayed:
+
+	Intel PT sample size (%zu) may be too small for PSB period (%zu)
+
+The calculation used for that is: if sample_size <= psb_period + 256 display the
+warning.  When sampling is used, psb_period defaults to 0 (2KiB).
+
+The default sample size is 4KiB.
+
+The sample size is passed in aux_sample_size in struct perf_event_attr.  The
+sample size is limited by the maximum event size which is 64KiB.  It is
+difficult to know how big the event might be without the trace sample attached,
+but the tool validates that the sample size is not greater than 60KiB.
+
+
+new snapshot option
+~~~~~~~~~~~~~~~~~~~
+
+The difference between full trace and snapshot from the kernel's perspective is
+that in full trace we don't overwrite trace data that the user hasn't collected
+yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
+the trace run and overwrite older data in the buffer so that whenever something
+interesting happens, we can stop it and grab a snapshot of what was going on
+around that interesting moment.
+
+To select snapshot mode a new option has been added:
+
+	-S
+
+Optionally it can be followed by the snapshot size e.g.
+
+	-S0x100000
+
+The default snapshot size is the auxtrace mmap size.  If neither auxtrace mmap size
+nor snapshot size is specified, then the default is 4MiB for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced as described in the 'new auxtrace mmap size option' section below.
+
+The snapshot size is displayed if the option -vv is used e.g.
+
+	Intel PT snapshot size: %zu
+
+
+new auxtrace mmap size option
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Intel PT buffer size is specified by an addition to the -m option e.g.
+
+	-m,16
+
+selects a buffer size of 16 pages i.e. 64KiB.
+
+Note that the existing functionality of -m is unchanged.  The auxtrace mmap size
+is specified by the optional addition of a comma and the value.
+
+The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
+user is likely to get an error as they exceed their mlock limit (Max locked
+memory as shown in /proc/self/limits).  Note that perf does not count the first
+512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
+against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
+their mlock limit (which defaults to 64KiB but is not multiplied by the number
+of cpus).
+
+In full-trace mode, powers of two are allowed for buffer size, with a minimum
+size of 2 pages.  In snapshot mode or sampling mode, it is the same but the
+minimum size is 1 page.
+
+The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
+
+	mmap length 528384
+	auxtrace mmap length 4198400
+
+
+Intel PT modes of operation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Intel PT can be used in 2 modes:
+	full-trace mode
+	sample mode
+	snapshot mode
+
+Full-trace mode traces continuously e.g.
+
+	perf record -e intel_pt//u uname
+
+Sample mode attaches a Intel PT sample to other events e.g.
+
+	perf record --aux-sample -e intel_pt//u -e branch-misses:u
+
+Snapshot mode captures the available data when a signal is sent e.g.
+
+	perf record -v -e intel_pt//u -S ./loopy 1000000000 &
+	[1] 11435
+	kill -USR2 11435
+	Recording AUX area tracing snapshot
+
+Note that the signal sent is SIGUSR2.
+Note that "Recording AUX area tracing snapshot" is displayed because the -v
+option is used.
+
+The 2 modes cannot be used together.
+
+
+Buffer handling
+~~~~~~~~~~~~~~~
+
+There may be buffer limitations (i.e. single ToPa entry) which means that actual
+buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER).  In order to
+provide other sizes, and in particular an arbitrarily large size, multiple
+buffers are logically concatenated.  However an interrupt must be used to switch
+between buffers.  That has two potential problems:
+	a) the interrupt may not be handled in time so that the current buffer
+	becomes full and some trace data is lost.
+	b) the interrupts may slow the system and affect the performance
+	results.
+
+If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
+which the tools report as an error.
+
+In full-trace mode, the driver waits for data to be copied out before allowing
+the (logical) buffer to wrap-around.  If data is not copied out quickly enough,
+again 'truncated' is set in the PERF_RECORD_AUX event.  If the driver has to
+wait, the intel_pt event gets disabled.  Because it is difficult to know when
+that happens, perf tools always re-enable the intel_pt event after copying out
+data.
+
+
+Intel PT and build ids
+~~~~~~~~~~~~~~~~~~~~~~
+
+By default "perf record" post-processes the event stream to find all build ids
+for executables for all addresses sampled.  Deliberately, Intel PT is not
+decoded for that purpose (it would take too long).  Instead the build ids for
+all executables encountered (due to mmap, comm or task events) are included
+in the perf.data file.
+
+To see buildids included in the perf.data file use the command:
+
+	perf buildid-list
+
+If the perf.data file contains Intel PT data, that is the same as:
+
+	perf buildid-list --with-hits
+
+
+Snapshot mode and event disabling
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
+namely PERF_EVENT_IOC_DISABLE.  However doing that can also disable the
+collection of side-band information.  In order to prevent that,  a dummy
+software event has been introduced that permits tracking events (like mmaps) to
+continue to be recorded while intel_pt is disabled.  That is important to ensure
+there is complete side-band information to allow the decoding of subsequent
+snapshots.
+
+A test has been created for that.  To find the test:
+
+	perf test list
+	...
+	23: Test using a dummy software event to keep tracking
+
+To run the test:
+
+	perf test 23
+	23: Test using a dummy software event to keep tracking     : Ok
+
+
+perf record modes (nothing new here)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+perf record essentially operates in one of three modes:
+	per thread
+	per cpu
+	workload only
+
+"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
+workload).
+"per cpu" is selected by -C or -a.
+"workload only" mode is selected by not using the other options but providing a
+command to run (i.e. the workload).
+
+In per-thread mode an exact list of threads is traced.  There is no inheritance.
+Each thread has its own event buffer.
+
+In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
+option, or processes selected with -p or -u) are traced.  Each cpu has its own
+buffer. Inheritance is allowed.
+
+In workload-only mode, the workload is traced but with per-cpu buffers.
+Inheritance is allowed.  Note that you can now trace a workload in per-thread
+mode by using the --per-thread option.
+
+
+Privileged vs non-privileged users
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
+have memory limits imposed upon them.  That affects what buffer sizes they can
+have as outlined above.
+
+The v4.2 kernel introduced support for a context switch metadata event,
+PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
+are scheduled out and in, just not by whom, which is left for the
+PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
+which in turn requires CAP_SYS_ADMIN.
+
+Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
+switches") commit, that introduces these metadata events for further info.
+
+When working with kernels < v4.2, the following considerations must be taken,
+as the sched:sched_switch tracepoints will be used to receive such information:
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
+not permitted to use tracepoints which means there is insufficient side-band
+information to decode Intel PT in per-cpu mode, and potentially workload-only
+mode too if the workload creates new processes.
+
+Note also, that to use tracepoints, read-access to debugfs is required.  So if
+debugfs is not mounted or the user does not have read-access, it will again not
+be possible to decode Intel PT in per-cpu mode.
+
+
+sched_switch tracepoint
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The sched_switch tracepoint is used to provide side-band data for Intel PT
+decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
+available.
+
+The sched_switch events are automatically added. e.g. the second event shown
+below:
+
+	$ perf record -vv -e intel_pt//u uname
+	------------------------------------------------------------
+	perf_event_attr:
+	type                             6
+	size                             112
+	config                           0x400
+	{ sample_period, sample_freq }   1
+	sample_type                      IP|TID|TIME|CPU|IDENTIFIER
+	read_format                      ID
+	disabled                         1
+	inherit                          1
+	exclude_kernel                   1
+	exclude_hv                       1
+	enable_on_exec                   1
+	sample_id_all                    1
+	------------------------------------------------------------
+	sys_perf_event_open: pid 31104  cpu 0  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 1  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 2  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 3  group_fd -1  flags 0x8
+	------------------------------------------------------------
+	perf_event_attr:
+	type                             2
+	size                             112
+	config                           0x108
+	{ sample_period, sample_freq }   1
+	sample_type                      IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
+	read_format                      ID
+	inherit                          1
+	sample_id_all                    1
+	exclude_guest                    1
+	------------------------------------------------------------
+	sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8
+	sys_perf_event_open: pid -1  cpu 1  group_fd -1  flags 0x8
+	sys_perf_event_open: pid -1  cpu 2  group_fd -1  flags 0x8
+	sys_perf_event_open: pid -1  cpu 3  group_fd -1  flags 0x8
+	------------------------------------------------------------
+	perf_event_attr:
+	type                             1
+	size                             112
+	config                           0x9
+	{ sample_period, sample_freq }   1
+	sample_type                      IP|TID|TIME|IDENTIFIER
+	read_format                      ID
+	disabled                         1
+	inherit                          1
+	exclude_kernel                   1
+	exclude_hv                       1
+	mmap                             1
+	comm                             1
+	enable_on_exec                   1
+	task                             1
+	sample_id_all                    1
+	mmap2                            1
+	comm_exec                        1
+	------------------------------------------------------------
+	sys_perf_event_open: pid 31104  cpu 0  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 1  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 2  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 3  group_fd -1  flags 0x8
+	mmap size 528384B
+	AUX area mmap length 4194304
+	perf event ring buffer mmapped per cpu
+	Synthesizing auxtrace information
+	Linux
+	[ perf record: Woken up 1 times to write data ]
+	[ perf record: Captured and wrote 0.042 MB perf.data ]
+
+Note, the sched_switch event is only added if the user is permitted to use it
+and only in per-cpu mode.
+
+Note also, the sched_switch event is only added if TSC packets are requested.
+That is because, in the absence of timing information, the sched_switch events
+cannot be matched against the Intel PT trace.
+
+
+perf script
+-----------
+
+By default, perf script will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace.
+
+
+New --itrace option
+~~~~~~~~~~~~~~~~~~~
+
+Having no option is the same as
+
+	--itrace
+
+which, in turn, is the same as
+
+	--itrace=cepwx
+
+The letters are:
+
+	i	synthesize "instructions" events
+	b	synthesize "branches" events
+	x	synthesize "transactions" events
+	w	synthesize "ptwrite" events
+	p	synthesize "power" events
+	c	synthesize branches events (calls only)
+	r	synthesize branches events (returns only)
+	e	synthesize tracing error events
+	d	create a debug log
+	g	synthesize a call chain (use with i or x)
+	l	synthesize last branch entries (use with i or x)
+	s	skip initial number of events
+
+"Instructions" events look like they were recorded by "perf record -e
+instructions".
+
+"Branches" events look like they were recorded by "perf record -e branches". "c"
+and "r" can be combined to get calls and returns.
+
+"Transactions" events correspond to the start or end of transactions. The
+'flags' field can be used in perf script to determine whether the event is a
+tranasaction start, commit or abort.
+
+Note that "instructions", "branches" and "transactions" events depend on code
+flow packets which can be disabled by using the config term "branch=0".  Refer
+to the config terms section above.
+
+"ptwrite" events record the payload of the ptwrite instruction and whether
+"fup_on_ptw" was used.  "ptwrite" events depend on PTWRITE packets which are
+recorded only if the "ptw" config term was used.  Refer to the config terms
+section above.  perf script "synth" field displays "ptwrite" information like
+this: "ip: 0 payload: 0x123456789abcdef0"  where "ip" is 1 if "fup_on_ptw" was
+used.
+
+"Power" events correspond to power event packets and CBR (core-to-bus ratio)
+packets.  While CBR packets are always recorded when tracing is enabled, power
+event packets are recorded only if the "pwr_evt" config term was used.  Refer to
+the config terms section above.  The power events record information about
+C-state changes, whereas CBR is indicative of CPU frequency.  perf script
+"event,synth" fields display information like this:
+	cbr:  cbr: 22 freq: 2189 MHz (200%)
+	mwait:  hints: 0x60 extensions: 0x1
+	pwre:  hw: 0 cstate: 2 sub-cstate: 0
+	exstop:  ip: 1
+	pwrx:  deepest cstate: 2 last cstate: 2 wake reason: 0x4
+Where:
+	"cbr" includes the frequency and the percentage of maximum non-turbo
+	"mwait" shows mwait hints and extensions
+	"pwre" shows C-state transitions (to a C-state deeper than C0) and
+	whether	initiated by hardware
+	"exstop" indicates execution stopped and whether the IP was recorded
+	exactly,
+	"pwrx" indicates return to C0
+For more details refer to the Intel 64 and IA-32 Architectures Software
+Developer Manuals.
+
+Error events show where the decoder lost the trace.  Error events
+are quite important.  Users must know if what they are seeing is a complete
+picture or not.
+
+The "d" option will cause the creation of a file "intel_pt.log" containing all
+decoded packets and instructions.  Note that this option slows down the decoder
+and that the resulting file may be very large.
+
+In addition, the period of the "instructions" event can be specified. e.g.
+
+	--itrace=i10us
+
+sets the period to 10us i.e. one  instruction sample is synthesized for each 10
+microseconds of trace.  Alternatives to "us" are "ms" (milliseconds),
+"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
+
+"ms", "us" and "ns" are converted to TSC ticks.
+
+The timing information included with Intel PT does not give the time of every
+instruction.  Consequently, for the purpose of sampling, the decoder estimates
+the time since the last timing packet based on 1 tick per instruction.  The time
+on the sample is *not* adjusted and reflects the last known value of TSC.
+
+For Intel PT, the default period is 100us.
+
+Setting it to a zero period means "as often as possible".
+
+In the case of Intel PT that is the same as a period of 1 and a unit of
+'instructions' (i.e. --itrace=i1i).
+
+Also the call chain size (default 16, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+	--itrace=ig32
+	--itrace=xg32
+
+Also the number of last branch entries (default 64, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+       --itrace=il10
+       --itrace=xl10
+
+Note that last branch entries are cleared for each sample, so there is no overlap
+from one sample to the next.
+
+To disable trace decoding entirely, use the option --no-itrace.
+
+It is also possible to skip events generated (instructions, branches, transactions)
+at the beginning. This is useful to ignore initialization code.
+
+	--itrace=i0nss1000000
+
+skips the first million instructions.
+
+dump option
+~~~~~~~~~~~
+
+perf script has an option (-D) to "dump" the events i.e. display the binary
+data.
+
+When -D is used, Intel PT packets are displayed.  The packet decoder does not
+pay attention to PSB packets, but just decodes the bytes - so the packets seen
+by the actual decoder may not be identical in places where the data is corrupt.
+One example of that would be when the buffer-switching interrupt has been too
+slow, and the buffer has been filled completely.  In that case, the last packet
+in the buffer might be truncated and immediately followed by a PSB as the trace
+continues in the next buffer.
+
+To disable the display of Intel PT packets, combine the -D option with
+--no-itrace.
+
+
+perf report
+-----------
+
+By default, perf report will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace exactly the same as
+perf script, with the exception that the default is --itrace=igxe.
+
+
+perf inject
+-----------
+
+perf inject also accepts the --itrace option in which case tracing data is
+removed and replaced with the synthesized events. e.g.
+
+	perf inject --itrace -i perf.data -o perf.data.new
+
+Below is an example of using Intel PT with autofdo.  It requires autofdo
+(https://github.com/google/autofdo) and gcc version 5.  The bubble
+sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
+amended to take the number of elements as a parameter.
+
+	$ gcc-5 -O3 sort.c -o sort_optimized
+	$ ./sort_optimized 30000
+	Bubble sorting array of 30000 elements
+	2254 ms
+
+	$ cat ~/.perfconfig
+	[intel-pt]
+		mispred-all = on
+
+	$ perf record -e intel_pt//u ./sort 3000
+	Bubble sorting array of 3000 elements
+	58 ms
+	[ perf record: Woken up 2 times to write data ]
+	[ perf record: Captured and wrote 3.939 MB perf.data ]
+	$ perf inject -i perf.data -o inj --itrace=i100usle --strip
+	$ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
+	$ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
+	$ ./sort_autofdo 30000
+	Bubble sorting array of 30000 elements
+	2155 ms
+
+Note there is currently no advantage to using Intel PT instead of LBR, but
+that may change in the future if greater use is made of the data.
+
+
+PEBS via Intel PT
+-----------------
+
+Some hardware has the feature to redirect PEBS records to the Intel PT trace.
+Recording is selected by using the aux-output config term e.g.
+
+	perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname
+
+Note that currently, software only supports redirecting at most one PEBS event.
+
+To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g.
+
+	perf script --itrace=oe
+
+
+SEE ALSO
+--------
+
+linkperf:perf-record[1], linkperf:perf-script[1], linkperf:perf-report[1],
+linkperf:perf-inject[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index b23a4012a606..7f4db7592467 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -589,4 +589,4 @@ appended unit character - B/K/M/G
 
 SEE ALSO
 --------
-linkperf:perf-stat[1], linkperf:perf-list[1]
+linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index db61f16ffa56..bd0a029d4c08 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -546,4 +546,5 @@ include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
 --------
-linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1]
+linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1],
+linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 2599b057e47b..db6a36aac47e 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -429,4 +429,4 @@ include::itrace.txt[]
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
-linkperf:perf-script-python[1]
+linkperf:perf-script-python[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 9431b8066fb4..4d56586b2fb9 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -334,6 +334,15 @@ Configure all used events to run in kernel space.
 --all-user::
 Configure all used events to run in user space.
 
+--percore-show-thread::
+The event modifier "percore" has supported to sum up the event counts
+for all hardware threads in a core and show the counts per core.
+
+This option with event modifier "percore" enabled also sums up the event
+counts for all hardware threads in a core but show the sum counts per
+hardware thread. This is essentially a replacement for the any bit and
+convenient for post processing.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7902a5681fc8..b8fc7d972be9 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -35,7 +35,7 @@ endif
 # Only pass canonical directory names as the output directory:
 #
 ifneq ($(O),)
-  FULL_O := $(shell readlink -f $(O) || echo $(O))
+  FULL_O := $(shell cd $(PWD); readlink -f $(O) || echo $(O))
 endif
 
 #
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 2898cfdf8fe1..941f814820b8 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -858,21 +858,6 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
 	free(ptr);
 }
 
-static int cs_etm_read_finish(struct auxtrace_record *itr, int idx)
-{
-	struct cs_etm_recording *ptr =
-			container_of(itr, struct cs_etm_recording, itr);
-	struct evsel *evsel;
-
-	evlist__for_each_entry(ptr->evlist, evsel) {
-		if (evsel->core.attr.type == ptr->cs_etm_pmu->type)
-			return perf_evlist__enable_event_idx(ptr->evlist,
-							     evsel, idx);
-	}
-
-	return -EINVAL;
-}
-
 struct auxtrace_record *cs_etm_record_init(int *err)
 {
 	struct perf_pmu *cs_etm_pmu;
@@ -892,6 +877,7 @@ struct auxtrace_record *cs_etm_record_init(int *err)
 	}
 
 	ptr->cs_etm_pmu			= cs_etm_pmu;
+	ptr->itr.pmu			= cs_etm_pmu;
 	ptr->itr.parse_snapshot_options	= cs_etm_parse_snapshot_options;
 	ptr->itr.recording_options	= cs_etm_recording_options;
 	ptr->itr.info_priv_size		= cs_etm_info_priv_size;
@@ -901,7 +887,7 @@ struct auxtrace_record *cs_etm_record_init(int *err)
 	ptr->itr.snapshot_finish	= cs_etm_snapshot_finish;
 	ptr->itr.reference		= cs_etm_reference;
 	ptr->itr.free			= cs_etm_recording_free;
-	ptr->itr.read_finish		= cs_etm_read_finish;
+	ptr->itr.read_finish		= auxtrace_record__read_finish;
 
 	*err = 0;
 	return &ptr->itr;
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index eba6541ec0f1..27653be24447 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -11,17 +11,17 @@
 #include <linux/zalloc.h>
 #include <time.h>
 
-#include "../../util/cpumap.h"
-#include "../../util/event.h"
-#include "../../util/evsel.h"
-#include "../../util/evlist.h"
-#include "../../util/session.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/event.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evlist.h"
+#include "../../../util/session.h"
 #include <internal/lib.h> // page_size
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/auxtrace.h"
-#include "../../util/record.h"
-#include "../../util/arm-spe.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/record.h"
+#include "../../../util/arm-spe.h"
 
 #define KiB(x) ((x) * 1024)
 #define MiB(x) ((x) * 1024 * 1024)
@@ -158,20 +158,6 @@ static void arm_spe_recording_free(struct auxtrace_record *itr)
 	free(sper);
 }
 
-static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
-{
-	struct arm_spe_recording *sper =
-			container_of(itr, struct arm_spe_recording, itr);
-	struct evsel *evsel;
-
-	evlist__for_each_entry(sper->evlist, evsel) {
-		if (evsel->core.attr.type == sper->arm_spe_pmu->type)
-			return perf_evlist__enable_event_idx(sper->evlist,
-							     evsel, idx);
-	}
-	return -EINVAL;
-}
-
 struct auxtrace_record *arm_spe_recording_init(int *err,
 					       struct perf_pmu *arm_spe_pmu)
 {
@@ -189,12 +175,13 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
 	}
 
 	sper->arm_spe_pmu = arm_spe_pmu;
+	sper->itr.pmu = arm_spe_pmu;
 	sper->itr.recording_options = arm_spe_recording_options;
 	sper->itr.info_priv_size = arm_spe_info_priv_size;
 	sper->itr.info_fill = arm_spe_info_fill;
 	sper->itr.free = arm_spe_recording_free;
 	sper->itr.reference = arm_spe_reference;
-	sper->itr.read_finish = arm_spe_read_finish;
+	sper->itr.read_finish = auxtrace_record__read_finish;
 	sper->itr.alignment = 0;
 
 	*err = 0;
diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
index 2864e2e3776d..2833e101a7c6 100644
--- a/tools/perf/arch/arm64/util/perf_regs.c
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "../../util/perf_regs.h"
+#include "../../../util/perf_regs.h"
 
 const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG_END
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 43f736ed47f2..35b61bfc1b1a 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -517,3 +517,5 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 435	nospu	clone3				ppc_clone3
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index e9c436eeffc9..0a5242900248 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -4,8 +4,8 @@
 #include <regex.h>
 #include <linux/zalloc.h>
 
-#include "../../util/perf_regs.h"
-#include "../../util/debug.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/debug.h"
 
 #include <linux/kernel.h>
 
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
index e6461abc9e7b..9708ae892061 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -2085,6 +2085,118 @@
 "67 f3 0f 38 f8 1c    \tenqcmds (%si),%bx",},
 {{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x8c, 0x34, 0x12, }, 8, 0, "", "",
 "67 f3 0f 38 f8 8c 34 12 \tenqcmds 0x1234(%si),%cx",},
+{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "",
+"f3 0f ae e8          \tincsspd %eax",},
+{{0x0f, 0xae, 0x28, }, 3, 0, "", "",
+"0f ae 28             \txrstor (%eax)",},
+{{0x0f, 0xae, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f ae 2d 78 56 34 12 \txrstor 0x12345678",},
+{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0xae, 0xe8, }, 3, 0, "", "",
+"0f ae e8             \tlfence ",},
+{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "",
+"f3 0f 1e c8          \trdsspd %eax",},
+{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "",
+"f3 0f 01 ea          \tsaveprevssp ",},
+{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "",
+"f3 0f 01 28          \trstorssp (%eax)",},
+{{0xf3, 0x0f, 0x01, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 01 2d 78 56 34 12 \trstorssp 0x12345678",},
+{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "",
+"0f 38 f6 08          \twrssd  %ecx,(%eax)",},
+{{0x0f, 0x38, 0xf6, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 f6 15 78 56 34 12 \twrssd  %edx,0x12345678",},
+{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 f6 94 c8 78 56 34 12 \twrssd  %edx,0x12345678(%eax,%ecx,8)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "",
+"66 0f 38 f5 08       \twrussd %ecx,(%eax)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x15, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 38 f5 15 78 56 34 12 \twrussd %edx,0x12345678",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f3 0f 01 e8          \tsetssbsy ",},
+{{0x0f, 0x01, 0xee, }, 3, 0, "", "",
+"0f 01 ee             \trdpkru ",},
+{{0x0f, 0x01, 0xef, }, 3, 0, "", "",
+"0f 01 ef             \twrpkru ",},
+{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"f3 0f ae 30          \tclrssbsy (%eax)",},
+{{0xf3, 0x0f, 0xae, 0x35, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f ae 35 78 56 34 12 \tclrssbsy 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "",
+"f3 0f 1e fb          \tendbr32 ",},
+{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "",
+"f3 0f 1e fa          \tendbr64 ",},
+{{0xff, 0xd0, }, 2, 0, "call", "indirect",
+"ff d0                \tcall   *%eax",},
+{{0xff, 0x10, }, 2, 0, "call", "indirect",
+"ff 10                \tcall   *(%eax)",},
+{{0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "call", "indirect",
+"ff 15 78 56 34 12    \tcall   *0x12345678",},
+{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"ff 94 c8 78 56 34 12 \tcall   *0x12345678(%eax,%ecx,8)",},
+{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"f2 ff d0             \tbnd call *%eax",},
+{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"f2 ff 10             \tbnd call *(%eax)",},
+{{0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"f2 ff 15 78 56 34 12 \tbnd call *0x12345678",},
+{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"f2 ff 94 c8 78 56 34 12 \tbnd call *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"3e ff d0             \tnotrack call *%eax",},
+{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"3e ff 10             \tnotrack call *(%eax)",},
+{{0x3e, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"3e ff 15 78 56 34 12 \tnotrack call *0x12345678",},
+{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e ff 94 c8 78 56 34 12 \tnotrack call *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect",
+"3e f2 ff d0          \tnotrack bnd call *%eax",},
+{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"3e f2 ff 10          \tnotrack bnd call *(%eax)",},
+{{0x3e, 0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e f2 ff 15 78 56 34 12 \tnotrack bnd call *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd call *0x12345678(%eax,%ecx,8)",},
+{{0xff, 0xe0, }, 2, 0, "jmp", "indirect",
+"ff e0                \tjmp    *%eax",},
+{{0xff, 0x20, }, 2, 0, "jmp", "indirect",
+"ff 20                \tjmp    *(%eax)",},
+{{0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "jmp", "indirect",
+"ff 25 78 56 34 12    \tjmp    *0x12345678",},
+{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"ff a4 c8 78 56 34 12 \tjmp    *0x12345678(%eax,%ecx,8)",},
+{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"f2 ff e0             \tbnd jmp *%eax",},
+{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"f2 ff 20             \tbnd jmp *(%eax)",},
+{{0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"f2 ff 25 78 56 34 12 \tbnd jmp *0x12345678",},
+{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"f2 ff a4 c8 78 56 34 12 \tbnd jmp *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"3e ff e0             \tnotrack jmp *%eax",},
+{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"3e ff 20             \tnotrack jmp *(%eax)",},
+{{0x3e, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"3e ff 25 78 56 34 12 \tnotrack jmp *0x12345678",},
+{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e ff a4 c8 78 56 34 12 \tnotrack jmp *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect",
+"3e f2 ff e0          \tnotrack bnd jmp *%eax",},
+{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"3e f2 ff 20          \tnotrack bnd jmp *(%eax)",},
+{{0x3e, 0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",},
 {{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
 "0f 01 cf             \tencls  ",},
 {{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index 567ecccfad7c..5da17d41d302 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -2263,6 +2263,202 @@
 "67 f3 0f 38 f8 18    \tenqcmds (%eax),%ebx",},
 {{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
 "67 f3 0f 38 f8 88 78 56 34 12 \tenqcmds 0x12345678(%eax),%ecx",},
+{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "",
+"f3 0f ae e8          \tincsspd %eax",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xe8, }, 5, 0, "", "",
+"f3 41 0f ae e8       \tincsspd %r8d",},
+{{0xf3, 0x48, 0x0f, 0xae, 0xe8, }, 5, 0, "", "",
+"f3 48 0f ae e8       \tincsspq %rax",},
+{{0xf3, 0x49, 0x0f, 0xae, 0xe8, }, 5, 0, "", "",
+"f3 49 0f ae e8       \tincsspq %r8",},
+{{0x0f, 0xae, 0x28, }, 3, 0, "", "",
+"0f ae 28             \txrstor (%rax)",},
+{{0x41, 0x0f, 0xae, 0x28, }, 4, 0, "", "",
+"41 0f ae 28          \txrstor (%r8)",},
+{{0x0f, 0xae, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f ae 2c 25 78 56 34 12 \txrstor 0x12345678",},
+{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"41 0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0xae, 0xe8, }, 3, 0, "", "",
+"0f ae e8             \tlfence ",},
+{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "",
+"f3 0f 1e c8          \trdsspd %eax",},
+{{0xf3, 0x41, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "",
+"f3 41 0f 1e c8       \trdsspd %r8d",},
+{{0xf3, 0x48, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "",
+"f3 48 0f 1e c8       \trdsspq %rax",},
+{{0xf3, 0x49, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "",
+"f3 49 0f 1e c8       \trdsspq %r8",},
+{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "",
+"f3 0f 01 ea          \tsaveprevssp ",},
+{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "",
+"f3 0f 01 28          \trstorssp (%rax)",},
+{{0xf3, 0x41, 0x0f, 0x01, 0x28, }, 5, 0, "", "",
+"f3 41 0f 01 28       \trstorssp (%r8)",},
+{{0xf3, 0x0f, 0x01, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 01 2c 25 78 56 34 12 \trstorssp 0x12345678",},
+{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "",
+"0f 38 f6 08          \twrssd  %ecx,(%rax)",},
+{{0x41, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "",
+"41 0f 38 f6 10       \twrssd  %edx,(%r8)",},
+{{0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 f6 14 25 78 56 34 12 \twrssd  %edx,0x12345678",},
+{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 f6 94 c8 78 56 34 12 \twrssd  %edx,0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"41 0f 38 f6 94 c8 78 56 34 12 \twrssd  %edx,0x12345678(%r8,%rcx,8)",},
+{{0x48, 0x0f, 0x38, 0xf6, 0x08, }, 5, 0, "", "",
+"48 0f 38 f6 08       \twrssq  %rcx,(%rax)",},
+{{0x49, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "",
+"49 0f 38 f6 10       \twrssq  %rdx,(%r8)",},
+{{0x48, 0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"48 0f 38 f6 14 25 78 56 34 12 \twrssq  %rdx,0x12345678",},
+{{0x48, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"48 0f 38 f6 94 c8 78 56 34 12 \twrssq  %rdx,0x12345678(%rax,%rcx,8)",},
+{{0x49, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"49 0f 38 f6 94 c8 78 56 34 12 \twrssq  %rdx,0x12345678(%r8,%rcx,8)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "",
+"66 0f 38 f5 08       \twrussd %ecx,(%rax)",},
+{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "",
+"66 41 0f 38 f5 10    \twrussd %edx,(%r8)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 0f 38 f5 14 25 78 56 34 12 \twrussd %edx,0x12345678",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 41 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%r8,%rcx,8)",},
+{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x08, }, 6, 0, "", "",
+"66 48 0f 38 f5 08    \twrussq %rcx,(%rax)",},
+{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "",
+"66 49 0f 38 f5 10    \twrussq %rdx,(%r8)",},
+{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 48 0f 38 f5 14 25 78 56 34 12 \twrussq %rdx,0x12345678",},
+{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 48 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 49 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f3 0f 01 e8          \tsetssbsy ",},
+{{0x0f, 0x01, 0xee, }, 3, 0, "", "",
+"0f 01 ee             \trdpkru ",},
+{{0x0f, 0x01, 0xef, }, 3, 0, "", "",
+"0f 01 ef             \twrpkru ",},
+{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"f3 0f ae 30          \tclrssbsy (%rax)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0x30, }, 5, 0, "", "",
+"f3 41 0f ae 30       \tclrssbsy (%r8)",},
+{{0xf3, 0x0f, 0xae, 0x34, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae 34 25 78 56 34 12 \tclrssbsy 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "",
+"f3 0f 1e fb          \tendbr32 ",},
+{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "",
+"f3 0f 1e fa          \tendbr64 ",},
+{{0xff, 0xd0, }, 2, 0, "call", "indirect",
+"ff d0                \tcallq  *%rax",},
+{{0xff, 0x10, }, 2, 0, "call", "indirect",
+"ff 10                \tcallq  *(%rax)",},
+{{0x41, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"41 ff 10             \tcallq  *(%r8)",},
+{{0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"ff 14 25 78 56 34 12 \tcallq  *0x12345678",},
+{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"ff 94 c8 78 56 34 12 \tcallq  *0x12345678(%rax,%rcx,8)",},
+{{0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"41 ff 94 c8 78 56 34 12 \tcallq  *0x12345678(%r8,%rcx,8)",},
+{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"f2 ff d0             \tbnd callq *%rax",},
+{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"f2 ff 10             \tbnd callq *(%rax)",},
+{{0xf2, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"f2 41 ff 10          \tbnd callq *(%r8)",},
+{{0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"f2 ff 14 25 78 56 34 12 \tbnd callq *0x12345678",},
+{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"f2 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%rax,%rcx,8)",},
+{{0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"f2 41 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"3e ff d0             \tnotrack callq *%rax",},
+{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"3e ff 10             \tnotrack callq *(%rax)",},
+{{0x3e, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"3e 41 ff 10          \tnotrack callq *(%r8)",},
+{{0x3e, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e ff 14 25 78 56 34 12 \tnotrack callq *0x12345678",},
+{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e 41 ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect",
+"3e f2 ff d0          \tnotrack bnd callq *%rax",},
+{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"3e f2 ff 10          \tnotrack bnd callq *(%rax)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0x10, }, 5, 0, "call", "indirect",
+"3e f2 41 ff 10       \tnotrack bnd callq *(%r8)",},
+{{0x3e, 0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e f2 ff 14 25 78 56 34 12 \tnotrack bnd callq *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "call", "indirect",
+"3e f2 41 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%r8,%rcx,8)",},
+{{0xff, 0xe0, }, 2, 0, "jmp", "indirect",
+"ff e0                \tjmpq   *%rax",},
+{{0xff, 0x20, }, 2, 0, "jmp", "indirect",
+"ff 20                \tjmpq   *(%rax)",},
+{{0x41, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"41 ff 20             \tjmpq   *(%r8)",},
+{{0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"ff 24 25 78 56 34 12 \tjmpq   *0x12345678",},
+{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"ff a4 c8 78 56 34 12 \tjmpq   *0x12345678(%rax,%rcx,8)",},
+{{0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"41 ff a4 c8 78 56 34 12 \tjmpq   *0x12345678(%r8,%rcx,8)",},
+{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"f2 ff e0             \tbnd jmpq *%rax",},
+{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"f2 ff 20             \tbnd jmpq *(%rax)",},
+{{0xf2, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"f2 41 ff 20          \tbnd jmpq *(%r8)",},
+{{0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"f2 ff 24 25 78 56 34 12 \tbnd jmpq *0x12345678",},
+{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"f2 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%rax,%rcx,8)",},
+{{0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"f2 41 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"3e ff e0             \tnotrack jmpq *%rax",},
+{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"3e ff 20             \tnotrack jmpq *(%rax)",},
+{{0x3e, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"3e 41 ff 20          \tnotrack jmpq *(%r8)",},
+{{0x3e, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e ff 24 25 78 56 34 12 \tnotrack jmpq *0x12345678",},
+{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e 41 ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect",
+"3e f2 ff e0          \tnotrack bnd jmpq *%rax",},
+{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"3e f2 ff 20          \tnotrack bnd jmpq *(%rax)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0x20, }, 5, 0, "jmp", "indirect",
+"3e f2 41 ff 20       \tnotrack bnd jmpq *(%r8)",},
+{{0x3e, 0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e f2 ff 24 25 78 56 34 12 \tnotrack bnd jmpq *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect",
+"3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",},
 {{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
 "0f 01 cf             \tencls  ",},
 {{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index ddbf07c50bb8..c3808e94c46e 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -1771,6 +1771,145 @@ int main(void)
 	asm volatile("enqcmds (%eax),%ebx");
 	asm volatile("enqcmds 0x12345678(%eax),%ecx");
 
+	/* incsspd/q */
+
+	asm volatile("incsspd %eax");
+	asm volatile("incsspd %r8d");
+	asm volatile("incsspq %rax");
+	asm volatile("incsspq %r8");
+	/* Also check instructions in the same group encoding as incsspd/q */
+	asm volatile("xrstor (%rax)");
+	asm volatile("xrstor (%r8)");
+	asm volatile("xrstor (0x12345678)");
+	asm volatile("xrstor 0x12345678(%rax,%rcx,8)");
+	asm volatile("xrstor 0x12345678(%r8,%rcx,8)");
+	asm volatile("lfence");
+
+	/* rdsspd/q */
+
+	asm volatile("rdsspd %eax");
+	asm volatile("rdsspd %r8d");
+	asm volatile("rdsspq %rax");
+	asm volatile("rdsspq %r8");
+
+	/* saveprevssp */
+
+	asm volatile("saveprevssp");
+
+	/* rstorssp */
+
+	asm volatile("rstorssp (%rax)");
+	asm volatile("rstorssp (%r8)");
+	asm volatile("rstorssp (0x12345678)");
+	asm volatile("rstorssp 0x12345678(%rax,%rcx,8)");
+	asm volatile("rstorssp 0x12345678(%r8,%rcx,8)");
+
+	/* wrssd/q */
+
+	asm volatile("wrssd %ecx,(%rax)");
+	asm volatile("wrssd %edx,(%r8)");
+	asm volatile("wrssd %edx,(0x12345678)");
+	asm volatile("wrssd %edx,0x12345678(%rax,%rcx,8)");
+	asm volatile("wrssd %edx,0x12345678(%r8,%rcx,8)");
+	asm volatile("wrssq %rcx,(%rax)");
+	asm volatile("wrssq %rdx,(%r8)");
+	asm volatile("wrssq %rdx,(0x12345678)");
+	asm volatile("wrssq %rdx,0x12345678(%rax,%rcx,8)");
+	asm volatile("wrssq %rdx,0x12345678(%r8,%rcx,8)");
+
+	/* wrussd/q */
+
+	asm volatile("wrussd %ecx,(%rax)");
+	asm volatile("wrussd %edx,(%r8)");
+	asm volatile("wrussd %edx,(0x12345678)");
+	asm volatile("wrussd %edx,0x12345678(%rax,%rcx,8)");
+	asm volatile("wrussd %edx,0x12345678(%r8,%rcx,8)");
+	asm volatile("wrussq %rcx,(%rax)");
+	asm volatile("wrussq %rdx,(%r8)");
+	asm volatile("wrussq %rdx,(0x12345678)");
+	asm volatile("wrussq %rdx,0x12345678(%rax,%rcx,8)");
+	asm volatile("wrussq %rdx,0x12345678(%r8,%rcx,8)");
+
+	/* setssbsy */
+
+	asm volatile("setssbsy");
+	/* Also check instructions in the same group encoding as setssbsy */
+	asm volatile("rdpkru");
+	asm volatile("wrpkru");
+
+	/* clrssbsy */
+
+	asm volatile("clrssbsy (%rax)");
+	asm volatile("clrssbsy (%r8)");
+	asm volatile("clrssbsy (0x12345678)");
+	asm volatile("clrssbsy 0x12345678(%rax,%rcx,8)");
+	asm volatile("clrssbsy 0x12345678(%r8,%rcx,8)");
+
+	/* endbr32/64 */
+
+	asm volatile("endbr32");
+	asm volatile("endbr64");
+
+	/* call with/without notrack prefix */
+
+	asm volatile("callq *%rax");				/* Expecting: call indirect 0 */
+	asm volatile("callq *(%rax)");				/* Expecting: call indirect 0 */
+	asm volatile("callq *(%r8)");				/* Expecting: call indirect 0 */
+	asm volatile("callq *(0x12345678)");			/* Expecting: call indirect 0 */
+	asm volatile("callq *0x12345678(%rax,%rcx,8)");		/* Expecting: call indirect 0 */
+	asm volatile("callq *0x12345678(%r8,%rcx,8)");		/* Expecting: call indirect 0 */
+
+	asm volatile("bnd callq *%rax");			/* Expecting: call indirect 0 */
+	asm volatile("bnd callq *(%rax)");			/* Expecting: call indirect 0 */
+	asm volatile("bnd callq *(%r8)");			/* Expecting: call indirect 0 */
+	asm volatile("bnd callq *(0x12345678)");		/* Expecting: call indirect 0 */
+	asm volatile("bnd callq *0x12345678(%rax,%rcx,8)");	/* Expecting: call indirect 0 */
+	asm volatile("bnd callq *0x12345678(%r8,%rcx,8)");	/* Expecting: call indirect 0 */
+
+	asm volatile("notrack callq *%rax");			/* Expecting: call indirect 0 */
+	asm volatile("notrack callq *(%rax)");			/* Expecting: call indirect 0 */
+	asm volatile("notrack callq *(%r8)");			/* Expecting: call indirect 0 */
+	asm volatile("notrack callq *(0x12345678)");		/* Expecting: call indirect 0 */
+	asm volatile("notrack callq *0x12345678(%rax,%rcx,8)");	/* Expecting: call indirect 0 */
+	asm volatile("notrack callq *0x12345678(%r8,%rcx,8)");	/* Expecting: call indirect 0 */
+
+	asm volatile("notrack bnd callq *%rax");		/* Expecting: call indirect 0 */
+	asm volatile("notrack bnd callq *(%rax)");		/* Expecting: call indirect 0 */
+	asm volatile("notrack bnd callq *(%r8)");		/* Expecting: call indirect 0 */
+	asm volatile("notrack bnd callq *(0x12345678)");	/* Expecting: call indirect 0 */
+	asm volatile("notrack bnd callq *0x12345678(%rax,%rcx,8)");	/* Expecting: call indirect 0 */
+	asm volatile("notrack bnd callq *0x12345678(%r8,%rcx,8)");	/* Expecting: call indirect 0 */
+
+	/* jmp with/without notrack prefix */
+
+	asm volatile("jmpq *%rax");				/* Expecting: jmp indirect 0 */
+	asm volatile("jmpq *(%rax)");				/* Expecting: jmp indirect 0 */
+	asm volatile("jmpq *(%r8)");				/* Expecting: jmp indirect 0 */
+	asm volatile("jmpq *(0x12345678)");			/* Expecting: jmp indirect 0 */
+	asm volatile("jmpq *0x12345678(%rax,%rcx,8)");		/* Expecting: jmp indirect 0 */
+	asm volatile("jmpq *0x12345678(%r8,%rcx,8)");		/* Expecting: jmp indirect 0 */
+
+	asm volatile("bnd jmpq *%rax");				/* Expecting: jmp indirect 0 */
+	asm volatile("bnd jmpq *(%rax)");			/* Expecting: jmp indirect 0 */
+	asm volatile("bnd jmpq *(%r8)");			/* Expecting: jmp indirect 0 */
+	asm volatile("bnd jmpq *(0x12345678)");			/* Expecting: jmp indirect 0 */
+	asm volatile("bnd jmpq *0x12345678(%rax,%rcx,8)");	/* Expecting: jmp indirect 0 */
+	asm volatile("bnd jmpq *0x12345678(%r8,%rcx,8)");	/* Expecting: jmp indirect 0 */
+
+	asm volatile("notrack jmpq *%rax");			/* Expecting: jmp indirect 0 */
+	asm volatile("notrack jmpq *(%rax)");			/* Expecting: jmp indirect 0 */
+	asm volatile("notrack jmpq *(%r8)");			/* Expecting: jmp indirect 0 */
+	asm volatile("notrack jmpq *(0x12345678)");		/* Expecting: jmp indirect 0 */
+	asm volatile("notrack jmpq *0x12345678(%rax,%rcx,8)");	/* Expecting: jmp indirect 0 */
+	asm volatile("notrack jmpq *0x12345678(%r8,%rcx,8)");	/* Expecting: jmp indirect 0 */
+
+	asm volatile("notrack bnd jmpq *%rax");			/* Expecting: jmp indirect 0 */
+	asm volatile("notrack bnd jmpq *(%rax)");		/* Expecting: jmp indirect 0 */
+	asm volatile("notrack bnd jmpq *(%r8)");		/* Expecting: jmp indirect 0 */
+	asm volatile("notrack bnd jmpq *(0x12345678)");		/* Expecting: jmp indirect 0 */
+	asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)");	/* Expecting: jmp indirect 0 */
+	asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)");	/* Expecting: jmp indirect 0 */
+
 #else  /* #ifdef __x86_64__ */
 
 	/* bound r32, mem (same op code as EVEX prefix) */
@@ -3434,6 +3573,103 @@ int main(void)
 	asm volatile("enqcmds (%si),%bx");
 	asm volatile("enqcmds 0x1234(%si),%cx");
 
+	/* incsspd */
+
+	asm volatile("incsspd %eax");
+	/* Also check instructions in the same group encoding as incsspd */
+	asm volatile("xrstor (%eax)");
+	asm volatile("xrstor (0x12345678)");
+	asm volatile("xrstor 0x12345678(%eax,%ecx,8)");
+	asm volatile("lfence");
+
+	/* rdsspd */
+
+	asm volatile("rdsspd %eax");
+
+	/* saveprevssp */
+
+	asm volatile("saveprevssp");
+
+	/* rstorssp */
+
+	asm volatile("rstorssp (%eax)");
+	asm volatile("rstorssp (0x12345678)");
+	asm volatile("rstorssp 0x12345678(%eax,%ecx,8)");
+
+	/* wrssd */
+
+	asm volatile("wrssd %ecx,(%eax)");
+	asm volatile("wrssd %edx,(0x12345678)");
+	asm volatile("wrssd %edx,0x12345678(%eax,%ecx,8)");
+
+	/* wrussd */
+
+	asm volatile("wrussd %ecx,(%eax)");
+	asm volatile("wrussd %edx,(0x12345678)");
+	asm volatile("wrussd %edx,0x12345678(%eax,%ecx,8)");
+
+	/* setssbsy */
+
+	asm volatile("setssbsy");
+	/* Also check instructions in the same group encoding as setssbsy */
+	asm volatile("rdpkru");
+	asm volatile("wrpkru");
+
+	/* clrssbsy */
+
+	asm volatile("clrssbsy (%eax)");
+	asm volatile("clrssbsy (0x12345678)");
+	asm volatile("clrssbsy 0x12345678(%eax,%ecx,8)");
+
+	/* endbr32/64 */
+
+	asm volatile("endbr32");
+	asm volatile("endbr64");
+
+	/* call with/without notrack prefix */
+
+	asm volatile("call *%eax");				/* Expecting: call indirect 0 */
+	asm volatile("call *(%eax)");				/* Expecting: call indirect 0 */
+	asm volatile("call *(0x12345678)");			/* Expecting: call indirect 0 */
+	asm volatile("call *0x12345678(%eax,%ecx,8)");		/* Expecting: call indirect 0 */
+
+	asm volatile("bnd call *%eax");				/* Expecting: call indirect 0 */
+	asm volatile("bnd call *(%eax)");			/* Expecting: call indirect 0 */
+	asm volatile("bnd call *(0x12345678)");			/* Expecting: call indirect 0 */
+	asm volatile("bnd call *0x12345678(%eax,%ecx,8)");	/* Expecting: call indirect 0 */
+
+	asm volatile("notrack call *%eax");			/* Expecting: call indirect 0 */
+	asm volatile("notrack call *(%eax)");			/* Expecting: call indirect 0 */
+	asm volatile("notrack call *(0x12345678)");		/* Expecting: call indirect 0 */
+	asm volatile("notrack call *0x12345678(%eax,%ecx,8)");	/* Expecting: call indirect 0 */
+
+	asm volatile("notrack bnd call *%eax");			/* Expecting: call indirect 0 */
+	asm volatile("notrack bnd call *(%eax)");		/* Expecting: call indirect 0 */
+	asm volatile("notrack bnd call *(0x12345678)");		/* Expecting: call indirect 0 */
+	asm volatile("notrack bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
+
+	/* jmp with/without notrack prefix */
+
+	asm volatile("jmp *%eax");				/* Expecting: jmp indirect 0 */
+	asm volatile("jmp *(%eax)");				/* Expecting: jmp indirect 0 */
+	asm volatile("jmp *(0x12345678)");			/* Expecting: jmp indirect 0 */
+	asm volatile("jmp *0x12345678(%eax,%ecx,8)");		/* Expecting: jmp indirect 0 */
+
+	asm volatile("bnd jmp *%eax");				/* Expecting: jmp indirect 0 */
+	asm volatile("bnd jmp *(%eax)");			/* Expecting: jmp indirect 0 */
+	asm volatile("bnd jmp *(0x12345678)");			/* Expecting: jmp indirect 0 */
+	asm volatile("bnd jmp *0x12345678(%eax,%ecx,8)");	/* Expecting: jmp indirect 0 */
+
+	asm volatile("notrack jmp *%eax");			/* Expecting: jmp indirect 0 */
+	asm volatile("notrack jmp *(%eax)");			/* Expecting: jmp indirect 0 */
+	asm volatile("notrack jmp *(0x12345678)");		/* Expecting: jmp indirect 0 */
+	asm volatile("notrack jmp *0x12345678(%eax,%ecx,8)");	/* Expecting: jmp indirect 0 */
+
+	asm volatile("notrack bnd jmp *%eax");			/* Expecting: jmp indirect 0 */
+	asm volatile("notrack bnd jmp *(%eax)");		/* Expecting: jmp indirect 0 */
+	asm volatile("notrack bnd jmp *(0x12345678)");		/* Expecting: jmp indirect 0 */
+	asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
+
 #endif /* #ifndef __x86_64__ */
 
 	/* SGX */
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index 7abc9fd4cbec..3da506e13f49 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -7,13 +7,13 @@
 #include <errno.h>
 #include <stdbool.h>
 
-#include "../../util/header.h"
-#include "../../util/debug.h"
-#include "../../util/pmu.h"
-#include "../../util/auxtrace.h"
-#include "../../util/intel-pt.h"
-#include "../../util/intel-bts.h"
-#include "../../util/evlist.h"
+#include "../../../util/header.h"
+#include "../../../util/debug.h"
+#include "../../../util/pmu.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/intel-pt.h"
+#include "../../../util/intel-bts.h"
+#include "../../../util/evlist.h"
 
 static
 struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist,
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index ac45015cc6ba..047dc00eafa6 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -3,12 +3,12 @@
 #include <linux/string.h>
 #include <linux/zalloc.h>
 
-#include "../../util/event.h"
-#include "../../util/synthetic-events.h"
-#include "../../util/machine.h"
-#include "../../util/tool.h"
-#include "../../util/map.h"
-#include "../../util/debug.h"
+#include "../../../util/event.h"
+#include "../../../util/synthetic-events.h"
+#include "../../../util/machine.h"
+#include "../../../util/tool.h"
+#include "../../../util/map.h"
+#include "../../../util/debug.h"
 
 #if defined(__x86_64__)
 
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index aa6deb463bf3..578c8c568ffd 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -7,8 +7,8 @@
 #include <string.h>
 #include <regex.h>
 
-#include "../../util/debug.h"
-#include "../../util/header.h"
+#include "../../../util/debug.h"
+#include "../../../util/header.h"
 
 static inline void
 cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 27d9e214d068..09f93800bffd 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -11,18 +11,18 @@
 #include <linux/log2.h>
 #include <linux/zalloc.h>
 
-#include "../../util/cpumap.h"
-#include "../../util/event.h"
-#include "../../util/evsel.h"
-#include "../../util/evlist.h"
-#include "../../util/mmap.h"
-#include "../../util/session.h"
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/record.h"
-#include "../../util/tsc.h"
-#include "../../util/auxtrace.h"
-#include "../../util/intel-bts.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/event.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evlist.h"
+#include "../../../util/mmap.h"
+#include "../../../util/session.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/record.h"
+#include "../../../util/tsc.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/intel-bts.h"
 #include <internal/lib.h> // page_size
 
 #define KiB(x) ((x) * 1024)
@@ -413,20 +413,6 @@ out_err:
 	return err;
 }
 
-static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
-{
-	struct intel_bts_recording *btsr =
-			container_of(itr, struct intel_bts_recording, itr);
-	struct evsel *evsel;
-
-	evlist__for_each_entry(btsr->evlist, evsel) {
-		if (evsel->core.attr.type == btsr->intel_bts_pmu->type)
-			return perf_evlist__enable_event_idx(btsr->evlist,
-							     evsel, idx);
-	}
-	return -EINVAL;
-}
-
 struct auxtrace_record *intel_bts_recording_init(int *err)
 {
 	struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
@@ -447,6 +433,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
 	}
 
 	btsr->intel_bts_pmu = intel_bts_pmu;
+	btsr->itr.pmu = intel_bts_pmu;
 	btsr->itr.recording_options = intel_bts_recording_options;
 	btsr->itr.info_priv_size = intel_bts_info_priv_size;
 	btsr->itr.info_fill = intel_bts_info_fill;
@@ -456,7 +443,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
 	btsr->itr.find_snapshot = intel_bts_find_snapshot;
 	btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
 	btsr->itr.reference = intel_bts_reference;
-	btsr->itr.read_finish = intel_bts_read_finish;
+	btsr->itr.read_finish = auxtrace_record__read_finish;
 	btsr->itr.alignment = sizeof(struct branch);
 	return &btsr->itr;
 }
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 20df442fdf36..1643aed8c4c8 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -13,23 +13,23 @@
 #include <linux/zalloc.h>
 #include <cpuid.h>
 
-#include "../../util/session.h"
-#include "../../util/event.h"
-#include "../../util/evlist.h"
-#include "../../util/evsel.h"
-#include "../../util/evsel_config.h"
-#include "../../util/cpumap.h"
-#include "../../util/mmap.h"
+#include "../../../util/session.h"
+#include "../../../util/event.h"
+#include "../../../util/evlist.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evsel_config.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/mmap.h"
 #include <subcmd/parse-options.h>
-#include "../../util/parse-events.h"
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/auxtrace.h"
-#include "../../util/record.h"
-#include "../../util/target.h"
-#include "../../util/tsc.h"
+#include "../../../util/parse-events.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/record.h"
+#include "../../../util/target.h"
+#include "../../../util/tsc.h"
 #include <internal/lib.h> // page_size
-#include "../../util/intel-pt.h"
+#include "../../../util/intel-pt.h"
 
 #define KiB(x) ((x) * 1024)
 #define MiB(x) ((x) * 1024 * 1024)
@@ -1166,20 +1166,6 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
 	return rdtsc();
 }
 
-static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
-{
-	struct intel_pt_recording *ptr =
-			container_of(itr, struct intel_pt_recording, itr);
-	struct evsel *evsel;
-
-	evlist__for_each_entry(ptr->evlist, evsel) {
-		if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
-			return perf_evlist__enable_event_idx(ptr->evlist, evsel,
-							     idx);
-	}
-	return -EINVAL;
-}
-
 struct auxtrace_record *intel_pt_recording_init(int *err)
 {
 	struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
@@ -1200,6 +1186,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
 	}
 
 	ptr->intel_pt_pmu = intel_pt_pmu;
+	ptr->itr.pmu = intel_pt_pmu;
 	ptr->itr.recording_options = intel_pt_recording_options;
 	ptr->itr.info_priv_size = intel_pt_info_priv_size;
 	ptr->itr.info_fill = intel_pt_info_fill;
@@ -1209,7 +1196,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
 	ptr->itr.find_snapshot = intel_pt_find_snapshot;
 	ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
 	ptr->itr.reference = intel_pt_reference;
-	ptr->itr.read_finish = intel_pt_read_finish;
+	ptr->itr.read_finish = auxtrace_record__read_finish;
 	/*
 	 * Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K
 	 * should give at least 1 PSB per sample.
diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c
index e17e080e76f4..31679c35d493 100644
--- a/tools/perf/arch/x86/util/machine.c
+++ b/tools/perf/arch/x86/util/machine.c
@@ -5,9 +5,9 @@
 #include <stdlib.h>
 
 #include <internal/lib.h> // page_size
-#include "../../util/machine.h"
-#include "../../util/map.h"
-#include "../../util/symbol.h"
+#include "../../../util/machine.h"
+#include "../../../util/map.h"
+#include "../../../util/symbol.h"
 #include <linux/ctype.h>
 
 #include <symbol/kallsyms.h>
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index c218b83e063b..fca81b39b09f 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -5,10 +5,10 @@
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 
-#include "../../perf-sys.h"
-#include "../../util/perf_regs.h"
-#include "../../util/debug.h"
-#include "../../util/event.h"
+#include "../../../perf-sys.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/debug.h"
+#include "../../../util/event.h"
 
 const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG(AX, PERF_REG_X86_AX),
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index e33ef5bc31c5..d48d608517fd 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -4,9 +4,9 @@
 #include <linux/stddef.h>
 #include <linux/perf_event.h>
 
-#include "../../util/intel-pt.h"
-#include "../../util/intel-bts.h"
-#include "../../util/pmu.h"
+#include "../../../util/intel-pt.h"
+#include "../../../util/intel-bts.h"
+#include "../../../util/pmu.h"
 
 struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
 {
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index fddb3ced9db6..4aa6de1aa67d 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -2,6 +2,10 @@
 #ifndef BENCH_H
 #define BENCH_H
 
+#include <sys/time.h>
+
+extern struct timeval bench__start, bench__end, bench__runtime;
+
 /*
  * The madvise transparent hugepage constants were added in glibc
  * 2.13. For compatibility with older versions of glibc, define these
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index bb617e568841..cadc18d42aa4 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -35,7 +35,6 @@
 
 static unsigned int nthreads = 0;
 static unsigned int nsecs    = 8;
-struct timeval start, end, runtime;
 static bool done, __verbose, randomize;
 
 /*
@@ -94,8 +93,8 @@ static void toggle_done(int sig __maybe_unused,
 {
 	/* inform all threads that we're done for the day */
 	done = true;
-	gettimeofday(&end, NULL);
-	timersub(&end, &start, &runtime);
+	gettimeofday(&bench__end, NULL);
+	timersub(&bench__end, &bench__start, &bench__runtime);
 }
 
 static void nest_epollfd(void)
@@ -313,6 +312,7 @@ int bench_epoll_ctl(int argc, const char **argv)
 		exit(EXIT_FAILURE);
 	}
 
+	memset(&act, 0, sizeof(act));
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
@@ -361,7 +361,7 @@ int bench_epoll_ctl(int argc, const char **argv)
 
 	threads_starting = nthreads;
 
-	gettimeofday(&start, NULL);
+	gettimeofday(&bench__start, NULL);
 
 	do_threads(worker, cpu);
 
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index 7af694437f4e..f938c585d512 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -90,7 +90,6 @@
 
 static unsigned int nthreads = 0;
 static unsigned int nsecs    = 8;
-struct timeval start, end, runtime;
 static bool wdone, done, __verbose, randomize, nonblocking;
 
 /*
@@ -276,8 +275,8 @@ static void toggle_done(int sig __maybe_unused,
 {
 	/* inform all threads that we're done for the day */
 	done = true;
-	gettimeofday(&end, NULL);
-	timersub(&end, &start, &runtime);
+	gettimeofday(&bench__end, NULL);
+	timersub(&bench__end, &bench__start, &bench__runtime);
 }
 
 static void print_summary(void)
@@ -287,7 +286,7 @@ static void print_summary(void)
 
 	printf("\nAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
 	       avg, rel_stddev_stats(stddev, avg),
-	       (int) runtime.tv_sec);
+	       (int)bench__runtime.tv_sec);
 }
 
 static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
@@ -427,6 +426,7 @@ int bench_epoll_wait(int argc, const char **argv)
 		exit(EXIT_FAILURE);
 	}
 
+	memset(&act, 0, sizeof(act));
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
@@ -479,7 +479,7 @@ int bench_epoll_wait(int argc, const char **argv)
 
 	threads_starting = nthreads;
 
-	gettimeofday(&start, NULL);
+	gettimeofday(&bench__start, NULL);
 
 	do_threads(worker, cpu);
 
@@ -519,7 +519,7 @@ int bench_epoll_wait(int argc, const char **argv)
 		qsort(worker, nthreads, sizeof(struct worker), cmpworker);
 
 	for (i = 0; i < nthreads; i++) {
-		unsigned long t = worker[i].ops/runtime.tv_sec;
+		unsigned long t = worker[i].ops / bench__runtime.tv_sec;
 
 		update_stats(&throughput_stats, t);
 
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 8ba0c3330a9a..65eebe06c04d 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -37,7 +37,7 @@ static unsigned int nfutexes = 1024;
 static bool fshared = false, done = false, silent = false;
 static int futex_flag = 0;
 
-struct timeval start, end, runtime;
+struct timeval bench__start, bench__end, bench__runtime;
 static pthread_mutex_t thread_lock;
 static unsigned int threads_starting;
 static struct stats throughput_stats;
@@ -103,8 +103,8 @@ static void toggle_done(int sig __maybe_unused,
 {
 	/* inform all threads that we're done for the day */
 	done = true;
-	gettimeofday(&end, NULL);
-	timersub(&end, &start, &runtime);
+	gettimeofday(&bench__end, NULL);
+	timersub(&bench__end, &bench__start, &bench__runtime);
 }
 
 static void print_summary(void)
@@ -114,7 +114,7 @@ static void print_summary(void)
 
 	printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
 	       !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
-	       (int) runtime.tv_sec);
+	       (int)bench__runtime.tv_sec);
 }
 
 int bench_futex_hash(int argc, const char **argv)
@@ -137,6 +137,7 @@ int bench_futex_hash(int argc, const char **argv)
 	if (!cpu)
 		goto errmem;
 
+	memset(&act, 0, sizeof(act));
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
@@ -161,7 +162,7 @@ int bench_futex_hash(int argc, const char **argv)
 
 	threads_starting = nthreads;
 	pthread_attr_init(&thread_attr);
-	gettimeofday(&start, NULL);
+	gettimeofday(&bench__start, NULL);
 	for (i = 0; i < nthreads; i++) {
 		worker[i].tid = i;
 		worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
@@ -204,7 +205,7 @@ int bench_futex_hash(int argc, const char **argv)
 	pthread_mutex_destroy(&thread_lock);
 
 	for (i = 0; i < nthreads; i++) {
-		unsigned long t = worker[i].ops/runtime.tv_sec;
+		unsigned long t = worker[i].ops / bench__runtime.tv_sec;
 		update_stats(&throughput_stats, t);
 		if (!silent) {
 			if (nfutexes == 1)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index d0cae8125423..89fd8f325f38 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -37,7 +37,6 @@ static bool silent = false, multi = false;
 static bool done = false, fshared = false;
 static unsigned int nthreads = 0;
 static int futex_flag = 0;
-struct timeval start, end, runtime;
 static pthread_mutex_t thread_lock;
 static unsigned int threads_starting;
 static struct stats throughput_stats;
@@ -64,7 +63,7 @@ static void print_summary(void)
 
 	printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
 	       !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
-	       (int) runtime.tv_sec);
+	       (int)bench__runtime.tv_sec);
 }
 
 static void toggle_done(int sig __maybe_unused,
@@ -73,8 +72,8 @@ static void toggle_done(int sig __maybe_unused,
 {
 	/* inform all threads that we're done for the day */
 	done = true;
-	gettimeofday(&end, NULL);
-	timersub(&end, &start, &runtime);
+	gettimeofday(&bench__end, NULL);
+	timersub(&bench__end, &bench__start, &bench__runtime);
 }
 
 static void *workerfn(void *arg)
@@ -161,6 +160,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	if (!cpu)
 		err(EXIT_FAILURE, "calloc");
 
+	memset(&act, 0, sizeof(act));
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
@@ -185,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
 
 	threads_starting = nthreads;
 	pthread_attr_init(&thread_attr);
-	gettimeofday(&start, NULL);
+	gettimeofday(&bench__start, NULL);
 
 	create_threads(worker, thread_attr, cpu);
 	pthread_attr_destroy(&thread_attr);
@@ -211,7 +211,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	pthread_mutex_destroy(&thread_lock);
 
 	for (i = 0; i < nthreads; i++) {
-		unsigned long t = worker[i].ops/runtime.tv_sec;
+		unsigned long t = worker[i].ops / bench__runtime.tv_sec;
 
 		update_stats(&throughput_stats, t);
 		if (!silent)
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index a00a6891447a..7a15c2e61022 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -128,6 +128,7 @@ int bench_futex_requeue(int argc, const char **argv)
 	if (!cpu)
 		err(EXIT_FAILURE, "cpu_map__new");
 
+	memset(&act, 0, sizeof(act));
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index a053cf2b7039..cd2b81a845ac 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -234,6 +234,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 		exit(EXIT_FAILURE);
 	}
 
+	memset(&act, 0, sizeof(act));
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index df810096abfe..2dfcef3e371e 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -43,7 +43,7 @@ static bool done = false, silent = false, fshared = false;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static struct stats waketime_stats, wakeup_stats;
-static unsigned int ncpus, threads_starting, nthreads = 0;
+static unsigned int threads_starting, nthreads = 0;
 static int futex_flag = 0;
 
 static const struct option options[] = {
@@ -136,12 +136,13 @@ int bench_futex_wake(int argc, const char **argv)
 	if (!cpu)
 		err(EXIT_FAILURE, "calloc");
 
+	memset(&act, 0, sizeof(act));
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
 	if (!nthreads)
-		nthreads = ncpus;
+		nthreads = cpu->nr;
 
 	worker = calloc(nthreads, sizeof(*worker));
 	if (!worker)
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index ff61795a4d13..6c0a0412502e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -566,6 +566,8 @@ int cmd_annotate(int argc, const char **argv)
 	if (ret < 0)
 		return ret;
 
+	annotation_config__init(&annotate.opts);
+
 	argc = parse_options(argc, argv, options, annotate_usage, 0);
 	if (argc) {
 		/*
@@ -605,8 +607,6 @@ int cmd_annotate(int argc, const char **argv)
 	if (ret < 0)
 		goto out_delete;
 
-	annotation_config__init();
-
 	symbol_conf.try_vmlinux_path = true;
 
 	ret = symbol__init(&annotate.session->header.env);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index f8b6ae557d8b..5e697cd2224a 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -572,29 +572,12 @@ static void init_block_hist(struct block_hist *bh)
 	bh->valid = true;
 }
 
-static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b)
-{
-	struct block_info *bi_a = a->block_info;
-	struct block_info *bi_b = b->block_info;
-	int cmp;
-
-	if (!bi_a->sym || !bi_b->sym)
-		return -1;
-
-	cmp = strcmp(bi_a->sym->name, bi_b->sym->name);
-
-	if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end))
-		return 0;
-
-	return -1;
-}
-
 static struct hist_entry *get_block_pair(struct hist_entry *he,
 					 struct hists *hists_pair)
 {
 	struct rb_root_cached *root = hists_pair->entries_in;
 	struct rb_node *next = rb_first_cached(root);
-	int cmp;
+	int64_t cmp;
 
 	while (next != NULL) {
 		struct hist_entry *he_pair = rb_entry(next, struct hist_entry,
@@ -602,7 +585,7 @@ static struct hist_entry *get_block_pair(struct hist_entry *he,
 
 		next = rb_next(&he_pair->rb_node_in);
 
-		cmp = block_pair_cmp(he_pair, he);
+		cmp = __block_info__cmp(he_pair, he);
 		if (!cmp)
 			return he_pair;
 	}
@@ -1312,7 +1295,8 @@ static int cycles_printf(struct hist_entry *he, struct hist_entry *pair,
 	end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
 				he->ms.sym);
 
-	if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
+	if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) &&
+	    (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) {
 		scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld",
 			  start_line, end_line, block_he->diff.cycles);
 	} else {
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 26bc5923e6b5..70548df2abb9 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -449,7 +449,8 @@ static int perf_del_probe_events(struct strfilter *filter)
 		ret = probe_file__del_strlist(kfd, klist);
 		if (ret < 0)
 			goto error;
-	}
+	} else if (ret == -ENOMEM)
+		goto error;
 
 	ret2 = probe_file__get_events(ufd, filter, ulist);
 	if (ret2 == 0) {
@@ -459,7 +460,8 @@ static int perf_del_probe_events(struct strfilter *filter)
 		ret2 = probe_file__del_strlist(ufd, ulist);
 		if (ret2 < 0)
 			goto error;
-	}
+	} else if (ret2 == -ENOMEM)
+		goto error;
 
 	if (ret == -ENOENT && ret2 == -ENOENT)
 		pr_warning("\"%s\" does not hit any event.\n", str);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 9483b3f0cae3..5f4045df76f4 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -104,6 +104,7 @@ struct report {
 	bool			symbol_ipc;
 	bool			total_cycles_mode;
 	struct block_report	*block_reports;
+	int			nr_block_reports;
 };
 
 static int report__config(const char *var, const char *value, void *cb)
@@ -185,24 +186,23 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
 {
 	struct hist_entry *he = iter->he;
 	struct report *rep = arg;
-	struct branch_info *bi;
+	struct branch_info *bi = he->branch_info;
 	struct perf_sample *sample = iter->sample;
 	struct evsel *evsel = iter->evsel;
 	int err;
 
+	branch_type_count(&rep->brtype_stat, &bi->flags,
+			  bi->from.addr, bi->to.addr);
+
 	if (!ui__has_annotation() && !rep->symbol_ipc)
 		return 0;
 
-	bi = he->branch_info;
 	err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
 	if (err)
 		goto out;
 
 	err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
 
-	branch_type_count(&rep->brtype_stat, &bi->flags,
-			  bi->from.addr, bi->to.addr);
-
 out:
 	return err;
 }
@@ -966,8 +966,19 @@ static int __cmd_report(struct report *rep)
 	report__output_resort(rep);
 
 	if (rep->total_cycles_mode) {
+		int block_hpps[6] = {
+			PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT,
+			PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
+			PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
+			PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
+			PERF_HPP_REPORT__BLOCK_RANGE,
+			PERF_HPP_REPORT__BLOCK_DSO,
+		};
+
 		rep->block_reports = block_info__create_report(session->evlist,
-							       rep->total_cycles);
+							       rep->total_cycles,
+							       block_hpps, 6,
+							       &rep->nr_block_reports);
 		if (!rep->block_reports)
 			return -1;
 	}
@@ -1507,7 +1518,7 @@ repeat:
 			symbol_conf.priv_size += sizeof(u32);
 			symbol_conf.sort_by_name = true;
 		}
-		annotation_config__init();
+		annotation_config__init(&report.annotation_opts);
 	}
 
 	if (symbol__init(&session->header.env) < 0)
@@ -1551,8 +1562,11 @@ error:
 		zfree(&report.ptime_range);
 	}
 
-	if (report.block_reports)
-		zfree(&report.block_reports);
+	if (report.block_reports) {
+		block_info__free_report(report.block_reports,
+					report.nr_block_reports);
+		report.block_reports = NULL;
+	}
 
 	zstd_fini(&(session->zstd_data));
 	perf_session__delete(session);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index e2406b291c1c..656b347f6dd8 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -735,6 +735,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
 					struct perf_event_attr *attr, FILE *fp)
 {
 	struct branch_stack *br = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
 	struct addr_location alf, alt;
 	u64 i, from, to;
 	int printed = 0;
@@ -743,8 +744,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
 		return 0;
 
 	for (i = 0; i < br->nr; i++) {
-		from = br->entries[i].from;
-		to   = br->entries[i].to;
+		from = entries[i].from;
+		to   = entries[i].to;
 
 		if (PRINT_FIELD(DSO)) {
 			memset(&alf, 0, sizeof(alf));
@@ -768,10 +769,10 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
 		}
 
 		printed += fprintf(fp, "/%c/%c/%c/%d ",
-			mispred_str( br->entries + i),
-			br->entries[i].flags.in_tx? 'X' : '-',
-			br->entries[i].flags.abort? 'A' : '-',
-			br->entries[i].flags.cycles);
+			mispred_str(entries + i),
+			entries[i].flags.in_tx ? 'X' : '-',
+			entries[i].flags.abort ? 'A' : '-',
+			entries[i].flags.cycles);
 	}
 
 	return printed;
@@ -782,6 +783,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
 					   struct perf_event_attr *attr, FILE *fp)
 {
 	struct branch_stack *br = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
 	struct addr_location alf, alt;
 	u64 i, from, to;
 	int printed = 0;
@@ -793,8 +795,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
 
 		memset(&alf, 0, sizeof(alf));
 		memset(&alt, 0, sizeof(alt));
-		from = br->entries[i].from;
-		to   = br->entries[i].to;
+		from = entries[i].from;
+		to   = entries[i].to;
 
 		thread__find_symbol_fb(thread, sample->cpumode, from, &alf);
 		thread__find_symbol_fb(thread, sample->cpumode, to, &alt);
@@ -813,10 +815,10 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
 			printed += fprintf(fp, ")");
 		}
 		printed += fprintf(fp, "/%c/%c/%c/%d ",
-			mispred_str( br->entries + i),
-			br->entries[i].flags.in_tx? 'X' : '-',
-			br->entries[i].flags.abort? 'A' : '-',
-			br->entries[i].flags.cycles);
+			mispred_str(entries + i),
+			entries[i].flags.in_tx ? 'X' : '-',
+			entries[i].flags.abort ? 'A' : '-',
+			entries[i].flags.cycles);
 	}
 
 	return printed;
@@ -827,6 +829,7 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
 					   struct perf_event_attr *attr, FILE *fp)
 {
 	struct branch_stack *br = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
 	struct addr_location alf, alt;
 	u64 i, from, to;
 	int printed = 0;
@@ -838,8 +841,8 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
 
 		memset(&alf, 0, sizeof(alf));
 		memset(&alt, 0, sizeof(alt));
-		from = br->entries[i].from;
-		to   = br->entries[i].to;
+		from = entries[i].from;
+		to   = entries[i].to;
 
 		if (thread__find_map_fb(thread, sample->cpumode, from, &alf) &&
 		    !alf.map->dso->adjust_symbols)
@@ -862,10 +865,10 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
 			printed += fprintf(fp, ")");
 		}
 		printed += fprintf(fp, "/%c/%c/%c/%d ",
-			mispred_str(br->entries + i),
-			br->entries[i].flags.in_tx ? 'X' : '-',
-			br->entries[i].flags.abort ? 'A' : '-',
-			br->entries[i].flags.cycles);
+			mispred_str(entries + i),
+			entries[i].flags.in_tx ? 'X' : '-',
+			entries[i].flags.abort ? 'A' : '-',
+			entries[i].flags.cycles);
 	}
 
 	return printed;
@@ -1053,6 +1056,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 					    struct machine *machine, FILE *fp)
 {
 	struct branch_stack *br = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
 	u64 start, end;
 	int i, insn, len, nr, ilen, printed = 0;
 	struct perf_insn x;
@@ -1073,31 +1077,31 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 	printed += fprintf(fp, "%c", '\n');
 
 	/* Handle first from jump, of which we don't know the entry. */
-	len = grab_bb(buffer, br->entries[nr-1].from,
-			br->entries[nr-1].from,
+	len = grab_bb(buffer, entries[nr-1].from,
+			entries[nr-1].from,
 			machine, thread, &x.is64bit, &x.cpumode, false);
 	if (len > 0) {
-		printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
+		printed += ip__fprintf_sym(entries[nr - 1].from, thread,
 					   x.cpumode, x.cpu, &lastsym, attr, fp);
-		printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
+		printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1],
 					    &x, buffer, len, 0, fp, &total_cycles);
 		if (PRINT_FIELD(SRCCODE))
-			printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from);
+			printed += print_srccode(thread, x.cpumode, entries[nr - 1].from);
 	}
 
 	/* Print all blocks */
 	for (i = nr - 2; i >= 0; i--) {
-		if (br->entries[i].from || br->entries[i].to)
+		if (entries[i].from || entries[i].to)
 			pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i,
-				 br->entries[i].from,
-				 br->entries[i].to);
-		start = br->entries[i + 1].to;
-		end   = br->entries[i].from;
+				 entries[i].from,
+				 entries[i].to);
+		start = entries[i + 1].to;
+		end   = entries[i].from;
 
 		len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
 		/* Patch up missing kernel transfers due to ring filters */
 		if (len == -ENXIO && i > 0) {
-			end = br->entries[--i].from;
+			end = entries[--i].from;
 			pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end);
 			len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
 		}
@@ -1110,7 +1114,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 
 			printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
 			if (ip == end) {
-				printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp,
+				printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp,
 							    &total_cycles);
 				if (PRINT_FIELD(SRCCODE))
 					printed += print_srccode(thread, x.cpumode, ip);
@@ -1134,9 +1138,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 	 * Hit the branch? In this case we are already done, and the target
 	 * has not been executed yet.
 	 */
-	if (br->entries[0].from == sample->ip)
+	if (entries[0].from == sample->ip)
 		goto out;
-	if (br->entries[0].flags.abort)
+	if (entries[0].flags.abort)
 		goto out;
 
 	/*
@@ -1147,7 +1151,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 	 * between final branch and sample. When this happens just
 	 * continue walking after the last TO until we hit a branch.
 	 */
-	start = br->entries[0].to;
+	start = entries[0].to;
 	end = sample->ip;
 	if (end < start) {
 		/* Missing jump. Scan 128 bytes for the next branch */
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a098c2ebf4ea..ec053dc1e35c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -929,6 +929,10 @@ static struct option stat_options[] = {
 	OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
 			 "Configure all used events to run in user space.",
 			 PARSE_OPT_EXCLUSIVE),
+	OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
+		    "Use with 'percore' event qualifier to show the event "
+		    "counts of one hardware thread by sum up total hardware "
+		    "threads of same physical core"),
 	OPT_END()
 };
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 8affcab75604..d2539b793f9d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -143,7 +143,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
 		return err;
 	}
 
-	err = symbol__annotate(&he->ms, evsel, 0, &top->annotation_opts, NULL);
+	err = symbol__annotate(&he->ms, evsel, &top->annotation_opts, NULL);
 	if (err == 0) {
 		top->sym_filter_entry = he;
 	} else {
@@ -684,7 +684,9 @@ repeat:
 	delay_msecs = top->delay_secs * MSEC_PER_SEC;
 	set_term_quiet_input(&save);
 	/* trash return*/
-	getc(stdin);
+	clearerr(stdin);
+	if (poll(&stdin_poll, 1, 0) > 0)
+		getc(stdin);
 
 	while (!done) {
 		perf_top__print_sym_table(top);
@@ -1683,7 +1685,7 @@ int cmd_top(int argc, const char **argv)
 	if (status < 0)
 		goto out_delete_evlist;
 
-	annotation_config__init();
+	annotation_config__init(&top.annotation_opts);
 
 	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
 	status = symbol__init(NULL);
diff --git a/tools/perf/include/bpf/pid_filter.h b/tools/perf/include/bpf/pid_filter.h
index 607189a315b2..6e61c4bdf548 100644
--- a/tools/perf/include/bpf/pid_filter.h
+++ b/tools/perf/include/bpf/pid_filter.h
@@ -3,7 +3,7 @@
 #ifndef _PERF_BPF_PID_FILTER_
 #define _PERF_BPF_PID_FILTER_
 
-#include <bpf/bpf.h>
+#include <bpf.h>
 
 #define pid_filter(name) pid_map(name, bool)
 
diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h
index 7ca6fa5463ee..316af5b2ff35 100644
--- a/tools/perf/include/bpf/stdio.h
+++ b/tools/perf/include/bpf/stdio.h
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#include <bpf/bpf.h>
+#include <bpf.h>
 
 struct bpf_map SEC("maps") __bpf_stdout__ = {
        .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
diff --git a/tools/perf/include/bpf/unistd.h b/tools/perf/include/bpf/unistd.h
index d1a35b6c649d..ca7877f9a976 100644
--- a/tools/perf/include/bpf/unistd.h
+++ b/tools/perf/include/bpf/unistd.h
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: LGPL-2.1
 
-#include <bpf/bpf.h>
+#include <bpf.h>
 
 static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid;
 
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
index 5e36bc2468d0..c998e4f1d1d2 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
@@ -4,27 +4,27 @@
 		"EventCode": "80",
 		"EventName": "ECC_FUNCTION_COUNT",
 		"BriefDescription": "ECC Function Count",
-		"PublicDescription": "Long ECC function Count"
+		"PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions issued by the CPU."
 	},
 	{
 		"Unit": "CPU-M-CF",
 		"EventCode": "81",
 		"EventName": "ECC_CYCLES_COUNT",
 		"BriefDescription": "ECC Cycles Count",
-		"PublicDescription": "Long ECC Function cycles count"
+		"PublicDescription": "This counter counts the total number of CPU cycles when the ECC coprocessor is busy performing the elliptic-curve cryptography (ECC) functions issued by the CPU."
 	},
 	{
 		"Unit": "CPU-M-CF",
 		"EventCode": "82",
 		"EventName": "ECC_BLOCKED_FUNCTION_COUNT",
 		"BriefDescription": "Ecc Blocked Function Count",
-		"PublicDescription": "Long ECC blocked function count"
+		"PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions that are issued by the CPU and are blocked because the ECC coprocessor is busy performing a function issued by another CPU."
 	},
 	{
 		"Unit": "CPU-M-CF",
 		"EventCode": "83",
 		"EventName": "ECC_BLOCKED_CYCLES_COUNT",
 		"BriefDescription": "ECC Blocked Cycles Count",
-		"PublicDescription": "Long ECC blocked cycles count"
+		"PublicDescription": "This counter counts the total number of CPU cycles blocked for the elliptic-curve cryptography (ECC) functions issued by the CPU because the ECC coprocessor is busy performing a function issued by another CPU."
 	},
 ]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
index 89e070727e1b..2df2e231e9ee 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
@@ -25,7 +25,7 @@
 		"EventCode": "131",
 		"EventName": "DTLB2_HPAGE_WRITES",
 		"BriefDescription": "DTLB2 One-Megabyte Page Writes",
-		"PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done"
+		"PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page"
 	},
 	{
 		"Unit": "CPU-M-CF",
@@ -358,6 +358,34 @@
 	},
 	{
 		"Unit": "CPU-M-CF",
+		"EventCode": "247",
+		"EventName": "DFLT_ACCESS",
+		"BriefDescription": "Cycles CPU spent obtaining access to Deflate unit",
+		"PublicDescription": "Cycles CPU spent obtaining access to Deflate unit"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "252",
+		"EventName": "DFLT_CYCLES",
+		"BriefDescription": "Cycles CPU is using Deflate unit",
+		"PublicDescription": "Cycles CPU is using Deflate unit"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "264",
+		"EventName": "DFLT_CC",
+		"BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed",
+		"PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "265",
+		"EventName": "DFLT_CCERROR",
+		"BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2",
+		"PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2"
+	},
+	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "448",
 		"EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
 		"BriefDescription": "Cycle count with one thread active",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index f94653229dd4..a728c6e5119b 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -215,7 +215,8 @@
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
         "MetricGroup": "TLB",
-        "MetricName": "Page_Walks_Utilization"
+        "MetricName": "Page_Walks_Utilization",
+        "MetricConstraint": "NO_NMI_WATCHDOG"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
index e7feb60f9fa9..f97e8316ad2f 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -215,7 +215,8 @@
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
         "MetricGroup": "TLB",
-        "MetricName": "Page_Walks_Utilization"
+        "MetricName": "Page_Walks_Utilization",
+        "MetricConstraint": "NO_NMI_WATCHDOG"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 21d7a0c2c2e8..35f5db1786f7 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -215,7 +215,8 @@
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
         "MetricGroup": "TLB",
-        "MetricName": "Page_Walks_Utilization"
+        "MetricName": "Page_Walks_Utilization",
+        "MetricConstraint": "NO_NMI_WATCHDOG"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 079c77b6a2fd..3c4236a5bad8 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -323,7 +323,7 @@ static int print_events_table_entry(void *data, char *name, char *event,
 				    char *pmu, char *unit, char *perpkg,
 				    char *metric_expr,
 				    char *metric_name, char *metric_group,
-				    char *deprecated)
+				    char *deprecated, char *metric_constraint)
 {
 	struct perf_entry_data *pd = data;
 	FILE *outfp = pd->outfp;
@@ -357,6 +357,8 @@ static int print_events_table_entry(void *data, char *name, char *event,
 		fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group);
 	if (deprecated)
 		fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated);
+	if (metric_constraint)
+		fprintf(outfp, "\t.metric_constraint = \"%s\",\n", metric_constraint);
 	fprintf(outfp, "},\n");
 
 	return 0;
@@ -375,6 +377,7 @@ struct event_struct {
 	char *metric_name;
 	char *metric_group;
 	char *deprecated;
+	char *metric_constraint;
 };
 
 #define ADD_EVENT_FIELD(field) do { if (field) {		\
@@ -422,7 +425,7 @@ static int save_arch_std_events(void *data, char *name, char *event,
 				char *desc, char *long_desc, char *pmu,
 				char *unit, char *perpkg, char *metric_expr,
 				char *metric_name, char *metric_group,
-				char *deprecated)
+				char *deprecated, char *metric_constraint)
 {
 	struct event_struct *es;
 
@@ -486,7 +489,7 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc,
 	  char **name, char **long_desc, char **pmu, char **filter,
 	  char **perpkg, char **unit, char **metric_expr, char **metric_name,
 	  char **metric_group, unsigned long long eventcode,
-	  char **deprecated)
+	  char **deprecated, char **metric_constraint)
 {
 	/* try to find matching event from arch standard values */
 	struct event_struct *es;
@@ -515,7 +518,7 @@ int json_events(const char *fn,
 		      char *pmu, char *unit, char *perpkg,
 		      char *metric_expr,
 		      char *metric_name, char *metric_group,
-		      char *deprecated),
+		      char *deprecated, char *metric_constraint),
 	  void *data)
 {
 	int err;
@@ -545,6 +548,7 @@ int json_events(const char *fn,
 		char *metric_name = NULL;
 		char *metric_group = NULL;
 		char *deprecated = NULL;
+		char *metric_constraint = NULL;
 		char *arch_std = NULL;
 		unsigned long long eventcode = 0;
 		struct msrmap *msr = NULL;
@@ -629,6 +633,8 @@ int json_events(const char *fn,
 				addfield(map, &metric_name, "", "", val);
 			} else if (json_streq(map, field, "MetricGroup")) {
 				addfield(map, &metric_group, "", "", val);
+			} else if (json_streq(map, field, "MetricConstraint")) {
+				addfield(map, &metric_constraint, "", "", val);
 			} else if (json_streq(map, field, "MetricExpr")) {
 				addfield(map, &metric_expr, "", "", val);
 				for (s = metric_expr; *s; s++)
@@ -670,13 +676,13 @@ int json_events(const char *fn,
 					&long_desc, &pmu, &filter, &perpkg,
 					&unit, &metric_expr, &metric_name,
 					&metric_group, eventcode,
-					&deprecated);
+					&deprecated, &metric_constraint);
 			if (err)
 				goto free_strings;
 		}
 		err = func(data, name, real_event(name, event), desc, long_desc,
 			   pmu, unit, perpkg, metric_expr, metric_name,
-			   metric_group, deprecated);
+			   metric_group, deprecated, metric_constraint);
 free_strings:
 		free(event);
 		free(desc);
@@ -691,6 +697,7 @@ free_strings:
 		free(metric_expr);
 		free(metric_name);
 		free(metric_group);
+		free(metric_constraint);
 		free(arch_std);
 
 		if (err)
@@ -1082,10 +1089,9 @@ static int process_one_file(const char *fpath, const struct stat *sb,
  */
 int main(int argc, char *argv[])
 {
-	int rc;
+	int rc, ret = 0;
 	int maxfds;
 	char ldirname[PATH_MAX];
-
 	const char *arch;
 	const char *output_file;
 	const char *start_dirname;
@@ -1156,7 +1162,8 @@ int main(int argc, char *argv[])
 		/* Make build fail */
 		fclose(eventsfp);
 		free_arch_std_events();
-		return 1;
+		ret = 1;
+		goto out_free_mapfile;
 	} else if (rc) {
 		goto empty_map;
 	}
@@ -1174,14 +1181,17 @@ int main(int argc, char *argv[])
 		/* Make build fail */
 		fclose(eventsfp);
 		free_arch_std_events();
-		return 1;
+		ret = 1;
 	}
 
-	return 0;
+
+	goto out_free_mapfile;
 
 empty_map:
 	fclose(eventsfp);
 	create_empty_mapping(output_file);
 	free_arch_std_events();
-	return 0;
+out_free_mapfile:
+	free(mapfile);
+	return ret;
 }
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
index 5cda49a42143..2afc8304529e 100644
--- a/tools/perf/pmu-events/jevents.h
+++ b/tools/perf/pmu-events/jevents.h
@@ -8,7 +8,7 @@ int json_events(const char *fn,
 				char *pmu,
 				char *unit, char *perpkg, char *metric_expr,
 				char *metric_name, char *metric_group,
-				char *deprecated),
+				char *deprecated, char *metric_constraint),
 		void *data);
 char *get_cpu_str(void);
 
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index caeb577d36c9..53e76d5d5b37 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -18,6 +18,7 @@ struct pmu_event {
 	const char *metric_name;
 	const char *metric_group;
 	const char *deprecated;
+	const char *metric_constraint;
 };
 
 /*
diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl
index 4e7076c20616..d307ce8fd6ed 100644
--- a/tools/perf/scripts/perl/check-perf-trace.pl
+++ b/tools/perf/scripts/perl/check-perf-trace.pl
@@ -28,7 +28,7 @@ sub trace_end
 sub irq::softirq_entry
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm,
+	    $common_pid, $common_comm, $common_callchain,
 	    $vec) = @_;
 
 	print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
@@ -43,7 +43,7 @@ sub irq::softirq_entry
 sub kmem::kmalloc
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm,
+	    $common_pid, $common_comm, $common_callchain,
 	    $call_site, $ptr, $bytes_req, $bytes_alloc,
 	    $gfp_flags) = @_;
 
@@ -92,7 +92,7 @@ sub print_unhandled
 sub trace_unhandled
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm) = @_;
+	$common_pid, $common_comm, $common_callchain) = @_;
 
     $unhandled{$event_name}++;
 }
diff --git a/tools/perf/scripts/perl/failed-syscalls.pl b/tools/perf/scripts/perl/failed-syscalls.pl
index 55e7ae4c5c88..05954a8f363a 100644
--- a/tools/perf/scripts/perl/failed-syscalls.pl
+++ b/tools/perf/scripts/perl/failed-syscalls.pl
@@ -18,7 +18,7 @@ my %failed_syscalls;
 sub raw_syscalls::sys_exit
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm,
+	    $common_pid, $common_comm, $common_callchain,
 	    $id, $ret) = @_;
 
 	if ($ret < 0) {
diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl
index 168fa5e94b44..92a750b8552b 100644
--- a/tools/perf/scripts/perl/rw-by-file.pl
+++ b/tools/perf/scripts/perl/rw-by-file.pl
@@ -28,7 +28,7 @@ my %writes;
 sub syscalls::sys_enter_read
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+	$common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_;
 
     if ($common_comm eq $for_comm) {
 	$reads{$fd}{bytes_requested} += $count;
@@ -39,7 +39,7 @@ sub syscalls::sys_enter_read
 sub syscalls::sys_enter_write
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+	$common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_;
 
     if ($common_comm eq $for_comm) {
 	$writes{$fd}{bytes_written} += $count;
@@ -98,7 +98,7 @@ sub print_unhandled
 sub trace_unhandled
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm) = @_;
+	$common_pid, $common_comm, $common_callchain) = @_;
 
     $unhandled{$event_name}++;
 }
diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl
index 495698250b2f..d789fe39caab 100644
--- a/tools/perf/scripts/perl/rw-by-pid.pl
+++ b/tools/perf/scripts/perl/rw-by-pid.pl
@@ -24,7 +24,7 @@ my %writes;
 sub syscalls::sys_exit_read
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm,
+	$common_pid, $common_comm, $common_callchain,
 	$nr, $ret) = @_;
 
     if ($ret > 0) {
@@ -40,7 +40,7 @@ sub syscalls::sys_exit_read
 sub syscalls::sys_enter_read
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm,
+	$common_pid, $common_comm, $common_callchain,
 	$nr, $fd, $buf, $count) = @_;
 
     $reads{$common_pid}{bytes_requested} += $count;
@@ -51,7 +51,7 @@ sub syscalls::sys_enter_read
 sub syscalls::sys_exit_write
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm,
+	$common_pid, $common_comm, $common_callchain,
 	$nr, $ret) = @_;
 
     if ($ret <= 0) {
@@ -62,7 +62,7 @@ sub syscalls::sys_exit_write
 sub syscalls::sys_enter_write
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm,
+	$common_pid, $common_comm, $common_callchain,
 	$nr, $fd, $buf, $count) = @_;
 
     $writes{$common_pid}{bytes_written} += $count;
@@ -178,7 +178,7 @@ sub print_unhandled
 sub trace_unhandled
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm) = @_;
+	$common_pid, $common_comm, $common_callchain) = @_;
 
     $unhandled{$event_name}++;
 }
diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl
index 6473442568a2..eba4df67af6b 100644
--- a/tools/perf/scripts/perl/rwtop.pl
+++ b/tools/perf/scripts/perl/rwtop.pl
@@ -35,7 +35,7 @@ if (!$interval) {
 sub syscalls::sys_exit_read
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm,
+	$common_pid, $common_comm, $common_callchain,
 	$nr, $ret) = @_;
 
     print_check();
@@ -53,7 +53,7 @@ sub syscalls::sys_exit_read
 sub syscalls::sys_enter_read
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm,
+	$common_pid, $common_comm, $common_callchain,
 	$nr, $fd, $buf, $count) = @_;
 
     print_check();
@@ -66,7 +66,7 @@ sub syscalls::sys_enter_read
 sub syscalls::sys_exit_write
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm,
+	$common_pid, $common_comm, $common_callchain,
 	$nr, $ret) = @_;
 
     print_check();
@@ -79,7 +79,7 @@ sub syscalls::sys_exit_write
 sub syscalls::sys_enter_write
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm,
+	$common_pid, $common_comm, $common_callchain,
 	$nr, $fd, $buf, $count) = @_;
 
     print_check();
@@ -197,7 +197,7 @@ sub print_unhandled
 sub trace_unhandled
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm) = @_;
+	$common_pid, $common_comm, $common_callchain) = @_;
 
     $unhandled{$event_name}++;
 }
diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl
index efcfec5e347a..53444ff4ec7f 100644
--- a/tools/perf/scripts/perl/wakeup-latency.pl
+++ b/tools/perf/scripts/perl/wakeup-latency.pl
@@ -28,7 +28,7 @@ my $total_wakeups = 0;
 sub sched::sched_switch
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm,
+	$common_pid, $common_comm, $common_callchain,
 	$prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid,
 	$next_prio) = @_;
 
@@ -51,7 +51,7 @@ sub sched::sched_switch
 sub sched::sched_wakeup
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm,
+	$common_pid, $common_comm, $common_callchain,
 	$comm, $pid, $prio, $success, $target_cpu) = @_;
 
     $last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs);
@@ -101,7 +101,7 @@ sub print_unhandled
 sub trace_unhandled
 {
     my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	$common_pid, $common_comm) = @_;
+	$common_pid, $common_comm, $common_callchain) = @_;
 
     $unhandled{$event_name}++;
 }
diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore
index 8cc30e731c73..d053b325f728 100644
--- a/tools/perf/tests/.gitignore
+++ b/tools/perf/tests/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 llvm-src-base.c
 llvm-src-kbuild.c
 llvm-src-prologue.c
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index d0b935356274..489b50604cf2 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -19,7 +19,7 @@
 #include "../perf-sys.h"
 #include "cloexec.h"
 
-volatile long the_var;
+static volatile long the_var;
 
 static noinline int test_function(void)
 {
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 5f05db75cdd8..54d9516c9839 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -543,8 +543,11 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width)
 		return -1;
 
 	dir = opendir(st.dir);
-	if (!dir)
+	if (!dir) {
+		pr_err("failed to open shell test directory: %s\n",
+			st.dir);
 		return -1;
+	}
 
 	for_each_shell_test(dir, st.dir, ent) {
 		int curr = i++;
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 87843af4c118..28313e59d6f6 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -10,7 +10,7 @@ static int test(struct parse_ctx *ctx, const char *e, double val2)
 {
 	double val;
 
-	if (expr__parse(&val, ctx, &e))
+	if (expr__parse(&val, ctx, e))
 		TEST_ASSERT_VAL("parse test failed", 0);
 	TEST_ASSERT_VAL("unexpected value", val == val2);
 	return 0;
@@ -44,12 +44,12 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
 		return ret;
 
 	p = "FOO/0";
-	ret = expr__parse(&val, &ctx, &p);
-	TEST_ASSERT_VAL("division by zero", ret == 1);
+	ret = expr__parse(&val, &ctx, p);
+	TEST_ASSERT_VAL("division by zero", ret == -1);
 
 	p = "BAR/";
-	ret = expr__parse(&val, &ctx, &p);
-	TEST_ASSERT_VAL("missing operand", ret == 1);
+	ret = expr__parse(&val, &ctx, p);
+	TEST_ASSERT_VAL("missing operand", ret == -1);
 
 	TEST_ASSERT_VAL("find other",
 			expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 2762e1155238..14239e472187 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -99,6 +99,7 @@ static bool samples_same(const struct perf_sample *s1,
 
 	if (type & PERF_SAMPLE_BRANCH_STACK) {
 		COMP(branch_stack->nr);
+		COMP(branch_stack->hw_idx);
 		for (i = 0; i < s1->branch_stack->nr; i++)
 			MCOMP(branch_stack->entries[i]);
 	}
@@ -186,7 +187,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
 		u64 data[64];
 	} branch_stack = {
 		/* 1 branch_entry */
-		.data = {1, 211, 212, 213},
+		.data = {1, -1ULL, 211, 212, 213},
 	};
 	u64 regs[64];
 	const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL};
@@ -208,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
 		.transaction	= 112,
 		.raw_data	= (void *)raw_data,
 		.callchain	= &callchain.callchain,
+		.no_hw_idx      = false,
 		.branch_stack	= &branch_stack.branch_stack,
 		.user_regs	= {
 			.abi	= PERF_SAMPLE_REGS_ABI_64,
@@ -244,6 +246,9 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
 	if (sample_type & PERF_SAMPLE_REGS_INTR)
 		evsel.core.attr.sample_regs_intr = sample_regs;
 
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+		evsel.core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+
 	for (i = 0; i < sizeof(regs); i++)
 		*(i + (u8 *)regs) = i & 0xfe;
 
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index 7cb99b433888..c2cc42daf924 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -14,7 +14,7 @@ add_probe_vfs_getname() {
 	if [ $had_vfs_getname -eq 1 ] ; then
 		line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
 		perf probe -q       "vfs_getname=getname_flags:${line} pathname=result->name:string" || \
-		perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:string"
+		perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring"
 	fi
 }
 
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index badbddbb30f8..9023267e5643 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -754,10 +754,9 @@ static int annotate_browser__run(struct annotate_browser *browser,
 		"?             Search string backwards\n");
 			continue;
 		case 'r':
-			{
-				script_browse(NULL, NULL);
-				continue;
-			}
+			script_browse(NULL, NULL);
+			annotate_browser__show(&browser->b, title, help);
+			continue;
 		case 'k':
 			notes->options->show_linenr = !notes->options->show_linenr;
 			break;
@@ -834,13 +833,13 @@ show_sup_ins:
 			map_symbol__annotation_dump(ms, evsel, browser->opts);
 			continue;
 		case 't':
-			if (notes->options->show_total_period) {
-				notes->options->show_total_period = false;
-				notes->options->show_nr_samples = true;
-			} else if (notes->options->show_nr_samples)
-				notes->options->show_nr_samples = false;
+			if (symbol_conf.show_total_period) {
+				symbol_conf.show_total_period = false;
+				symbol_conf.show_nr_samples = true;
+			} else if (symbol_conf.show_nr_samples)
+				symbol_conf.show_nr_samples = false;
 			else
-				notes->options->show_total_period = true;
+				symbol_conf.show_total_period = true;
 			annotation__update_column_widths(notes);
 			continue;
 		case 'c':
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 22cc240f7371..35f9641bf670 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -174,7 +174,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel,
 	if (ms->map->dso->annotate_warned)
 		return -1;
 
-	err = symbol__annotate(ms, evsel, 0, &annotation__default_options, NULL);
+	err = symbol__annotate(ms, evsel, &annotation__default_options, NULL);
 	if (err) {
 		char msg[BUFSIZ];
 		symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 07da6c790b63..c0cf8dff694e 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -121,7 +121,9 @@ perf-y += mem-events.o
 perf-y += vsprintf.o
 perf-y += units.o
 perf-y += time-utils.o
+perf-y += expr-flex.o
 perf-y += expr-bison.o
+perf-y += expr.o
 perf-y += branch.o
 perf-y += mem2node.o
 
@@ -189,9 +191,13 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y
 	$(call rule_mkdir)
 	$(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
 
+$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l
+
 $(OUTPUT)util/expr-bison.c: util/expr.y
 	$(call rule_mkdir)
-	$(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__
+	$(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_
 
 $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
 	$(call rule_mkdir)
@@ -203,12 +209,14 @@ $(OUTPUT)util/pmu-bison.c: util/pmu.y
 
 CFLAGS_parse-events-flex.o  += -w
 CFLAGS_pmu-flex.o           += -w
+CFLAGS_expr-flex.o          += -w
 CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
 CFLAGS_pmu-bison.o          += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
 CFLAGS_expr-bison.o         += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
 
 $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
 $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+$(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c
 
 CFLAGS_bitmap.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_find_bit.o      += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
@@ -216,6 +224,7 @@ CFLAGS_rbtree.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
 CFLAGS_libstring.o     += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_hweight.o       += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_parse-events.o  += -Wno-redundant-decls
+CFLAGS_expr.o          += -Wno-redundant-decls
 CFLAGS_header.o        += -include $(OUTPUT)PERF-VERSION-FILE
 
 $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index ca73fb74ad03..f1ea0d61eb5b 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1143,93 +1143,70 @@ out:
 }
 
 struct annotate_args {
-	size_t			 privsize;
-	struct arch		*arch;
-	struct map_symbol	 ms;
-	struct evsel	*evsel;
+	struct arch		  *arch;
+	struct map_symbol	  ms;
+	struct evsel		  *evsel;
 	struct annotation_options *options;
-	s64			 offset;
-	char			*line;
-	int			 line_nr;
+	s64			  offset;
+	char			  *line;
+	int			  line_nr;
 };
 
-static void annotation_line__delete(struct annotation_line *al)
+static void annotation_line__init(struct annotation_line *al,
+				  struct annotate_args *args,
+				  int nr)
 {
-	void *ptr = (void *) al - al->privsize;
+	al->offset = args->offset;
+	al->line = strdup(args->line);
+	al->line_nr = args->line_nr;
+	al->data_nr = nr;
+}
 
+static void annotation_line__exit(struct annotation_line *al)
+{
 	free_srcline(al->path);
 	zfree(&al->line);
-	free(ptr);
 }
 
-/*
- * Allocating the annotation line data with following
- * structure:
- *
- *    --------------------------------------
- *    private space | struct annotation_line
- *    --------------------------------------
- *
- * Size of the private space is stored in 'struct annotation_line'.
- *
- */
-static struct annotation_line *
-annotation_line__new(struct annotate_args *args, size_t privsize)
+static size_t disasm_line_size(int nr)
 {
 	struct annotation_line *al;
-	struct evsel *evsel = args->evsel;
-	size_t size = privsize + sizeof(*al);
-	int nr = 1;
-
-	if (perf_evsel__is_group_event(evsel))
-		nr = evsel->core.nr_members;
 
-	size += sizeof(al->data[0]) * nr;
-
-	al = zalloc(size);
-	if (al) {
-		al = (void *) al + privsize;
-		al->privsize   = privsize;
-		al->offset     = args->offset;
-		al->line       = strdup(args->line);
-		al->line_nr    = args->line_nr;
-		al->data_nr    = nr;
-	}
-
-	return al;
+	return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr));
 }
 
 /*
  * Allocating the disasm annotation line data with
  * following structure:
  *
- *    ------------------------------------------------------------
- *    privsize space | struct disasm_line | struct annotation_line
- *    ------------------------------------------------------------
+ *    -------------------------------------------
+ *    struct disasm_line | struct annotation_line
+ *    -------------------------------------------
  *
  * We have 'struct annotation_line' member as last member
  * of 'struct disasm_line' to have an easy access.
- *
  */
 static struct disasm_line *disasm_line__new(struct annotate_args *args)
 {
 	struct disasm_line *dl = NULL;
-	struct annotation_line *al;
-	size_t privsize = args->privsize + offsetof(struct disasm_line, al);
+	int nr = 1;
 
-	al = annotation_line__new(args, privsize);
-	if (al != NULL) {
-		dl = disasm_line(al);
+	if (perf_evsel__is_group_event(args->evsel))
+		nr = args->evsel->core.nr_members;
 
-		if (dl->al.line == NULL)
-			goto out_delete;
+	dl = zalloc(disasm_line_size(nr));
+	if (!dl)
+		return NULL;
 
-		if (args->offset != -1) {
-			if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
-				goto out_free_line;
+	annotation_line__init(&dl->al, args, nr);
+	if (dl->al.line == NULL)
+		goto out_delete;
 
-			disasm_line__init_ins(dl, args->arch, &args->ms);
-		}
+	if (args->offset != -1) {
+		if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+			goto out_free_line;
+
+		disasm_line__init_ins(dl, args->arch, &args->ms);
 	}
 
 	return dl;
@@ -1248,7 +1225,8 @@ void disasm_line__free(struct disasm_line *dl)
 	else
 		ins__delete(&dl->ops);
 	zfree(&dl->ins.name);
-	annotation_line__delete(&dl->al);
+	annotation_line__exit(&dl->al);
+	free(dl);
 }
 
 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name)
@@ -2149,13 +2127,12 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel)
 	annotation__calc_percent(notes, evsel, symbol__size(sym));
 }
 
-int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, size_t privsize,
+int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
 		     struct annotation_options *options, struct arch **parch)
 {
 	struct symbol *sym = ms->sym;
 	struct annotation *notes = symbol__annotation(sym);
 	struct annotate_args args = {
-		.privsize	= privsize,
 		.evsel		= evsel,
 		.options	= options,
 	};
@@ -2634,8 +2611,6 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
 
 		if (++al->jump_sources > notes->max_jump_sources)
 			notes->max_jump_sources = al->jump_sources;
-
-		++notes->nr_jumps;
 	}
 }
 
@@ -2644,6 +2619,8 @@ void annotation__set_offsets(struct annotation *notes, s64 size)
 	struct annotation_line *al;
 
 	notes->max_line_len = 0;
+	notes->nr_entries = 0;
+	notes->nr_asm_entries = 0;
 
 	list_for_each_entry(al, &notes->src->source, node) {
 		size_t line_len = strlen(al->line);
@@ -2790,7 +2767,7 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel,
 	struct symbol *sym = ms->sym;
 	struct rb_root source_line = RB_ROOT;
 
-	if (symbol__annotate(ms, evsel, 0, opts, NULL) < 0)
+	if (symbol__annotate(ms, evsel, opts, NULL) < 0)
 		return -1;
 
 	symbol__calc_percent(sym, evsel);
@@ -2915,9 +2892,9 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
 			percent = annotation_data__percent(&al->data[i], percent_type);
 
 			obj__set_percent_color(obj, percent, current_entry);
-			if (notes->options->show_total_period) {
+			if (symbol_conf.show_total_period) {
 				obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period);
-			} else if (notes->options->show_nr_samples) {
+			} else if (symbol_conf.show_nr_samples) {
 				obj__printf(obj, "%6" PRIu64 " ",
 						   al->data[i].he.nr_samples);
 			} else {
@@ -2931,8 +2908,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
 			obj__printf(obj, "%-*s", pcnt_width, " ");
 		else {
 			obj__printf(obj, "%-*s", pcnt_width,
-					   notes->options->show_total_period ? "Period" :
-					   notes->options->show_nr_samples ? "Samples" : "Percent");
+					   symbol_conf.show_total_period ? "Period" :
+					   symbol_conf.show_nr_samples ? "Samples" : "Percent");
 		}
 	}
 
@@ -3070,7 +3047,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
 	if (perf_evsel__is_group_event(evsel))
 		nr_pcnt = evsel->core.nr_members;
 
-	err = symbol__annotate(ms, evsel, 0, options, parch);
+	err = symbol__annotate(ms, evsel, options, parch);
 	if (err)
 		goto out_free_offsets;
 
@@ -3094,69 +3071,46 @@ out_free_offsets:
 	return err;
 }
 
-#define ANNOTATION__CFG(n) \
-	{ .name = #n, .value = &annotation__default_options.n, }
-
-/*
- * Keep the entries sorted, they are bsearch'ed
- */
-static struct annotation_config {
-	const char *name;
-	void *value;
-} annotation__configs[] = {
-	ANNOTATION__CFG(hide_src_code),
-	ANNOTATION__CFG(jump_arrows),
-	ANNOTATION__CFG(offset_level),
-	ANNOTATION__CFG(show_linenr),
-	ANNOTATION__CFG(show_nr_jumps),
-	ANNOTATION__CFG(show_nr_samples),
-	ANNOTATION__CFG(show_total_period),
-	ANNOTATION__CFG(use_offset),
-};
-
-#undef ANNOTATION__CFG
-
-static int annotation_config__cmp(const void *name, const void *cfgp)
+static int annotation__config(const char *var, const char *value, void *data)
 {
-	const struct annotation_config *cfg = cfgp;
-
-	return strcmp(name, cfg->name);
-}
-
-static int annotation__config(const char *var, const char *value,
-			    void *data __maybe_unused)
-{
-	struct annotation_config *cfg;
-	const char *name;
+	struct annotation_options *opt = data;
 
 	if (!strstarts(var, "annotate."))
 		return 0;
 
-	name = var + 9;
-	cfg = bsearch(name, annotation__configs, ARRAY_SIZE(annotation__configs),
-		      sizeof(struct annotation_config), annotation_config__cmp);
-
-	if (cfg == NULL)
-		pr_debug("%s variable unknown, ignoring...", var);
-	else if (strcmp(var, "annotate.offset_level") == 0) {
-		perf_config_int(cfg->value, name, value);
-
-		if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL)
-			*(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL;
-		else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL)
-			*(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL;
+	if (!strcmp(var, "annotate.offset_level")) {
+		perf_config_u8(&opt->offset_level, "offset_level", value);
+
+		if (opt->offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
+			opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL;
+		else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
+			opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+	} else if (!strcmp(var, "annotate.hide_src_code")) {
+		opt->hide_src_code = perf_config_bool("hide_src_code", value);
+	} else if (!strcmp(var, "annotate.jump_arrows")) {
+		opt->jump_arrows = perf_config_bool("jump_arrows", value);
+	} else if (!strcmp(var, "annotate.show_linenr")) {
+		opt->show_linenr = perf_config_bool("show_linenr", value);
+	} else if (!strcmp(var, "annotate.show_nr_jumps")) {
+		opt->show_nr_jumps = perf_config_bool("show_nr_jumps", value);
+	} else if (!strcmp(var, "annotate.show_nr_samples")) {
+		symbol_conf.show_nr_samples = perf_config_bool("show_nr_samples",
+								value);
+	} else if (!strcmp(var, "annotate.show_total_period")) {
+		symbol_conf.show_total_period = perf_config_bool("show_total_period",
+								value);
+	} else if (!strcmp(var, "annotate.use_offset")) {
+		opt->use_offset = perf_config_bool("use_offset", value);
 	} else {
-		*(bool *)cfg->value = perf_config_bool(name, value);
+		pr_debug("%s variable unknown, ignoring...", var);
 	}
+
 	return 0;
 }
 
-void annotation_config__init(void)
+void annotation_config__init(struct annotation_options *opt)
 {
-	perf_config(annotation__config, NULL);
-
-	annotation__default_options.show_total_period = symbol_conf.show_total_period;
-	annotation__default_options.show_nr_samples   = symbol_conf.show_nr_samples;
+	perf_config(annotation__config, opt);
 }
 
 static unsigned int parse_percent_type(char *str1, char *str2)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 455403e8fede..07c775938d46 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -83,8 +83,6 @@ struct annotation_options {
 	     full_path,
 	     show_linenr,
 	     show_nr_jumps,
-	     show_nr_samples,
-	     show_total_period,
 	     show_minmax_cycle,
 	     show_asm_raw,
 	     annotate_src;
@@ -141,7 +139,6 @@ struct annotation_line {
 	u64			 cycles;
 	u64			 cycles_max;
 	u64			 cycles_min;
-	size_t			 privsize;
 	char			*path;
 	u32			 idx;
 	int			 idx_asm;
@@ -282,7 +279,6 @@ struct annotation {
 	struct annotation_options *options;
 	struct annotation_line	**offsets;
 	int			nr_events;
-	int			nr_jumps;
 	int			max_jump_sources;
 	int			nr_entries;
 	int			nr_asm_entries;
@@ -309,7 +305,7 @@ static inline int annotation__cycles_width(struct annotation *notes)
 
 static inline int annotation__pcnt_width(struct annotation *notes)
 {
-	return (notes->options->show_total_period ? 12 : 7) * notes->nr_events;
+	return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events;
 }
 
 static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes)
@@ -352,7 +348,7 @@ struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists);
 void symbol__annotate_zero_histograms(struct symbol *sym);
 
 int symbol__annotate(struct map_symbol *ms,
-		     struct evsel *evsel, size_t privsize,
+		     struct evsel *evsel,
 		     struct annotation_options *options,
 		     struct arch **parch);
 int symbol__annotate2(struct map_symbol *ms,
@@ -413,7 +409,7 @@ static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
 }
 #endif
 
-void annotation_config__init(void);
+void annotation_config__init(struct annotation_options *opt);
 
 int annotate_parse_percent_type(const struct option *opt, const char *_str,
 				int unset);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index eb087e7df6f4..3571ce72ca28 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -629,8 +629,10 @@ int auxtrace_record__options(struct auxtrace_record *itr,
 			     struct evlist *evlist,
 			     struct record_opts *opts)
 {
-	if (itr)
+	if (itr) {
+		itr->evlist = evlist;
 		return itr->recording_options(itr, evlist, opts);
+	}
 	return 0;
 }
 
@@ -664,6 +666,24 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
 	return -EINVAL;
 }
 
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
+{
+	struct evsel *evsel;
+
+	if (!itr->evlist || !itr->pmu)
+		return -EINVAL;
+
+	evlist__for_each_entry(itr->evlist, evsel) {
+		if (evsel->core.attr.type == itr->pmu->type) {
+			if (evsel->disabled)
+				return 0;
+			return perf_evlist__enable_event_idx(itr->evlist, evsel,
+							     idx);
+		}
+	}
+	return -EINVAL;
+}
+
 /*
  * Event record size is 16-bit which results in a maximum size of about 64KiB.
  * Allow about 4KiB for the rest of the sample record, to give a maximum
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 749d72cd9c7b..e58ef160b599 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -29,6 +29,7 @@ struct record_opts;
 struct perf_record_auxtrace_error;
 struct perf_record_auxtrace_info;
 struct events_stats;
+struct perf_pmu;
 
 enum auxtrace_error_type {
        PERF_AUXTRACE_ERROR_ITRACE  = 1,
@@ -322,6 +323,8 @@ struct auxtrace_mmap_params {
  * @read_finish: called after reading from an auxtrace mmap
  * @alignment: alignment (if any) for AUX area data
  * @default_aux_sample_size: default sample size for --aux sample option
+ * @pmu: associated pmu
+ * @evlist: selected events list
  */
 struct auxtrace_record {
 	int (*recording_options)(struct auxtrace_record *itr,
@@ -346,6 +349,8 @@ struct auxtrace_record {
 	int (*read_finish)(struct auxtrace_record *itr, int idx);
 	unsigned int alignment;
 	unsigned int default_aux_sample_size;
+	struct perf_pmu *pmu;
+	struct evlist *evlist;
 };
 
 /**
@@ -537,6 +542,7 @@ int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
 				   struct auxtrace_mmap *mm,
 				   unsigned char *data, u64 *head, u64 *old);
 u64 auxtrace_record__reference(struct auxtrace_record *itr);
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx);
 
 int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
 				   off_t file_offset);
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index c4b030bf6ec2..423ec69bda6c 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -65,8 +65,7 @@ struct block_info *block_info__new(void)
 	return bi;
 }
 
-int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
-			struct hist_entry *left, struct hist_entry *right)
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	struct block_info *bi_l = left->block_info;
 	struct block_info *bi_r = right->block_info;
@@ -74,30 +73,27 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
 
 	if (!bi_l->sym || !bi_r->sym) {
 		if (!bi_l->sym && !bi_r->sym)
-			return 0;
+			return -1;
 		else if (!bi_l->sym)
 			return -1;
 		else
 			return 1;
 	}
 
-	if (bi_l->sym == bi_r->sym) {
-		if (bi_l->start == bi_r->start) {
-			if (bi_l->end == bi_r->end)
-				return 0;
-			else
-				return (int64_t)(bi_r->end - bi_l->end);
-		} else
-			return (int64_t)(bi_r->start - bi_l->start);
-	} else {
-		cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+	cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+	if (cmp)
 		return cmp;
-	}
 
-	if (bi_l->sym->start != bi_r->sym->start)
-		return (int64_t)(bi_r->sym->start - bi_l->sym->start);
+	if (bi_l->start != bi_r->start)
+		return (int64_t)(bi_r->start - bi_l->start);
 
-	return (int64_t)(bi_r->sym->end - bi_l->sym->end);
+	return (int64_t)(bi_r->end - bi_l->end);
+}
+
+int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+			struct hist_entry *left, struct hist_entry *right)
+{
+	return __block_info__cmp(left, right);
 }
 
 static void init_block_info(struct block_info *bi, struct symbol *sym,
@@ -185,6 +181,17 @@ static int block_column_width(struct perf_hpp_fmt *fmt,
 	return block_fmt->width;
 }
 
+static int color_pct(struct perf_hpp *hpp, int width, double pct)
+{
+#ifdef HAVE_SLANG_SUPPORT
+	if (use_browser) {
+		return __hpp__slsmg_color_printf(hpp, "%*.2f%%",
+						 width - 1, pct);
+	}
+#endif
+	return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct);
+}
+
 static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
 					struct perf_hpp *hpp,
 					struct hist_entry *he)
@@ -192,14 +199,11 @@ static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
 	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
 	struct block_info *bi = he->block_info;
 	double ratio = 0.0;
-	char buf[16];
 
 	if (block_fmt->total_cycles)
 		ratio = (double)bi->cycles / (double)block_fmt->total_cycles;
 
-	sprintf(buf, "%.2f%%", 100.0 * ratio);
-
-	return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+	return color_pct(hpp, block_fmt->width, 100.0 * ratio);
 }
 
 static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt,
@@ -252,16 +256,13 @@ static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
 	struct block_info *bi = he->block_info;
 	double ratio = 0.0;
 	u64 avg;
-	char buf[16];
 
 	if (block_fmt->block_cycles && bi->num_aggr) {
 		avg = bi->cycles_aggr / bi->num_aggr;
 		ratio = (double)avg / (double)block_fmt->block_cycles;
 	}
 
-	sprintf(buf, "%.2f%%", 100.0 * ratio);
-
-	return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+	return color_pct(hpp, block_fmt->width, 100.0 * ratio);
 }
 
 static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt,
@@ -295,7 +296,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
 	end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
 				he->ms.sym);
 
-	if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
+	if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) &&
+	    (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) {
 		scnprintf(buf, sizeof(buf), "[%s -> %s]",
 			  start_line, end_line);
 	} else {
@@ -348,7 +350,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
 
 	switch (idx) {
 	case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT:
-		fmt->entry = block_total_cycles_pct_entry;
+		fmt->color = block_total_cycles_pct_entry;
 		fmt->cmp = block_info__cmp;
 		fmt->sort = block_total_cycles_pct_sort;
 		break;
@@ -356,7 +358,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
 		fmt->entry = block_cycles_lbr_entry;
 		break;
 	case PERF_HPP_REPORT__BLOCK_CYCLES_PCT:
-		fmt->entry = block_cycles_pct_entry;
+		fmt->color = block_cycles_pct_entry;
 		break;
 	case PERF_HPP_REPORT__BLOCK_AVG_CYCLES:
 		fmt->entry = block_avg_cycles_entry;
@@ -376,33 +378,41 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
 }
 
 static void register_block_columns(struct perf_hpp_list *hpp_list,
-				   struct block_fmt *block_fmts)
+				   struct block_fmt *block_fmts,
+				   int *block_hpps, int nr_hpps)
 {
-	for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++)
-		hpp_register(&block_fmts[i], i, hpp_list);
+	for (int i = 0; i < nr_hpps; i++)
+		hpp_register(&block_fmts[i], block_hpps[i], hpp_list);
 }
 
-static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts)
+static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts,
+			    int *block_hpps, int nr_hpps)
 {
 	__hists__init(&bh->block_hists, &bh->block_list);
 	perf_hpp_list__init(&bh->block_list);
 	bh->block_list.nr_header_lines = 1;
 
-	register_block_columns(&bh->block_list, block_fmts);
+	register_block_columns(&bh->block_list, block_fmts,
+			       block_hpps, nr_hpps);
 
-	perf_hpp_list__register_sort_field(&bh->block_list,
-		&block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT].fmt);
+	/* Sort by the first fmt */
+	perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt);
 }
 
-static void process_block_report(struct hists *hists,
-				 struct block_report *block_report,
-				 u64 total_cycles)
+static int process_block_report(struct hists *hists,
+				struct block_report *block_report,
+				u64 total_cycles, int *block_hpps,
+				int nr_hpps)
 {
 	struct rb_node *next = rb_first_cached(&hists->entries);
 	struct block_hist *bh = &block_report->hist;
 	struct hist_entry *he;
 
-	init_block_hist(bh, block_report->fmts);
+	if (nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX)
+		return -1;
+
+	block_report->nr_fmts = nr_hpps;
+	init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps);
 
 	while (next) {
 		he = rb_entry(next, struct hist_entry, rb_node);
@@ -411,16 +421,19 @@ static void process_block_report(struct hists *hists,
 		next = rb_next(&he->rb_node);
 	}
 
-	for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) {
+	for (int i = 0; i < nr_hpps; i++) {
 		block_report->fmts[i].total_cycles = total_cycles;
 		block_report->fmts[i].block_cycles = block_report->cycles;
 	}
 
 	hists__output_resort(&bh->block_hists, NULL);
+	return 0;
 }
 
 struct block_report *block_info__create_report(struct evlist *evlist,
-					       u64 total_cycles)
+					       u64 total_cycles,
+					       int *block_hpps, int nr_hpps,
+					       int *nr_reps)
 {
 	struct block_report *block_reports;
 	int nr_hists = evlist->core.nr_entries, i = 0;
@@ -433,13 +446,23 @@ struct block_report *block_info__create_report(struct evlist *evlist,
 	evlist__for_each_entry(evlist, pos) {
 		struct hists *hists = evsel__hists(pos);
 
-		process_block_report(hists, &block_reports[i], total_cycles);
+		process_block_report(hists, &block_reports[i], total_cycles,
+				     block_hpps, nr_hpps);
 		i++;
 	}
 
+	*nr_reps = nr_hists;
 	return block_reports;
 }
 
+void block_info__free_report(struct block_report *reps, int nr_reps)
+{
+	for (int i = 0; i < nr_reps; i++)
+		hists__delete_entries(&reps[i].hist.block_hists);
+
+	free(reps);
+}
+
 int report__browse_block_hists(struct block_hist *bh, float min_percent,
 			       struct evsel *evsel, struct perf_env *env,
 			       struct annotation_options *annotation_opts)
@@ -451,13 +474,11 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent,
 		symbol_conf.report_individual_block = true;
 		hists__fprintf(&bh->block_hists, true, 0, 0, min_percent,
 			       stdout, true);
-		hists__delete_entries(&bh->block_hists);
 		return 0;
 	case 1:
 		symbol_conf.report_individual_block = true;
 		ret = block_hists_tui_browse(bh, evsel, min_percent,
 					     env, annotation_opts);
-		hists__delete_entries(&bh->block_hists);
 		return ret;
 	default:
 		return -1;
diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h
index bef0d75e9819..42e9dcc4cf0a 100644
--- a/tools/perf/util/block-info.h
+++ b/tools/perf/util/block-info.h
@@ -45,6 +45,7 @@ struct block_report {
 	struct block_hist	hist;
 	u64			cycles;
 	struct block_fmt	fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+	int			nr_fmts;
 };
 
 struct block_hist;
@@ -61,6 +62,8 @@ static inline void __block_info__zput(struct block_info **bi)
 
 #define block_info__zput(bi) __block_info__zput(&bi)
 
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right);
+
 int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
 			struct hist_entry *left, struct hist_entry *right);
 
@@ -68,7 +71,11 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
 			    u64 *block_cycles_aggr, u64 total_cycles);
 
 struct block_report *block_info__create_report(struct evlist *evlist,
-					       u64 total_cycles);
+					       u64 total_cycles,
+					       int *block_hpps, int nr_hpps,
+					       int *nr_reps);
+
+void block_info__free_report(struct block_report *reps, int nr_reps);
 
 int report__browse_block_hists(struct block_hist *bh, float min_percent,
 			       struct evsel *evsel, struct perf_env *env,
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 88e00d268f6f..154a05cd03af 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -12,6 +12,7 @@
 #include <linux/stddef.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
+#include "event.h"
 
 struct branch_flags {
 	u64 mispred:1;
@@ -39,9 +40,30 @@ struct branch_entry {
 
 struct branch_stack {
 	u64			nr;
+	u64			hw_idx;
 	struct branch_entry	entries[0];
 };
 
+/*
+ * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
+ * Otherwise, the output format of a sample with branch stack is
+ * struct branch_stack {
+ *	u64			nr;
+ *	struct branch_entry	entries[0];
+ * }
+ * Check whether the hw_idx is available,
+ * and return the corresponding pointer of entries[0].
+ */
+static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
+{
+	u64 *entry = (u64 *)sample->branch_stack;
+
+	entry++;
+	if (sample->no_hw_idx)
+		return (struct branch_entry *)entry;
+	return (struct branch_entry *)(++entry);
+}
+
 struct branch_type_stat {
 	bool	branch_to;
 	u64	counts[PERF_BR_MAX];
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 4881d4af3381..5bc9d3b01bd9 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -3,75 +3,16 @@
 #include "evsel.h"
 #include "cgroup.h"
 #include "evlist.h"
-#include <linux/stringify.h>
 #include <linux/zalloc.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <string.h>
+#include <api/fs/fs.h>
 
 int nr_cgroups;
 
-static int
-cgroupfs_find_mountpoint(char *buf, size_t maxlen)
-{
-	FILE *fp;
-	char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
-	char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
-	char *token, *saved_ptr = NULL;
-
-	fp = fopen("/proc/mounts", "r");
-	if (!fp)
-		return -1;
-
-	/*
-	 * in order to handle split hierarchy, we need to scan /proc/mounts
-	 * and inspect every cgroupfs mount point to find one that has
-	 * perf_event subsystem
-	 */
-	path_v1[0] = '\0';
-	path_v2[0] = '\0';
-
-	while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
-				__stringify(PATH_MAX)"s %*d %*d\n",
-				mountpoint, type, tokens) == 3) {
-
-		if (!path_v1[0] && !strcmp(type, "cgroup")) {
-
-			token = strtok_r(tokens, ",", &saved_ptr);
-
-			while (token != NULL) {
-				if (!strcmp(token, "perf_event")) {
-					strcpy(path_v1, mountpoint);
-					break;
-				}
-				token = strtok_r(NULL, ",", &saved_ptr);
-			}
-		}
-
-		if (!path_v2[0] && !strcmp(type, "cgroup2"))
-			strcpy(path_v2, mountpoint);
-
-		if (path_v1[0] && path_v2[0])
-			break;
-	}
-	fclose(fp);
-
-	if (path_v1[0])
-		path = path_v1;
-	else if (path_v2[0])
-		path = path_v2;
-	else
-		return -1;
-
-	if (strlen(path) < maxlen) {
-		strcpy(buf, path);
-		return 0;
-	}
-	return -1;
-}
-
 static int open_cgroup(const char *name)
 {
 	char path[PATH_MAX + 1];
@@ -79,7 +20,7 @@ static int open_cgroup(const char *name)
 	int fd;
 
 
-	if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
+	if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event"))
 		return -1;
 
 	scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 0bc9c4d7fdc5..ef38eba56ed0 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -374,6 +374,18 @@ int perf_config_int(int *dest, const char *name, const char *value)
 	return 0;
 }
 
+int perf_config_u8(u8 *dest, const char *name, const char *value)
+{
+	long ret = 0;
+
+	if (!perf_parse_long(value, &ret)) {
+		bad_config(name);
+		return -1;
+	}
+	*dest = ret;
+	return 0;
+}
+
 static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
 {
 	int ret;
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index bd0a5897c76a..c10b66dde2f3 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -29,6 +29,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *);
 int perf_default_config(const char *, const char *, void *);
 int perf_config(config_fn_t fn, void *);
 int perf_config_int(int *dest, const char *, const char *);
+int perf_config_u8(u8 *dest, const char *name, const char *value);
 int perf_config_u64(u64 *dest, const char *, const char *);
 int perf_config_bool(const char *, const char *);
 int config_error_nonbool(const char *);
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 5471045ebf5c..62d2f9b9ce1b 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -363,6 +363,23 @@ struct cs_etm_packet_queue
 	return NULL;
 }
 
+static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
+				struct cs_etm_traceid_queue *tidq)
+{
+	struct cs_etm_packet *tmp;
+
+	if (etm->sample_branches || etm->synth_opts.last_branch ||
+	    etm->sample_instructions) {
+		/*
+		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+		 * the next incoming packet.
+		 */
+		tmp = tidq->packet;
+		tidq->packet = tidq->prev_packet;
+		tidq->prev_packet = tmp;
+	}
+}
+
 static void cs_etm__packet_dump(const char *pkt_string)
 {
 	const char *color = PERF_COLOR_BLUE;
@@ -945,7 +962,7 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
 	if (packet->isa == CS_ETM_ISA_T32) {
 		u64 addr = packet->start_addr;
 
-		while (offset > 0) {
+		while (offset) {
 			addr += cs_etm__t32_instr_size(etmq,
 						       trace_chan_id, addr);
 			offset--;
@@ -1134,10 +1151,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 
 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
 
-	if (etm->synth_opts.last_branch) {
-		cs_etm__copy_last_branch_rb(etmq, tidq);
+	if (etm->synth_opts.last_branch)
 		sample.branch_stack = tidq->last_branch;
-	}
 
 	if (etm->synth_opts.inject) {
 		ret = cs_etm__inject_event(event, &sample,
@@ -1153,9 +1168,6 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 			"CS ETM Trace: failed to deliver instruction event, error %d\n",
 			ret);
 
-	if (etm->synth_opts.last_branch)
-		cs_etm__reset_last_branch_rb(tidq);
-
 	return ret;
 }
 
@@ -1172,6 +1184,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
 	union perf_event *event = tidq->event_buf;
 	struct dummy_branch_stack {
 		u64			nr;
+		u64			hw_idx;
 		struct branch_entry	entries;
 	} dummy_bs;
 	u64 ip;
@@ -1202,6 +1215,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
 	if (etm->synth_opts.last_branch) {
 		dummy_bs = (struct dummy_branch_stack){
 			.nr = 1,
+			.hw_idx = -1ULL,
 			.entries = {
 				.from = sample.ip,
 				.to = sample.addr,
@@ -1340,12 +1354,14 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
 			  struct cs_etm_traceid_queue *tidq)
 {
 	struct cs_etm_auxtrace *etm = etmq->etm;
-	struct cs_etm_packet *tmp;
 	int ret;
 	u8 trace_chan_id = tidq->trace_chan_id;
-	u64 instrs_executed = tidq->packet->instr_count;
+	u64 instrs_prev;
+
+	/* Get instructions remainder from previous packet */
+	instrs_prev = tidq->period_instructions;
 
-	tidq->period_instructions += instrs_executed;
+	tidq->period_instructions += tidq->packet->instr_count;
 
 	/*
 	 * Record a branch when the last instruction in
@@ -1363,26 +1379,80 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
 		 * TODO: allow period to be defined in cycles and clock time
 		 */
 
-		/* Get number of instructions executed after the sample point */
-		u64 instrs_over = tidq->period_instructions -
-			etm->instructions_sample_period;
+		/*
+		 * Below diagram demonstrates the instruction samples
+		 * generation flows:
+		 *
+		 *    Instrs     Instrs       Instrs       Instrs
+		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
+		 *    |            |            |            |
+		 *    V            V            V            V
+		 *   --------------------------------------------------
+		 *            ^                                  ^
+		 *            |                                  |
+		 *         Period                             Period
+		 *    instructions(Pi)                   instructions(Pi')
+		 *
+		 *            |                                  |
+		 *            \---------------- -----------------/
+		 *                             V
+		 *                 tidq->packet->instr_count
+		 *
+		 * Instrs Sample(n...) are the synthesised samples occurring
+		 * every etm->instructions_sample_period instructions - as
+		 * defined on the perf command line.  Sample(n) is being the
+		 * last sample before the current etm packet, n+1 to n+3
+		 * samples are generated from the current etm packet.
+		 *
+		 * tidq->packet->instr_count represents the number of
+		 * instructions in the current etm packet.
+		 *
+		 * Period instructions (Pi) contains the the number of
+		 * instructions executed after the sample point(n) from the
+		 * previous etm packet.  This will always be less than
+		 * etm->instructions_sample_period.
+		 *
+		 * When generate new samples, it combines with two parts
+		 * instructions, one is the tail of the old packet and another
+		 * is the head of the new coming packet, to generate
+		 * sample(n+1); sample(n+2) and sample(n+3) consume the
+		 * instructions with sample period.  After sample(n+3), the rest
+		 * instructions will be used by later packet and it is assigned
+		 * to tidq->period_instructions for next round calculation.
+		 */
 
 		/*
-		 * Calculate the address of the sampled instruction (-1 as
-		 * sample is reported as though instruction has just been
-		 * executed, but PC has not advanced to next instruction)
+		 * Get the initial offset into the current packet instructions;
+		 * entry conditions ensure that instrs_prev is less than
+		 * etm->instructions_sample_period.
 		 */
-		u64 offset = (instrs_executed - instrs_over - 1);
-		u64 addr = cs_etm__instr_addr(etmq, trace_chan_id,
-					      tidq->packet, offset);
+		u64 offset = etm->instructions_sample_period - instrs_prev;
+		u64 addr;
 
-		ret = cs_etm__synth_instruction_sample(
-			etmq, tidq, addr, etm->instructions_sample_period);
-		if (ret)
-			return ret;
+		/* Prepare last branches for instruction sample */
+		if (etm->synth_opts.last_branch)
+			cs_etm__copy_last_branch_rb(etmq, tidq);
 
-		/* Carry remaining instructions into next sample period */
-		tidq->period_instructions = instrs_over;
+		while (tidq->period_instructions >=
+				etm->instructions_sample_period) {
+			/*
+			 * Calculate the address of the sampled instruction (-1
+			 * as sample is reported as though instruction has just
+			 * been executed, but PC has not advanced to next
+			 * instruction)
+			 */
+			addr = cs_etm__instr_addr(etmq, trace_chan_id,
+						  tidq->packet, offset - 1);
+			ret = cs_etm__synth_instruction_sample(
+				etmq, tidq, addr,
+				etm->instructions_sample_period);
+			if (ret)
+				return ret;
+
+			offset += etm->instructions_sample_period;
+			tidq->period_instructions -=
+				etm->instructions_sample_period;
+		}
 	}
 
 	if (etm->sample_branches) {
@@ -1404,15 +1474,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
 		}
 	}
 
-	if (etm->sample_branches || etm->synth_opts.last_branch) {
-		/*
-		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
-		 * the next incoming packet.
-		 */
-		tmp = tidq->packet;
-		tidq->packet = tidq->prev_packet;
-		tidq->prev_packet = tmp;
-	}
+	cs_etm__packet_swap(etm, tidq);
 
 	return 0;
 }
@@ -1441,7 +1503,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
 {
 	int err = 0;
 	struct cs_etm_auxtrace *etm = etmq->etm;
-	struct cs_etm_packet *tmp;
 
 	/* Handle start tracing packet */
 	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
@@ -1449,6 +1510,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
 
 	if (etmq->etm->synth_opts.last_branch &&
 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+		u64 addr;
+
+		/* Prepare last branches for instruction sample */
+		cs_etm__copy_last_branch_rb(etmq, tidq);
+
 		/*
 		 * Generate a last branch event for the branches left in the
 		 * circular buffer at the end of the trace.
@@ -1456,7 +1522,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
 		 * Use the address of the end of the last reported execution
 		 * range
 		 */
-		u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
+		addr = cs_etm__last_executed_instr(tidq->prev_packet);
 
 		err = cs_etm__synth_instruction_sample(
 			etmq, tidq, addr,
@@ -1476,15 +1542,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
 	}
 
 swap_packet:
-	if (etm->sample_branches || etm->synth_opts.last_branch) {
-		/*
-		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
-		 * the next incoming packet.
-		 */
-		tmp = tidq->packet;
-		tidq->packet = tidq->prev_packet;
-		tidq->prev_packet = tmp;
-	}
+	cs_etm__packet_swap(etm, tidq);
+
+	/* Reset last branches after flush the trace */
+	if (etm->synth_opts.last_branch)
+		cs_etm__reset_last_branch_rb(tidq);
 
 	return err;
 }
@@ -1505,11 +1567,16 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq,
 	 */
 	if (etmq->etm->synth_opts.last_branch &&
 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+		u64 addr;
+
+		/* Prepare last branches for instruction sample */
+		cs_etm__copy_last_branch_rb(etmq, tidq);
+
 		/*
 		 * Use the address of the end of the last reported execution
 		 * range.
 		 */
-		u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
+		addr = cs_etm__last_executed_instr(tidq->prev_packet);
 
 		err = cs_etm__synth_instruction_sample(
 			etmq, tidq, addr,
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 6242a9215df7..4154f944f474 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -343,11 +343,11 @@ static const char *normalize_arch(char *arch)
 
 const char *perf_env__arch(struct perf_env *env)
 {
-	struct utsname uts;
 	char *arch_name;
 
 	if (!env || !env->arch) { /* Assume local operation */
-		if (uname(&uts) < 0)
+		static struct utsname uts = { .machine[0] = '\0', };
+		if (uts.machine[0] == '\0' && uname(&uts) < 0)
 			return NULL;
 		arch_name = uts.machine;
 	} else
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 85223159737c..3cda40a2fafc 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -139,6 +139,7 @@ struct perf_sample {
 	u16 insn_len;
 	u8  cpumode;
 	u16 misc;
+	bool no_hw_idx;		/* No hw_idx collected in branch_stack */
 	char insn[MAX_INSN];
 	void *raw_data;
 	struct ip_callchain *callchain;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c8dc4450884c..816d930d774e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -712,7 +712,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
 				attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
 							PERF_SAMPLE_BRANCH_CALL_STACK |
 							PERF_SAMPLE_BRANCH_NO_CYCLES |
-							PERF_SAMPLE_BRANCH_NO_FLAGS;
+							PERF_SAMPLE_BRANCH_NO_FLAGS |
+							PERF_SAMPLE_BRANCH_HW_INDEX;
 			}
 		} else
 			 pr_warning("Cannot use LBR callstack with branch stack. "
@@ -763,7 +764,8 @@ perf_evsel__reset_callgraph(struct evsel *evsel,
 	if (param->record_mode == CALLCHAIN_LBR) {
 		perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
 		attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
-					      PERF_SAMPLE_BRANCH_CALL_STACK);
+					      PERF_SAMPLE_BRANCH_CALL_STACK |
+					      PERF_SAMPLE_BRANCH_HW_INDEX);
 	}
 	if (param->record_mode == CALLCHAIN_DWARF) {
 		perf_evsel__reset_sample_bit(evsel, REGS_USER);
@@ -1673,6 +1675,8 @@ fallback_missing_features:
 		evsel->core.attr.ksymbol = 0;
 	if (perf_missing_features.bpf)
 		evsel->core.attr.bpf_event = 0;
+	if (perf_missing_features.branch_hw_idx)
+		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
 retry_sample_id:
 	if (perf_missing_features.sample_id_all)
 		evsel->core.attr.sample_id_all = 0;
@@ -1784,7 +1788,12 @@ try_fallback:
 	 * Must probe features in the order they were added to the
 	 * perf_event_attr interface.
 	 */
-	if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
+	if (!perf_missing_features.branch_hw_idx &&
+	    (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
+		perf_missing_features.branch_hw_idx = true;
+		pr_debug2("switching off branch HW index support\n");
+		goto fallback_missing_features;
+	} else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
 		perf_missing_features.aux_output = true;
 		pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
 		goto out_close;
@@ -2169,7 +2178,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 
 		if (data->branch_stack->nr > max_branch_nr)
 			return -EFAULT;
+
 		sz = data->branch_stack->nr * sizeof(struct branch_entry);
+		if (perf_evsel__has_branch_hw_idx(evsel))
+			sz += sizeof(u64);
+		else
+			data->no_hw_idx = true;
 		OVERFLOW_CHECK(array, sz, max_size);
 		array = (void *)array + sz;
 	}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index dc14f4a823cd..33804740e2ca 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -119,6 +119,7 @@ struct perf_missing_features {
 	bool ksymbol;
 	bool bpf;
 	bool aux_output;
+	bool branch_hw_idx;
 };
 
 extern struct perf_missing_features perf_missing_features;
@@ -389,6 +390,11 @@ static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel)
 	return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
 }
 
+static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel)
+{
+	return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
 static inline bool evsel__has_callchain(const struct evsel *evsel)
 {
 	return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
new file mode 100644
index 000000000000..fd192ddf93c1
--- /dev/null
+++ b/tools/perf/util/expr.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <assert.h>
+#include "expr.h"
+#include "expr-bison.h"
+#define YY_EXTRA_TYPE int
+#include "expr-flex.h"
+
+#ifdef PARSER_DEBUG
+extern int expr_debug;
+#endif
+
+/* Caller must make sure id is allocated */
+void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
+{
+	int idx;
+
+	assert(ctx->num_ids < MAX_PARSE_ID);
+	idx = ctx->num_ids++;
+	ctx->ids[idx].name = name;
+	ctx->ids[idx].val = val;
+}
+
+void expr__ctx_init(struct parse_ctx *ctx)
+{
+	ctx->num_ids = 0;
+}
+
+static int
+__expr__parse(double *val, struct parse_ctx *ctx, const char *expr,
+	      int start)
+{
+	YY_BUFFER_STATE buffer;
+	void *scanner;
+	int ret;
+
+	ret = expr_lex_init_extra(start, &scanner);
+	if (ret)
+		return ret;
+
+	buffer = expr__scan_string(expr, scanner);
+
+#ifdef PARSER_DEBUG
+	expr_debug = 1;
+#endif
+
+	ret = expr_parse(val, ctx, scanner);
+
+	expr__flush_buffer(buffer, scanner);
+	expr__delete_buffer(buffer, scanner);
+	expr_lex_destroy(scanner);
+	return ret;
+}
+
+int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr)
+{
+	return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0;
+}
+
+static bool
+already_seen(const char *val, const char *one, const char **other,
+	     int num_other)
+{
+	int i;
+
+	if (one && !strcasecmp(one, val))
+		return true;
+	for (i = 0; i < num_other; i++)
+		if (!strcasecmp(other[i], val))
+			return true;
+	return false;
+}
+
+int expr__find_other(const char *expr, const char *one, const char ***other,
+		     int *num_other)
+{
+	int err, i = 0, j = 0;
+	struct parse_ctx ctx;
+
+	expr__ctx_init(&ctx);
+	err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER);
+	if (err)
+		return -1;
+
+	*other = malloc((ctx.num_ids + 1) * sizeof(char *));
+	if (!*other)
+		return -ENOMEM;
+
+	for (i = 0, j = 0; i < ctx.num_ids; i++) {
+		const char *str = ctx.ids[i].name;
+
+		if (already_seen(str, one, *other, j))
+			continue;
+
+		str = strdup(str);
+		if (!str)
+			goto out;
+		(*other)[j++] = str;
+	}
+	(*other)[j] = NULL;
+
+out:
+	if (i != ctx.num_ids) {
+		while (--j)
+			free((char *) (*other)[i]);
+		free(*other);
+		err = -1;
+	}
+
+	*num_other = j;
+	return err;
+}
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 046160831f90..9377538f4097 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,7 +2,7 @@
 #ifndef PARSE_CTX_H
 #define PARSE_CTX_H 1
 
-#define EXPR_MAX_OTHER 15
+#define EXPR_MAX_OTHER 20
 #define MAX_PARSE_ID EXPR_MAX_OTHER
 
 struct parse_id {
@@ -17,10 +17,8 @@ struct parse_ctx {
 
 void expr__ctx_init(struct parse_ctx *ctx);
 void expr__add_id(struct parse_ctx *ctx, const char *id, double val);
-#ifndef IN_EXPR_Y
-int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp);
-#endif
-int expr__find_other(const char *p, const char *one, const char ***other,
+int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr);
+int expr__find_other(const char *expr, const char *one, const char ***other,
 		int *num_other);
 
 #endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
new file mode 100644
index 000000000000..eaad29243c23
--- /dev/null
+++ b/tools/perf/util/expr.l
@@ -0,0 +1,114 @@
+%option prefix="expr_"
+%option reentrant
+%option bison-bridge
+
+%{
+#include <linux/compiler.h>
+#include "expr.h"
+#include "expr-bison.h"
+
+char *expr_get_text(yyscan_t yyscanner);
+YYSTYPE *expr_get_lval(yyscan_t yyscanner);
+
+static int __value(YYSTYPE *yylval, char *str, int base, int token)
+{
+	u64 num;
+
+	errno = 0;
+	num = strtoull(str, NULL, base);
+	if (errno)
+		return EXPR_ERROR;
+
+	yylval->num = num;
+	return token;
+}
+
+static int value(yyscan_t scanner, int base)
+{
+	YYSTYPE *yylval = expr_get_lval(scanner);
+	char *text = expr_get_text(scanner);
+
+	return __value(yylval, text, base, NUMBER);
+}
+
+/*
+ * Allow @ instead of / to be able to specify pmu/event/ without
+ * conflicts with normal division.
+ */
+static char *normalize(char *str)
+{
+	char *ret = str;
+	char *dst = str;
+
+	while (*str) {
+		if (*str == '@')
+			*dst++ = '/';
+		else if (*str == '\\')
+			*dst++ = *++str;
+		else
+			*dst++ = *str;
+		str++;
+	}
+
+	*dst = 0x0;
+	return ret;
+}
+
+static int str(yyscan_t scanner, int token)
+{
+	YYSTYPE *yylval = expr_get_lval(scanner);
+	char *text = expr_get_text(scanner);
+
+	yylval->str = normalize(strdup(text));
+	if (!yylval->str)
+		return EXPR_ERROR;
+
+	yylval->str = normalize(yylval->str);
+	return token;
+}
+%}
+
+number		[0-9]+
+
+sch		[-,=]
+spec		\\{sch}
+sym		[0-9a-zA-Z_\.:@]+
+symbol		{spec}*{sym}*{spec}*{sym}*
+
+%%
+	{
+		int start_token;
+
+		start_token = expr_get_extra(yyscanner);
+
+		if (start_token) {
+			expr_set_extra(NULL, yyscanner);
+			return start_token;
+		}
+	}
+
+max		{ return MAX; }
+min		{ return MIN; }
+if		{ return IF; }
+else		{ return ELSE; }
+#smt_on		{ return SMT_ON; }
+{number}	{ return value(yyscanner, 10); }
+{symbol}	{ return str(yyscanner, ID); }
+"|"		{ return '|'; }
+"^"		{ return '^'; }
+"&"		{ return '&'; }
+"-"		{ return '-'; }
+"+"		{ return '+'; }
+"*"		{ return '*'; }
+"/"		{ return '/'; }
+"%"		{ return '%'; }
+"("		{ return '('; }
+")"		{ return ')'; }
+","		{ return ','; }
+.		{ }
+%%
+
+int expr_wrap(void *scanner __maybe_unused)
+{
+	return 1;
+}
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index 7d226241f1d7..4720cbe79357 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -1,31 +1,32 @@
 /* Simple expression parser */
 %{
+#define YYDEBUG 1
+#include <stdio.h>
 #include "util.h"
 #include "util/debug.h"
 #include <stdlib.h> // strtod()
 #define IN_EXPR_Y 1
 #include "expr.h"
 #include "smt.h"
-#include <assert.h>
 #include <string.h>
 
-#define MAXIDLEN 256
 %}
 
 %define api.pure full
 
 %parse-param { double *final_val }
 %parse-param { struct parse_ctx *ctx }
-%parse-param { const char **pp }
-%lex-param { const char **pp }
+%parse-param {void *scanner}
+%lex-param {void* scanner}
 
 %union {
-	double num;
-	char id[MAXIDLEN+1];
+	double	 num;
+	char	*str;
 }
 
+%token EXPR_PARSE EXPR_OTHER EXPR_ERROR
 %token <num> NUMBER
-%token <id> ID
+%token <str> ID
 %token MIN MAX IF ELSE SMT_ON
 %left MIN MAX IF
 %left '|'
@@ -37,11 +38,9 @@
 %type <num> expr if_expr
 
 %{
-static int expr__lex(YYSTYPE *res, const char **pp);
-
-static void expr__error(double *final_val __maybe_unused,
+static void expr_error(double *final_val __maybe_unused,
 		       struct parse_ctx *ctx __maybe_unused,
-		       const char **pp __maybe_unused,
+		       void *scanner,
 		       const char *s)
 {
 	pr_debug("%s\n", s);
@@ -63,6 +62,27 @@ static int lookup_id(struct parse_ctx *ctx, char *id, double *val)
 %}
 %%
 
+start:
+EXPR_PARSE all_expr
+|
+EXPR_OTHER all_other
+
+all_other: all_other other
+|
+
+other: ID
+{
+	if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) {
+		pr_err("failed: way too many variables");
+		YYABORT;
+	}
+
+	ctx->ids[ctx->num_ids++].name = $1;
+}
+|
+MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')'
+
+
 all_expr: if_expr			{ *final_val = $1; }
 	;
 
@@ -93,146 +113,3 @@ expr:	  NUMBER
 	;
 
 %%
-
-static int expr__symbol(YYSTYPE *res, const char *p, const char **pp)
-{
-	char *dst = res->id;
-	const char *s = p;
-
-	if (*p == '#')
-		*dst++ = *p++;
-
-	while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') {
-		if (p - s >= MAXIDLEN)
-			return -1;
-		/*
-		 * Allow @ instead of / to be able to specify pmu/event/ without
-		 * conflicts with normal division.
-		 */
-		if (*p == '@')
-			*dst++ = '/';
-		else if (*p == '\\')
-			*dst++ = *++p;
-		else
-			*dst++ = *p;
-		p++;
-	}
-	*dst = 0;
-	*pp = p;
-	dst = res->id;
-	switch (dst[0]) {
-	case 'm':
-		if (!strcmp(dst, "min"))
-			return MIN;
-		if (!strcmp(dst, "max"))
-			return MAX;
-		break;
-	case 'i':
-		if (!strcmp(dst, "if"))
-			return IF;
-		break;
-	case 'e':
-		if (!strcmp(dst, "else"))
-			return ELSE;
-		break;
-	case '#':
-		if (!strcasecmp(dst, "#smt_on"))
-			return SMT_ON;
-		break;
-	}
-	return ID;
-}
-
-static int expr__lex(YYSTYPE *res, const char **pp)
-{
-	int tok;
-	const char *s;
-	const char *p = *pp;
-
-	while (isspace(*p))
-		p++;
-	s = p;
-	switch (*p++) {
-	case '#':
-	case 'a' ... 'z':
-	case 'A' ... 'Z':
-		return expr__symbol(res, p - 1, pp);
-	case '0' ... '9': case '.':
-		res->num = strtod(s, (char **)&p);
-		tok = NUMBER;
-		break;
-	default:
-		tok = *s;
-		break;
-	}
-	*pp = p;
-	return tok;
-}
-
-/* Caller must make sure id is allocated */
-void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
-{
-	int idx;
-	assert(ctx->num_ids < MAX_PARSE_ID);
-	idx = ctx->num_ids++;
-	ctx->ids[idx].name = name;
-	ctx->ids[idx].val = val;
-}
-
-void expr__ctx_init(struct parse_ctx *ctx)
-{
-	ctx->num_ids = 0;
-}
-
-static bool already_seen(const char *val, const char *one, const char **other,
-			 int num_other)
-{
-	int i;
-
-	if (one && !strcasecmp(one, val))
-		return true;
-	for (i = 0; i < num_other; i++)
-		if (!strcasecmp(other[i], val))
-			return true;
-	return false;
-}
-
-int expr__find_other(const char *p, const char *one, const char ***other,
-		     int *num_otherp)
-{
-	const char *orig = p;
-	int err = -1;
-	int num_other;
-
-	*other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *));
-	if (!*other)
-		return -1;
-
-	num_other = 0;
-	for (;;) {
-		YYSTYPE val;
-		int tok = expr__lex(&val, &p);
-		if (tok == 0) {
-			err = 0;
-			break;
-		}
-		if (tok == ID && !already_seen(val.id, one, *other, num_other)) {
-			if (num_other >= EXPR_MAX_OTHER - 1) {
-				pr_debug("Too many extra events in %s\n", orig);
-				break;
-			}
-			(*other)[num_other] = strdup(val.id);
-			if (!(*other)[num_other])
-				return -1;
-			num_other++;
-		}
-	}
-	(*other)[num_other] = NULL;
-	*num_otherp = num_other;
-	if (err) {
-		*num_otherp = 0;
-		free(*other);
-		*other = NULL;
-	}
-	return err;
-}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4246e7447e54..acbd046bf95c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1590,6 +1590,40 @@ static void free_event_desc(struct evsel *events)
 	free(events);
 }
 
+static bool perf_attr_check(struct perf_event_attr *attr)
+{
+	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) {
+		pr_warning("Reserved bits are set unexpectedly. "
+			   "Please update perf tool.\n");
+		return false;
+	}
+
+	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) {
+		pr_warning("Unknown sample type (0x%llx) is detected. "
+			   "Please update perf tool.\n",
+			   attr->sample_type);
+		return false;
+	}
+
+	if (attr->read_format & ~(PERF_FORMAT_MAX-1)) {
+		pr_warning("Unknown read format (0x%llx) is detected. "
+			   "Please update perf tool.\n",
+			   attr->read_format);
+		return false;
+	}
+
+	if ((attr->sample_type & PERF_SAMPLE_BRANCH_STACK) &&
+	    (attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1))) {
+		pr_warning("Unknown branch sample type (0x%llx) is detected. "
+			   "Please update perf tool.\n",
+			   attr->branch_sample_type);
+
+		return false;
+	}
+
+	return true;
+}
+
 static struct evsel *read_event_desc(struct feat_fd *ff)
 {
 	struct evsel *evsel, *events = NULL;
@@ -1634,6 +1668,9 @@ static struct evsel *read_event_desc(struct feat_fd *ff)
 
 		memcpy(&evsel->core.attr, buf, msz);
 
+		if (!perf_attr_check(&evsel->core.attr))
+			goto error;
+
 		if (do_read_u32(ff, &nr))
 			goto error;
 
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index ca5a8f4d007e..e74a5acf66d9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -2584,9 +2584,10 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
 			  u64 *total_cycles)
 {
 	struct branch_info *bi;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
 
 	/* If we have branch cycles always annotate them. */
-	if (bs && bs->nr && bs->entries[0].flags.cycles) {
+	if (bs && bs->nr && entries[0].flags.cycles) {
 		int i;
 
 		bi = sample__resolve_bstack(sample, al);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 33cf8928cf05..23c8289c2472 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1295,6 +1295,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	struct perf_sample sample = { .ip = 0, };
 	struct dummy_branch_stack {
 		u64			nr;
+		u64			hw_idx;
 		struct branch_entry	entries;
 	} dummy_bs;
 
@@ -1316,6 +1317,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
 		dummy_bs = (struct dummy_branch_stack){
 			.nr = 1,
+			.hw_idx = -1ULL,
 			.entries = {
 				.from = sample.ip,
 				.to = sample.addr,
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index b5af680fc667..dbdffb6673fe 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -265,6 +265,8 @@ static int detect_kbuild_dir(char **kbuild_dir)
 			return -ENOMEM;
 		return 0;
 	}
+	pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n",
+		 __func__, autoconf_path);
 	free(autoconf_path);
 	return -ENOENT;
 }
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index fb5c2cd44d30..fd14f1489802 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2081,15 +2081,16 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 {
 	unsigned int i;
 	const struct branch_stack *bs = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
 	struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info));
 
 	if (!bi)
 		return NULL;
 
 	for (i = 0; i < bs->nr; i++) {
-		ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to);
-		ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from);
-		bi[i].flags = bs->entries[i].flags;
+		ip__resolve_ams(al->thread, &bi[i].to, entries[i].to);
+		ip__resolve_ams(al->thread, &bi[i].from, entries[i].from);
+		bi[i].flags = entries[i].flags;
 	}
 	return bi;
 }
@@ -2185,6 +2186,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 	/* LBR only affects the user callchain */
 	if (i != chain_nr) {
 		struct branch_stack *lbr_stack = sample->branch_stack;
+		struct branch_entry *entries = perf_sample__branch_entries(sample);
 		int lbr_nr = lbr_stack->nr, j, k;
 		bool branch;
 		struct branch_flags *flags;
@@ -2210,31 +2212,29 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 					ip = chain->ips[j];
 				else if (j > i + 1) {
 					k = j - i - 2;
-					ip = lbr_stack->entries[k].from;
+					ip = entries[k].from;
 					branch = true;
-					flags = &lbr_stack->entries[k].flags;
+					flags = &entries[k].flags;
 				} else {
-					ip = lbr_stack->entries[0].to;
+					ip = entries[0].to;
 					branch = true;
-					flags = &lbr_stack->entries[0].flags;
-					branch_from =
-						lbr_stack->entries[0].from;
+					flags = &entries[0].flags;
+					branch_from = entries[0].from;
 				}
 			} else {
 				if (j < lbr_nr) {
 					k = lbr_nr - j - 1;
-					ip = lbr_stack->entries[k].from;
+					ip = entries[k].from;
 					branch = true;
-					flags = &lbr_stack->entries[k].flags;
+					flags = &entries[k].flags;
 				}
 				else if (j > lbr_nr)
 					ip = chain->ips[i + 1 - (j - lbr_nr)];
 				else {
-					ip = lbr_stack->entries[0].to;
+					ip = entries[0].to;
 					branch = true;
-					flags = &lbr_stack->entries[0].flags;
-					branch_from =
-						lbr_stack->entries[0].from;
+					flags = &entries[0].flags;
+					branch_from = entries[0].from;
 				}
 			}
 
@@ -2281,6 +2281,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 					    int max_stack)
 {
 	struct branch_stack *branch = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
 	struct ip_callchain *chain = sample->callchain;
 	int chain_nr = 0;
 	u8 cpumode = PERF_RECORD_MISC_USER;
@@ -2328,7 +2329,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 
 		for (i = 0; i < nr; i++) {
 			if (callchain_param.order == ORDER_CALLEE) {
-				be[i] = branch->entries[i];
+				be[i] = entries[i];
 
 				if (chain == NULL)
 					continue;
@@ -2347,7 +2348,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 				    be[i].from >= chain->ips[first_call] - 8)
 					first_call++;
 			} else
-				be[i] = branch->entries[branch->nr - i - 1];
+				be[i] = entries[branch->nr - i - 1];
 		}
 
 		memset(iter, 0, sizeof(struct iterations) * nr);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index a08ca276098e..53d96611e6a6 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -44,8 +44,8 @@ static inline int is_no_dso_memory(const char *filename)
 
 static inline int is_android_lib(const char *filename)
 {
-	return !strncmp(filename, "/data/app-lib", 13) ||
-	       !strncmp(filename, "/system/lib", 11);
+	return strstarts(filename, "/data/app-lib/") ||
+	       strstarts(filename, "/system/lib/");
 }
 
 static inline bool replace_android_lib(const char *filename, char *newfilename)
@@ -65,7 +65,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
 
 	app_abi_length = strlen(app_abi);
 
-	if (!strncmp(filename, "/data/app-lib", 13)) {
+	if (strstarts(filename, "/data/app-lib/")) {
 		char *apk_path;
 
 		if (!app_abi_length)
@@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
 		return true;
 	}
 
-	if (!strncmp(filename, "/system/lib/", 11)) {
+	if (strstarts(filename, "/system/lib/")) {
 		char *ndk, *app;
 		const char *arch;
 		size_t ndk_length;
@@ -431,7 +431,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
 
 	if (map && map->dso) {
 		char *srcline = map__srcline(map, addr, NULL);
-		if (srcline != SRCLINE_UNKNOWN)
+		if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)
 			ret = fprintf(fp, "%s%s", prefix, srcline);
 		free_srcline(srcline);
 	}
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 02aee946b6c1..c3a8c701609a 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -22,6 +22,8 @@
 #include <linux/string.h>
 #include <linux/zalloc.h>
 #include <subcmd/parse-options.h>
+#include <api/fs/fs.h>
+#include "util.h"
 
 struct metric_event *metricgroup__lookup(struct rblist *metric_events,
 					 struct evsel *evsel,
@@ -399,13 +401,85 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 	strlist__delete(metriclist);
 }
 
+static void metricgroup__add_metric_weak_group(struct strbuf *events,
+					       const char **ids,
+					       int idnum)
+{
+	bool no_group = false;
+	int i;
+
+	for (i = 0; i < idnum; i++) {
+		pr_debug("found event %s\n", ids[i]);
+		/*
+		 * Duration time maps to a software event and can make
+		 * groups not count. Always use it outside a
+		 * group.
+		 */
+		if (!strcmp(ids[i], "duration_time")) {
+			if (i > 0)
+				strbuf_addf(events, "}:W,");
+			strbuf_addf(events, "duration_time");
+			no_group = true;
+			continue;
+		}
+		strbuf_addf(events, "%s%s",
+			i == 0 || no_group ? "{" : ",",
+			ids[i]);
+		no_group = false;
+	}
+	if (!no_group)
+		strbuf_addf(events, "}:W");
+}
+
+static void metricgroup__add_metric_non_group(struct strbuf *events,
+					      const char **ids,
+					      int idnum)
+{
+	int i;
+
+	for (i = 0; i < idnum; i++)
+		strbuf_addf(events, ",%s", ids[i]);
+}
+
+static void metricgroup___watchdog_constraint_hint(const char *name, bool foot)
+{
+	static bool violate_nmi_constraint;
+
+	if (!foot) {
+		pr_warning("Splitting metric group %s into standalone metrics.\n", name);
+		violate_nmi_constraint = true;
+		return;
+	}
+
+	if (!violate_nmi_constraint)
+		return;
+
+	pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n"
+		   "    echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+		   "    perf stat ...\n"
+		   "    echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+}
+
+static bool metricgroup__has_constraint(struct pmu_event *pe)
+{
+	if (!pe->metric_constraint)
+		return false;
+
+	if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") &&
+	    sysctl__nmi_watchdog_enabled()) {
+		metricgroup___watchdog_constraint_hint(pe->metric_name, false);
+		return true;
+	}
+
+	return false;
+}
+
 static int metricgroup__add_metric(const char *metric, struct strbuf *events,
 				   struct list_head *group_list)
 {
 	struct pmu_events_map *map = perf_pmu__find_map(NULL);
 	struct pmu_event *pe;
-	int ret = -EINVAL;
-	int i, j;
+	int i, ret = -EINVAL;
 
 	if (!map)
 		return 0;
@@ -422,7 +496,6 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
 			const char **ids;
 			int idnum;
 			struct egroup *eg;
-			bool no_group = false;
 
 			pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
 
@@ -431,27 +504,11 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
 				continue;
 			if (events->len > 0)
 				strbuf_addf(events, ",");
-			for (j = 0; j < idnum; j++) {
-				pr_debug("found event %s\n", ids[j]);
-				/*
-				 * Duration time maps to a software event and can make
-				 * groups not count. Always use it outside a
-				 * group.
-				 */
-				if (!strcmp(ids[j], "duration_time")) {
-					if (j > 0)
-						strbuf_addf(events, "}:W,");
-					strbuf_addf(events, "duration_time");
-					no_group = true;
-					continue;
-				}
-				strbuf_addf(events, "%s%s",
-					j == 0 || no_group ? "{" : ",",
-					ids[j]);
-				no_group = false;
-			}
-			if (!no_group)
-				strbuf_addf(events, "}:W");
+
+			if (metricgroup__has_constraint(pe))
+				metricgroup__add_metric_non_group(events, ids, idnum);
+			else
+				metricgroup__add_metric_weak_group(events, ids, idnum);
 
 			eg = malloc(sizeof(struct egroup));
 			if (!eg) {
@@ -493,6 +550,10 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
 		}
 	}
 	free(nlist);
+
+	if (!ret)
+		metricgroup___watchdog_constraint_hint(NULL, true);
+
 	return ret;
 }
 
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 3b664fa673a6..ab7108d22428 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -98,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
 {
 	void *data;
 	size_t mmap_len;
-	unsigned long node_mask;
+	unsigned long *node_mask;
+	unsigned long node_index;
+	int err = 0;
 
 	if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
 		data = map->aio.data[idx];
 		mmap_len = mmap__mmap_len(map);
-		node_mask = 1UL << cpu__get_node(cpu);
-		if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) {
-			pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
-				data, data + mmap_len, cpu__get_node(cpu));
+		node_index = cpu__get_node(cpu);
+		node_mask = bitmap_alloc(node_index + 1);
+		if (!node_mask) {
+			pr_err("Failed to allocate node mask for mbind: error %m\n");
 			return -1;
 		}
+		set_bit(node_index, node_mask);
+		if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) {
+			pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n",
+				data, data + mmap_len, node_index);
+			err = -1;
+		}
+		bitmap_free(node_mask);
 	}
 
-	return 0;
+	return err;
 }
 #else /* !HAVE_LIBNUMA_SUPPORT */
 static int perf_mmap__aio_alloc(struct mmap *map, int idx)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c01ba6f8fdad..a7dc0b096974 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -257,21 +257,15 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
 				path = zalloc(sizeof(*path));
 				if (!path)
 					return NULL;
-				path->system = malloc(MAX_EVENT_LENGTH);
-				if (!path->system) {
+				if (asprintf(&path->system, "%.*s", MAX_EVENT_LENGTH, sys_dirent->d_name) < 0) {
 					free(path);
 					return NULL;
 				}
-				path->name = malloc(MAX_EVENT_LENGTH);
-				if (!path->name) {
+				if (asprintf(&path->name, "%.*s", MAX_EVENT_LENGTH, evt_dirent->d_name) < 0) {
 					zfree(&path->system);
 					free(path);
 					return NULL;
 				}
-				strncpy(path->system, sys_dirent->d_name,
-					MAX_EVENT_LENGTH);
-				strncpy(path->name, evt_dirent->d_name,
-					MAX_EVENT_LENGTH);
 				return path;
 			}
 		}
@@ -1219,7 +1213,7 @@ static int config_attr(struct perf_event_attr *attr,
 static int get_config_terms(struct list_head *head_config,
 			    struct list_head *head_terms __maybe_unused)
 {
-#define ADD_CONFIG_TERM(__type)					\
+#define ADD_CONFIG_TERM(__type, __weak)				\
 	struct perf_evsel_config_term *__t;			\
 								\
 	__t = zalloc(sizeof(*__t));				\
@@ -1228,18 +1222,18 @@ static int get_config_terms(struct list_head *head_config,
 								\
 	INIT_LIST_HEAD(&__t->list);				\
 	__t->type       = PERF_EVSEL__CONFIG_TERM_ ## __type;	\
-	__t->weak	= term->weak;				\
+	__t->weak	= __weak;				\
 	list_add_tail(&__t->list, head_terms)
 
-#define ADD_CONFIG_TERM_VAL(__type, __name, __val)		\
+#define ADD_CONFIG_TERM_VAL(__type, __name, __val, __weak)	\
 do {								\
-	ADD_CONFIG_TERM(__type);				\
+	ADD_CONFIG_TERM(__type, __weak);			\
 	__t->val.__name = __val;				\
 } while (0)
 
-#define ADD_CONFIG_TERM_STR(__type, __val)			\
+#define ADD_CONFIG_TERM_STR(__type, __val, __weak)		\
 do {								\
-	ADD_CONFIG_TERM(__type);				\
+	ADD_CONFIG_TERM(__type, __weak);			\
 	__t->val.str = strdup(__val);				\
 	if (!__t->val.str) {					\
 		zfree(&__t);					\
@@ -1253,62 +1247,62 @@ do {								\
 	list_for_each_entry(term, head_config, list) {
 		switch (term->type_term) {
 		case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
-			ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num);
+			ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
-			ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num);
+			ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_TIME:
-			ADD_CONFIG_TERM_VAL(TIME, time, term->val.num);
+			ADD_CONFIG_TERM_VAL(TIME, time, term->val.num, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
-			ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str);
+			ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
-			ADD_CONFIG_TERM_STR(BRANCH, term->val.str);
+			ADD_CONFIG_TERM_STR(BRANCH, term->val.str, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
 			ADD_CONFIG_TERM_VAL(STACK_USER, stack_user,
-					    term->val.num);
+					    term->val.num, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_INHERIT:
 			ADD_CONFIG_TERM_VAL(INHERIT, inherit,
-					    term->val.num ? 1 : 0);
+					    term->val.num ? 1 : 0, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
 			ADD_CONFIG_TERM_VAL(INHERIT, inherit,
-					    term->val.num ? 0 : 1);
+					    term->val.num ? 0 : 1, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
 			ADD_CONFIG_TERM_VAL(MAX_STACK, max_stack,
-					    term->val.num);
+					    term->val.num, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
 			ADD_CONFIG_TERM_VAL(MAX_EVENTS, max_events,
-					    term->val.num);
+					    term->val.num, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
 			ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
-					    term->val.num ? 1 : 0);
+					    term->val.num ? 1 : 0, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
 			ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
-					    term->val.num ? 0 : 1);
+					    term->val.num ? 0 : 1, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
-			ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str);
+			ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_PERCORE:
 			ADD_CONFIG_TERM_VAL(PERCORE, percore,
-					    term->val.num ? true : false);
+					    term->val.num ? true : false, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
 			ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output,
-					    term->val.num ? 1 : 0);
+					    term->val.num ? 1 : 0, term->weak);
 			break;
 		case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
 			ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size,
-					    term->val.num);
+					    term->val.num, term->weak);
 			break;
 		default:
 			break;
@@ -1345,7 +1339,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
 	}
 
 	if (bits)
-		ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits);
+		ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits, false);
 
 #undef ADD_CONFIG_TERM
 	return 0;
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 651203126c71..355d3458d4e6 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -50,6 +50,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
 		bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
 		bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
 		bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+		bit_name(HW_INDEX),
 		{ .name = NULL, }
 	};
 #undef bit_name
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 5003ba403345..8c852948513e 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -206,6 +206,9 @@ static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
 		} else
 			ret = strlist__add(sl, tev.event);
 		clear_probe_trace_event(&tev);
+		/* Skip if there is same name multi-probe event in the list */
+		if (ret == -EEXIST)
+			ret = 0;
 		if (ret < 0)
 			break;
 	}
@@ -301,10 +304,15 @@ int probe_file__get_events(int fd, struct strfilter *filter,
 		p = strchr(ent->s, ':');
 		if ((p && strfilter__compare(filter, p + 1)) ||
 		    strfilter__compare(filter, ent->s)) {
-			strlist__add(plist, ent->s);
+			ret = strlist__add(plist, ent->s);
+			if (ret == -ENOMEM) {
+				pr_err("strlist__add failed with -ENOMEM\n");
+				goto out;
+			}
 			ret = 0;
 		}
 	}
+out:
 	strlist__delete(namelist);
 
 	return ret;
@@ -511,7 +519,11 @@ static int probe_cache__load(struct probe_cache *pcache)
 				ret = -EINVAL;
 				goto out;
 			}
-			strlist__add(entry->tevlist, buf);
+			ret = strlist__add(entry->tevlist, buf);
+			if (ret == -ENOMEM) {
+				pr_err("strlist__add failed with -ENOMEM\n");
+				goto out;
+			}
 		}
 	}
 out:
@@ -672,7 +684,12 @@ int probe_cache__add_entry(struct probe_cache *pcache,
 		command = synthesize_probe_trace_command(&tevs[i]);
 		if (!command)
 			goto out_err;
-		strlist__add(entry->tevlist, command);
+		ret = strlist__add(entry->tevlist, command);
+		if (ret == -ENOMEM) {
+			pr_err("strlist__add failed with -ENOMEM\n");
+			goto out_err;
+		}
+
 		free(command);
 	}
 	list_add_tail(&entry->node, &pcache->entries);
@@ -853,9 +870,15 @@ int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname)
 			break;
 		}
 
-		strlist__add(entry->tevlist, buf);
+		ret = strlist__add(entry->tevlist, buf);
+
 		free(buf);
 		entry = NULL;
+
+		if (ret == -ENOMEM) {
+			pr_err("strlist__add failed with -ENOMEM\n");
+			break;
+		}
 	}
 	if (entry) {
 		list_del_init(&entry->node);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 1c817add6ca4..e4cff49384f4 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -637,14 +637,19 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod,
 		return -EINVAL;
 	}
 
-	/* Try to get actual symbol name from symtab */
-	symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+	if (dwarf_entrypc(sp_die, &eaddr) == 0) {
+		/* If the DIE has entrypc, use it. */
+		symbol = dwarf_diename(sp_die);
+	} else {
+		/* Try to get actual symbol name and address from symtab */
+		symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+		eaddr = sym.st_value;
+	}
 	if (!symbol) {
 		pr_warning("Failed to find symbol at 0x%lx\n",
 			   (unsigned long)paddr);
 		return -ENOENT;
 	}
-	eaddr = sym.st_value;
 
 	tp->offset = (unsigned long)(paddr - eaddr);
 	tp->address = (unsigned long)paddr;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 80ca5d0ab7fe..8c1b27cd8b99 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -464,6 +464,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
 					struct thread *thread)
 {
 	struct branch_stack *br = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
 	PyObject *pylist;
 	u64 i;
 
@@ -484,28 +485,28 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
 			Py_FatalError("couldn't create Python dictionary");
 
 		pydict_set_item_string_decref(pyelem, "from",
-		    PyLong_FromUnsignedLongLong(br->entries[i].from));
+		    PyLong_FromUnsignedLongLong(entries[i].from));
 		pydict_set_item_string_decref(pyelem, "to",
-		    PyLong_FromUnsignedLongLong(br->entries[i].to));
+		    PyLong_FromUnsignedLongLong(entries[i].to));
 		pydict_set_item_string_decref(pyelem, "mispred",
-		    PyBool_FromLong(br->entries[i].flags.mispred));
+		    PyBool_FromLong(entries[i].flags.mispred));
 		pydict_set_item_string_decref(pyelem, "predicted",
-		    PyBool_FromLong(br->entries[i].flags.predicted));
+		    PyBool_FromLong(entries[i].flags.predicted));
 		pydict_set_item_string_decref(pyelem, "in_tx",
-		    PyBool_FromLong(br->entries[i].flags.in_tx));
+		    PyBool_FromLong(entries[i].flags.in_tx));
 		pydict_set_item_string_decref(pyelem, "abort",
-		    PyBool_FromLong(br->entries[i].flags.abort));
+		    PyBool_FromLong(entries[i].flags.abort));
 		pydict_set_item_string_decref(pyelem, "cycles",
-		    PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles));
+		    PyLong_FromUnsignedLongLong(entries[i].flags.cycles));
 
 		thread__find_map_fb(thread, sample->cpumode,
-				    br->entries[i].from, &al);
+				    entries[i].from, &al);
 		dsoname = get_dsoname(al.map);
 		pydict_set_item_string_decref(pyelem, "from_dsoname",
 					      _PyUnicode_FromString(dsoname));
 
 		thread__find_map_fb(thread, sample->cpumode,
-				    br->entries[i].to, &al);
+				    entries[i].to, &al);
 		dsoname = get_dsoname(al.map);
 		pydict_set_item_string_decref(pyelem, "to_dsoname",
 					      _PyUnicode_FromString(dsoname));
@@ -561,6 +562,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
 					   struct thread *thread)
 {
 	struct branch_stack *br = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
 	PyObject *pylist;
 	u64 i;
 	char bf[512];
@@ -581,22 +583,22 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
 			Py_FatalError("couldn't create Python dictionary");
 
 		thread__find_symbol_fb(thread, sample->cpumode,
-				       br->entries[i].from, &al);
+				       entries[i].from, &al);
 		get_symoff(al.sym, &al, true, bf, sizeof(bf));
 		pydict_set_item_string_decref(pyelem, "from",
 					      _PyUnicode_FromString(bf));
 
 		thread__find_symbol_fb(thread, sample->cpumode,
-				       br->entries[i].to, &al);
+				       entries[i].to, &al);
 		get_symoff(al.sym, &al, true, bf, sizeof(bf));
 		pydict_set_item_string_decref(pyelem, "to",
 					      _PyUnicode_FromString(bf));
 
-		get_br_mspred(&br->entries[i].flags, bf, sizeof(bf));
+		get_br_mspred(&entries[i].flags, bf, sizeof(bf));
 		pydict_set_item_string_decref(pyelem, "pred",
 					      _PyUnicode_FromString(bf));
 
-		if (br->entries[i].flags.in_tx) {
+		if (entries[i].flags.in_tx) {
 			pydict_set_item_string_decref(pyelem, "in_tx",
 					      _PyUnicode_FromString("X"));
 		} else {
@@ -604,7 +606,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
 					      _PyUnicode_FromString("-"));
 		}
 
-		if (br->entries[i].flags.abort) {
+		if (entries[i].flags.abort) {
 			pydict_set_item_string_decref(pyelem, "abort",
 					      _PyUnicode_FromString("A"));
 		} else {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d0d7d25b23e3..055b00abd56d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1007,6 +1007,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample)
 {
 	struct ip_callchain *callchain = sample->callchain;
 	struct branch_stack *lbr_stack = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
 	u64 kernel_callchain_nr = callchain->nr;
 	unsigned int i;
 
@@ -1043,10 +1044,10 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample)
 			       i, callchain->ips[i]);
 
 		printf("..... %2d: %016" PRIx64 "\n",
-		       (int)(kernel_callchain_nr), lbr_stack->entries[0].to);
+		       (int)(kernel_callchain_nr), entries[0].to);
 		for (i = 0; i < lbr_stack->nr; i++)
 			printf("..... %2d: %016" PRIx64 "\n",
-			       (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from);
+			       (int)(i + kernel_callchain_nr + 1), entries[i].from);
 	}
 }
 
@@ -1068,6 +1069,7 @@ static void callchain__printf(struct evsel *evsel,
 
 static void branch_stack__printf(struct perf_sample *sample, bool callstack)
 {
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
 	uint64_t i;
 
 	printf("%s: nr:%" PRIu64 "\n",
@@ -1075,7 +1077,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
 		sample->branch_stack->nr);
 
 	for (i = 0; i < sample->branch_stack->nr; i++) {
-		struct branch_entry *e = &sample->branch_stack->entries[i];
+		struct branch_entry *e = &entries[i];
 
 		if (!callstack) {
 			printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index aa344a163eaf..8a065a6f9713 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -2,11 +2,13 @@ from os import getenv
 from subprocess import Popen, PIPE
 from re import sub
 
+cc = getenv("CC")
+cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline()
+
 def clang_has_option(option):
-    return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
+    return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
 
-cc = getenv("CC")
-if cc == "clang":
+if cc_is_clang:
     from distutils.sysconfig import get_config_vars
     vars = get_config_vars()
     for var in ('CFLAGS', 'OPT'):
@@ -40,7 +42,7 @@ class install_lib(_install_lib):
 cflags = getenv('CFLAGS', '').split()
 # switch off several checks (need to be at the end of cflags list)
 cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ]
-if cc != "clang":
+if not cc_is_clang:
     cflags += ['-Wno-cast-function-type' ]
 
 src_perf  = getenv('srctree') + '/tools/perf'
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bc31fccc0057..76c6052b12e2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -16,6 +16,7 @@
 #include <linux/ctype.h>
 #include "cgroup.h"
 #include <api/fs/fs.h>
+#include "util.h"
 
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
@@ -110,7 +111,7 @@ static void aggr_printout(struct perf_stat_config *config,
 			config->csv_sep);
 			break;
 	case AGGR_NONE:
-		if (evsel->percore) {
+		if (evsel->percore && !config->percore_show_thread) {
 			fprintf(config->output, "S%d-D%d-C%*d%s",
 				cpu_map__id_to_socket(id),
 				cpu_map__id_to_die(id),
@@ -628,7 +629,7 @@ static void aggr_cb(struct perf_stat_config *config,
 static void print_counter_aggrdata(struct perf_stat_config *config,
 				   struct evsel *counter, int s,
 				   char *prefix, bool metric_only,
-				   bool *first)
+				   bool *first, int cpu)
 {
 	struct aggr_data ad;
 	FILE *output = config->output;
@@ -654,7 +655,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 		fprintf(output, "%s", prefix);
 
 	uval = val * counter->scale;
-	printout(config, id, nr, counter, uval, prefix,
+	printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix,
 		 run, ena, 1.0, &rt_stat);
 	if (!metric_only)
 		fputc('\n', output);
@@ -687,7 +688,7 @@ static void print_aggr(struct perf_stat_config *config,
 		evlist__for_each_entry(evlist, counter) {
 			print_counter_aggrdata(config, counter, s,
 					       prefix, metric_only,
-					       &first);
+					       &first, -1);
 		}
 		if (metric_only)
 			fputc('\n', output);
@@ -1097,7 +1098,6 @@ static void print_footer(struct perf_stat_config *config)
 {
 	double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
 	FILE *output = config->output;
-	int n;
 
 	if (!config->null_run)
 		fprintf(output, "\n");
@@ -1131,9 +1131,7 @@ static void print_footer(struct perf_stat_config *config)
 	}
 	fprintf(output, "\n\n");
 
-	if (config->print_free_counters_hint &&
-	    sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
-	    n > 0)
+	if (config->print_free_counters_hint && sysctl__nmi_watchdog_enabled())
 		fprintf(output,
 "Some events weren't counted. Try disabling the NMI watchdog:\n"
 "	echo 0 > /proc/sys/kernel/nmi_watchdog\n"
@@ -1146,6 +1144,26 @@ static void print_footer(struct perf_stat_config *config)
 			"the same PMU. Try reorganizing the group.\n");
 }
 
+static void print_percore_thread(struct perf_stat_config *config,
+				 struct evsel *counter, char *prefix)
+{
+	int s, s2, id;
+	bool first = true;
+
+	for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
+		s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
+		for (s = 0; s < config->aggr_map->nr; s++) {
+			id = config->aggr_map->map[s];
+			if (s2 == id)
+				break;
+		}
+
+		print_counter_aggrdata(config, counter, s,
+				       prefix, false,
+				       &first, i);
+	}
+}
+
 static void print_percore(struct perf_stat_config *config,
 			  struct evsel *counter, char *prefix)
 {
@@ -1157,13 +1175,16 @@ static void print_percore(struct perf_stat_config *config,
 	if (!(config->aggr_map || config->aggr_get_id))
 		return;
 
+	if (config->percore_show_thread)
+		return print_percore_thread(config, counter, prefix);
+
 	for (s = 0; s < config->aggr_map->nr; s++) {
 		if (prefix && metric_only)
 			fprintf(output, "%s", prefix);
 
 		print_counter_aggrdata(config, counter, s,
 				       prefix, metric_only,
-				       &first);
+				       &first, -1);
 	}
 
 	if (metric_only)
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 90d23cc3c8d4..0fd713d3674f 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -777,9 +777,7 @@ static void generic_metric(struct perf_stat_config *config,
 	}
 
 	if (!metric_events[i]) {
-		const char *p = metric_expr;
-
-		if (expr__parse(&ratio, &pctx, &p) == 0) {
+		if (expr__parse(&ratio, &pctx, metric_expr) == 0) {
 			char *unit;
 			char metric_bf[64];
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index fb990efa54a8..b4fdfaa7f2c0 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -109,6 +109,7 @@ struct perf_stat_config {
 	bool			 walltime_run_table;
 	bool			 all_kernel;
 	bool			 all_user;
+	bool			 percore_show_thread;
 	FILE			*output;
 	unsigned int		 interval;
 	unsigned int		 timeout;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 1077013d8ce2..26bc6a0096ce 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1622,7 +1622,12 @@ int dso__load(struct dso *dso, struct map *map)
 		goto out;
 	}
 
-	if (dso->kernel) {
+	kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+		dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
+		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+
+	if (dso->kernel && !kmod) {
 		if (dso->kernel == DSO_TYPE_KERNEL)
 			ret = dso__load_kernel_sym(dso, map);
 		else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
@@ -1650,12 +1655,6 @@ int dso__load(struct dso *dso, struct map *map)
 	if (!name)
 		goto out;
 
-	kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
-		dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
-		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
-		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
-
-
 	/*
 	 * Read the build id if possible. This is required for
 	 * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index c423298fe62d..3f28af39f9c6 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -345,6 +345,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			continue;
 
 		event->mmap2.ino = (u64)ino;
+		event->mmap2.ino_generation = 0;
 
 		/*
 		 * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
@@ -1183,7 +1184,8 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
 
 	if (type & PERF_SAMPLE_BRANCH_STACK) {
 		sz = sample->branch_stack->nr * sizeof(struct branch_entry);
-		sz += sizeof(u64);
+		/* nr, hw_idx */
+		sz += 2 * sizeof(u64);
 		result += sz;
 	}
 
@@ -1344,7 +1346,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
 
 	if (type & PERF_SAMPLE_BRANCH_STACK) {
 		sz = sample->branch_stack->nr * sizeof(struct branch_entry);
-		sz += sizeof(u64);
+		/* nr, hw_idx */
+		sz += 2 * sizeof(u64);
 		memcpy(array, sample->branch_stack, sz);
 		array = (void *)array + sz;
 	}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 969ae560dad9..d707c9624dd9 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -55,6 +55,24 @@ int sysctl__max_stack(void)
 	return sysctl_perf_event_max_stack;
 }
 
+bool sysctl__nmi_watchdog_enabled(void)
+{
+	static bool cached;
+	static bool nmi_watchdog;
+	int value;
+
+	if (cached)
+		return nmi_watchdog;
+
+	if (sysctl__read_int("kernel/nmi_watchdog", &value) < 0)
+		return false;
+
+	nmi_watchdog = (value > 0) ? true : false;
+	cached = true;
+
+	return nmi_watchdog;
+}
+
 bool test_attr__enabled;
 
 bool perf_host  = true;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9969b8b46f7c..f486fdd3a538 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -29,6 +29,8 @@ size_t hex_width(u64 v);
 
 int sysctl__max_stack(void);
 
+bool sysctl__nmi_watchdog_enabled(void);
+
 int fetch_kernel_version(unsigned int *puint,
 			 char *str, size_t str_sz);
 #define KVER_VERSION(x)		(((x) >> 16) & 0xff)
diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore
index f698a0e5bfa6..0b319fc8bb17 100644
--- a/tools/power/acpi/.gitignore
+++ b/tools/power/acpi/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 /acpidbg
 /acpidump
 /ec
diff --git a/tools/power/cpupower/.gitignore b/tools/power/cpupower/.gitignore
index 1f9977cc609c..7677329c42a6 100644
--- a/tools/power/cpupower/.gitignore
+++ b/tools/power/cpupower/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 .libs
 libcpupower.so
 libcpupower.so.*
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
index 33dc34db4f3c..20f46348271b 100644
--- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
@@ -82,7 +82,7 @@ static struct pci_access *pci_acc;
 static struct pci_dev *amd_fam14h_pci_dev;
 static int nbp1_entered;
 
-struct timespec start_time;
+static struct timespec start_time;
 static unsigned long long timediff;
 
 #ifdef DEBUG
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index 3c4cee160b0e..a65f7d011513 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -19,7 +19,7 @@ struct cpuidle_monitor cpuidle_sysfs_monitor;
 
 static unsigned long long **previous_count;
 static unsigned long long **current_count;
-struct timespec start_time;
+static struct timespec start_time;
 static unsigned long long timediff;
 
 static int cpuidle_get_count_percent(unsigned int id, double *percent,
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index 6d44fec55ad5..7c77045fef52 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -27,6 +27,8 @@ struct cpuidle_monitor *all_monitors[] = {
 0
 };
 
+int cpu_count;
+
 static struct cpuidle_monitor *monitors[MONITORS_MAX];
 static unsigned int avail_monitors;
 
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
index 5b5eb1da0cce..c559d3115330 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -25,7 +25,7 @@
 #endif
 #define CSTATE_DESC_LEN 60
 
-int cpu_count;
+extern int cpu_count;
 
 /* Hard to define the right names ...: */
 enum power_range_e {
diff --git a/tools/power/x86/intel-speed-select/.gitignore b/tools/power/x86/intel-speed-select/.gitignore
index f61145925ce9..a814f89fe75f 100644
--- a/tools/power/x86/intel-speed-select/.gitignore
+++ b/tools/power/x86/intel-speed-select/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 include/
 intel-speed-select
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 2b2b8167c65b..b73763489410 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
 	int arg;
 };
 
-static const char *version_str = "v1.2";
+static const char *version_str = "v1.3";
 static const int supported_api_ver = 1;
 static struct isst_if_platform_info isst_platform_info;
 static char *progname;
@@ -42,6 +42,7 @@ static int out_format_json;
 static int cmd_help;
 static int force_online_offline;
 static int auto_mode;
+static int fact_enable_fail;
 
 /* clos related */
 static int current_clos = -1;
@@ -61,6 +62,18 @@ struct _cpu_map {
 };
 struct _cpu_map *cpu_map;
 
+struct cpu_topology {
+	short cpu;
+	short core_id;
+	short pkg_id;
+	short die_id;
+};
+
+FILE *get_output_file(void)
+{
+	return outf;
+}
+
 void debug_printf(const char *format, ...)
 {
 	va_list args;
@@ -82,6 +95,14 @@ int is_clx_n_platform(void)
 	return 0;
 }
 
+int is_skx_based_platform(void)
+{
+	if (cpu_model == 0x55)
+		return 1;
+
+	return 0;
+}
+
 static int update_cpu_model(void)
 {
 	unsigned int ebx, ecx, edx;
@@ -175,25 +196,137 @@ int out_format_is_json(void)
 	return out_format_json;
 }
 
+static int get_stored_topology_info(int cpu, int *core_id, int *pkg_id, int *die_id)
+{
+	const char *pathname = "/tmp/isst_cpu_topology.dat";
+	struct cpu_topology cpu_top;
+	FILE *fp;
+	int ret;
+
+	fp = fopen(pathname, "rb");
+	if (!fp)
+		return -1;
+
+	ret = fseek(fp, cpu * sizeof(cpu_top), SEEK_SET);
+	if (ret)
+		goto err_ret;
+
+	ret = fread(&cpu_top, sizeof(cpu_top), 1, fp);
+	if (ret != 1) {
+		ret = -1;
+		goto err_ret;
+	}
+
+	*pkg_id = cpu_top.pkg_id;
+	*core_id = cpu_top.core_id;
+	*die_id = cpu_top.die_id;
+	ret = 0;
+
+err_ret:
+	fclose(fp);
+
+	return ret;
+}
+
+static void store_cpu_topology(void)
+{
+	const char *pathname = "/tmp/isst_cpu_topology.dat";
+	FILE *fp;
+	int i;
+
+	fp = fopen(pathname, "rb");
+	if (fp) {
+		/* Mapping already exists */
+		fclose(fp);
+		return;
+	}
+
+	fp = fopen(pathname, "wb");
+	if (!fp) {
+		fprintf(stderr, "Can't create file:%s\n", pathname);
+		return;
+	}
+
+	for (i = 0; i < topo_max_cpus; ++i) {
+		struct cpu_topology cpu_top;
+
+		cpu_top.core_id = parse_int_file(0,
+			"/sys/devices/system/cpu/cpu%d/topology/core_id", i);
+		if (cpu_top.core_id < 0)
+			cpu_top.core_id = -1;
+
+		cpu_top.pkg_id = parse_int_file(0,
+			"/sys/devices/system/cpu/cpu%d/topology/physical_package_id", i);
+		if (cpu_top.pkg_id < 0)
+			cpu_top.pkg_id = -1;
+
+		cpu_top.die_id = parse_int_file(0,
+			"/sys/devices/system/cpu/cpu%d/topology/die_id", i);
+		if (cpu_top.die_id < 0)
+			cpu_top.die_id = -1;
+
+		cpu_top.cpu = i;
+
+		if (fwrite(&cpu_top, sizeof(cpu_top), 1, fp) != 1) {
+			fprintf(stderr, "Can't write to:%s\n", pathname);
+			break;
+		}
+	}
+
+	fclose(fp);
+}
+
 int get_physical_package_id(int cpu)
 {
-	return parse_int_file(
-		0, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
-		cpu);
+	int ret;
+
+	ret = parse_int_file(0,
+			"/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
+			cpu);
+	if (ret < 0) {
+		int core_id, pkg_id, die_id;
+
+		ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id);
+		if (!ret)
+			return pkg_id;
+	}
+
+	return ret;
 }
 
 int get_physical_core_id(int cpu)
 {
-	return parse_int_file(
-		0, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
+	int ret;
+
+	ret = parse_int_file(0,
+			"/sys/devices/system/cpu/cpu%d/topology/core_id",
+			cpu);
+	if (ret < 0) {
+		int core_id, pkg_id, die_id;
+
+		ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id);
+		if (!ret)
+			return core_id;
+	}
+
+	return ret;
 }
 
 int get_physical_die_id(int cpu)
 {
 	int ret;
 
-	ret = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/topology/die_id",
-			     cpu);
+	ret = parse_int_file(0,
+			"/sys/devices/system/cpu/cpu%d/topology/die_id",
+			cpu);
+	if (ret < 0) {
+		int core_id, pkg_id, die_id;
+
+		ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id);
+		if (!ret)
+			return die_id;
+	}
+
 	if (ret < 0)
 		ret = 0;
 
@@ -219,8 +352,14 @@ static void set_cpu_online_offline(int cpu, int state)
 		 "/sys/devices/system/cpu/cpu%d/online", cpu);
 
 	fd = open(buffer, O_WRONLY);
-	if (fd < 0)
+	if (fd < 0) {
+		if (!cpu && state) {
+			fprintf(stderr, "This system is not configured for CPU 0 online/offline\n");
+			fprintf(stderr, "Ignoring online request for CPU 0 as this is already online\n");
+			return;
+		}
 		err(-1, "%s open failed", buffer);
+	}
 
 	if (state)
 		ret = write(fd, "1\n", 2);
@@ -259,7 +398,12 @@ static void for_each_online_package_in_set(void (*callback)(int, void *, void *,
 		die_id = get_physical_die_id(i);
 		if (die_id < 0)
 			die_id = 0;
-		pkg_id = get_physical_package_id(i);
+
+		pkg_id = parse_int_file(0,
+			"/sys/devices/system/cpu/cpu%d/topology/physical_package_id", i);
+		if (pkg_id < 0)
+			continue;
+
 		/* Create an unique id for package, die combination to store */
 		pkg_id = (MAX_PACKAGE_COUNT * pkg_id + die_id);
 
@@ -281,7 +425,7 @@ static void for_each_online_target_cpu_in_set(
 	void (*callback)(int, void *, void *, void *, void *), void *arg1,
 	void *arg2, void *arg3, void *arg4)
 {
-	int i;
+	int i, found = 0;
 
 	for (i = 0; i < topo_max_cpus; ++i) {
 		int online;
@@ -295,9 +439,14 @@ static void for_each_online_target_cpu_in_set(
 			online =
 				1; /* online entry for CPU 0 needs some special configs */
 
-		if (online && callback)
+		if (online && callback) {
 			callback(i, arg1, arg2, arg3, arg4);
+			found = 1;
+		}
 	}
+
+	if (!found)
+		fprintf(stderr, "No valid CPU in the list\n");
 }
 
 #define BITMASK_SIZE 32
@@ -305,14 +454,27 @@ static void set_max_cpu_num(void)
 {
 	FILE *filep;
 	unsigned long dummy;
+	int i;
 
 	topo_max_cpus = 0;
-	filep = fopen_or_exit(
-		"/sys/devices/system/cpu/cpu0/topology/thread_siblings", "r");
+	for (i = 0; i < 256; ++i) {
+		char path[256];
+
+		snprintf(path, sizeof(path),
+			 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", i);
+		filep = fopen(path, "r");
+		if (filep)
+			break;
+	}
+
+	if (!filep) {
+		fprintf(stderr, "Can't get max cpu number\n");
+		exit(0);
+	}
+
 	while (fscanf(filep, "%lx,", &dummy) == 1)
 		topo_max_cpus += BITMASK_SIZE;
 	fclose(filep);
-	topo_max_cpus--; /* 0 based */
 
 	debug_printf("max cpus %d\n", topo_max_cpus);
 }
@@ -362,6 +524,10 @@ static void set_cpu_present_cpu_mask(void)
 				die_id = 0;
 
 			pkg_id = get_physical_package_id(i);
+			if (pkg_id < 0) {
+				fprintf(stderr, "Failed to get package id, CPU %d may be offline\n", i);
+				continue;
+			}
 			if (pkg_id < MAX_PACKAGE_COUNT &&
 			    die_id < MAX_DIE_PER_PACKAGE) {
 				int core_id = get_physical_core_id(i);
@@ -542,7 +708,11 @@ static int isst_send_mmio_command(unsigned int cpu, unsigned int reg, int write,
 	}
 
 	if (ioctl(fd, cmd, &io_regs) == -1) {
-		perror("ISST_IF_IO_CMD");
+		if (errno == ENOTTY) {
+			perror("ISST_IF_IO_COMMAND\n");
+			fprintf(stderr, "Check presence of kernel modules: isst_if_mmio\n");
+			exit(0);
+		}
 		fprintf(outf, "Error: mmio_cmd cpu:%d reg:%x read_write:%x\n",
 			cpu, reg, write);
 	} else {
@@ -571,7 +741,8 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command,
 		"mbox_send: cpu:%d command:%x sub_command:%x parameter:%x req_data:%x\n",
 		cpu, command, sub_command, parameter, req_data);
 
-	if (isst_platform_info.mmio_supported && command == CONFIG_CLOS) {
+	if (!is_skx_based_platform() && command == CONFIG_CLOS &&
+	    sub_command != CLOS_PM_QOS_CONFIG) {
 		unsigned int value;
 		int write = 0;
 		int clos_id, core_id, ret = 0;
@@ -620,10 +791,14 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command,
 		err(-1, "%s open failed", pathname);
 
 	if (ioctl(fd, ISST_IF_MBOX_COMMAND, &mbox_cmds) == -1) {
-		perror("ISST_IF_MBOX_COMMAND");
-		fprintf(outf,
-			"Error: mbox_cmd cpu:%d command:%x sub_command:%x parameter:%x req_data:%x\n",
-			cpu, command, sub_command, parameter, req_data);
+		if (errno == ENOTTY) {
+			perror("ISST_IF_MBOX_COMMAND\n");
+			fprintf(stderr, "Check presence of kernel modules: isst_if_mbox_pci or isst_if_mbox_msr\n");
+			exit(0);
+		}
+		debug_printf(
+			"Error: mbox_cmd cpu:%d command:%x sub_command:%x parameter:%x req_data:%x errorno:%d\n",
+			cpu, command, sub_command, parameter, req_data, errno);
 		return -1;
 	} else {
 		*resp = mbox_cmds.mbox_cmd[0].resp_data;
@@ -656,7 +831,7 @@ int isst_send_msr_command(unsigned int cpu, unsigned int msr, int write,
 		msr_cmds.msr_cmd[0].data = *req_resp;
 
 	if (ioctl(fd, ISST_IF_MSR_COMMAND, &msr_cmds) == -1) {
-		perror("ISST_IF_MSR_COMMAD");
+		perror("ISST_IF_MSR_COMMAND");
 		fprintf(outf, "Error: msr_cmd cpu:%d msr:%x read_write:%d\n",
 			cpu, msr, write);
 	} else {
@@ -697,12 +872,85 @@ static int isst_fill_platform_info(void)
 	return 0;
 }
 
+static void isst_print_extended_platform_info(void)
+{
+	int cp_state, cp_cap, fact_support = 0, pbf_support = 0;
+	struct isst_pkg_ctdp_level_info ctdp_level;
+	struct isst_pkg_ctdp pkg_dev;
+	int ret, i, j;
+	FILE *filep;
+
+	for (i = 0; i < 256; ++i) {
+		char path[256];
+
+		snprintf(path, sizeof(path),
+			 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", i);
+		filep = fopen(path, "r");
+		if (filep)
+			break;
+	}
+
+	if (!filep)
+		return;
+
+	fclose(filep);
+
+	ret = isst_get_ctdp_levels(i, &pkg_dev);
+	if (ret)
+		return;
+
+	if (pkg_dev.enabled) {
+		fprintf(outf, "Intel(R) SST-PP (feature perf-profile) is supported\n");
+	} else {
+		fprintf(outf, "Intel(R) SST-PP (feature perf-profile) is not supported\n");
+		fprintf(outf, "Only performance level 0 (base level) is present\n");
+	}
+
+	if (pkg_dev.locked)
+		fprintf(outf, "TDP level change control is locked\n");
+	else
+		fprintf(outf, "TDP level change control is unlocked, max level: %d \n", pkg_dev.levels);
+
+	for (j = 0; j <= pkg_dev.levels; ++j) {
+		ret = isst_get_ctdp_control(i, j, &ctdp_level);
+		if (ret)
+			continue;
+
+		if (!fact_support && ctdp_level.fact_support)
+			fact_support = 1;
+
+		if (!pbf_support && ctdp_level.pbf_support)
+			pbf_support = 1;
+	}
+
+	if (fact_support)
+		fprintf(outf, "Intel(R) SST-TF (feature turbo-freq) is supported\n");
+	else
+		fprintf(outf, "Intel(R) SST-TF (feature turbo-freq) is not supported\n");
+
+	if (pbf_support)
+		fprintf(outf, "Intel(R) SST-BF (feature base-freq) is supported\n");
+	else
+		fprintf(outf, "Intel(R) SST-BF (feature base-freq) is not supported\n");
+
+	ret = isst_read_pm_config(i, &cp_state, &cp_cap);
+	if (cp_cap)
+		fprintf(outf, "Intel(R) SST-CP (feature core-power) is supported\n");
+	else
+		fprintf(outf, "Intel(R) SST-CP (feature core-power) is not supported\n");
+}
+
 static void isst_print_platform_information(void)
 {
 	struct isst_if_platform_info platform_info;
 	const char *pathname = "/dev/isst_interface";
 	int fd;
 
+	if (is_clx_n_platform()) {
+		fprintf(stderr, "\nThis option in not supported on this platform\n");
+		exit(0);
+	}
+
 	fd = open(pathname, O_RDWR);
 	if (fd < 0)
 		err(-1, "%s open failed", pathname);
@@ -718,6 +966,7 @@ static void isst_print_platform_information(void)
 			platform_info.mbox_supported);
 		fprintf(outf, "Platform: mmio supported : %d\n",
 			platform_info.mmio_supported);
+		isst_print_extended_platform_info();
 	}
 
 	close(fd);
@@ -725,6 +974,7 @@ static void isst_print_platform_information(void)
 	exit(0);
 }
 
+static char *local_str0, *local_str1;
 static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 				 void *arg4)
 {
@@ -734,13 +984,14 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 	fn_ptr = arg1;
 	ret = fn_ptr(cpu, arg2);
 	if (ret)
-		perror("get_tdp_*");
+		isst_display_error_info_message(1, "get_tdp_* failed", 0, 0);
 	else
 		isst_ctdp_display_core_info(cpu, outf, arg3,
-					    *(unsigned int *)arg4);
+					    *(unsigned int *)arg4,
+					    local_str0, local_str1);
 }
 
-#define _get_tdp_level(desc, suffix, object, help)                                \
+#define _get_tdp_level(desc, suffix, object, help, str0, str1)			\
 	static void get_tdp_##object(int arg)                                    \
 	{                                                                         \
 		struct isst_pkg_ctdp ctdp;                                        \
@@ -751,6 +1002,8 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 				help);                                            \
 			exit(0);                                                  \
 		}                                                                 \
+		local_str0 = str0;						  \
+		local_str1 = str1;						  \
 		isst_ctdp_display_information_start(outf);                        \
 		if (max_target_cpus)                                              \
 			for_each_online_target_cpu_in_set(                        \
@@ -764,12 +1017,12 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 		isst_ctdp_display_information_end(outf);                          \
 	}
 
-_get_tdp_level("get-config-levels", levels, levels, "TDP levels");
-_get_tdp_level("get-config-version", levels, version, "TDP version");
-_get_tdp_level("get-config-enabled", levels, enabled, "TDP enable status");
+_get_tdp_level("get-config-levels", levels, levels, "Max TDP level", NULL, NULL);
+_get_tdp_level("get-config-version", levels, version, "TDP version", NULL, NULL);
+_get_tdp_level("get-config-enabled", levels, enabled, "perf-profile enable status", "disabled", "enabled");
 _get_tdp_level("get-config-current_level", levels, current_level,
-	       "Current TDP Level");
-_get_tdp_level("get-lock-status", levels, locked, "TDP lock status");
+	       "Current TDP Level", NULL, NULL);
+_get_tdp_level("get-lock-status", levels, locked, "TDP lock status", "unlocked", "locked");
 
 struct isst_pkg_ctdp clx_n_pkg_dev;
 
@@ -902,9 +1155,14 @@ static void dump_clx_n_config_for_cpu(int cpu, void *arg1, void *arg2,
 {
 	int ret;
 
+	if (tdp_level != 0xff && tdp_level != 0) {
+		isst_display_error_info_message(1, "Invalid level", 1, tdp_level);
+		exit(0);
+	}
+
 	ret = clx_n_config(cpu);
 	if (ret) {
-		perror("isst_get_process_ctdp");
+		debug_printf("clx_n_config failed");
 	} else {
 		struct isst_pkg_ctdp_level_info *ctdp_level;
 		struct isst_pbf_info *pbf_info;
@@ -926,7 +1184,9 @@ static void dump_isst_config_for_cpu(int cpu, void *arg1, void *arg2,
 	memset(&pkg_dev, 0, sizeof(pkg_dev));
 	ret = isst_get_process_ctdp(cpu, tdp_level, &pkg_dev);
 	if (ret) {
-		perror("isst_get_process_ctdp");
+		isst_display_error_info_message(1, "Failed to get perf-profile info on cpu", 1, cpu);
+		isst_ctdp_display_information_end(outf);
+		exit(1);
 	} else {
 		isst_ctdp_display_information(cpu, outf, tdp_level, &pkg_dev);
 		isst_get_process_ctdp_complete(cpu, &pkg_dev);
@@ -969,9 +1229,11 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 	int ret;
 
 	ret = isst_set_tdp_level(cpu, tdp_level);
-	if (ret)
-		perror("set_tdp_level_for_cpu");
-	else {
+	if (ret) {
+		isst_display_error_info_message(1, "Set TDP level failed", 0, 0);
+		isst_ctdp_display_information_end(outf);
+		exit(1);
+	} else {
 		isst_display_result(cpu, outf, "perf-profile", "set_tdp_level",
 				    ret);
 		if (force_online_offline) {
@@ -1009,11 +1271,13 @@ static void set_tdp_level(int arg)
 			"\t Arguments: -l|--level : Specify tdp level\n");
 		fprintf(stderr,
 			"\t Optional Arguments: -o | online : online/offline for the tdp level\n");
+		fprintf(stderr,
+			"\t  online/offline operation has limitations, refer to Linux hotplug documentation\n");
 		exit(0);
 	}
 
 	if (tdp_level == 0xff) {
-		fprintf(outf, "Invalid command: specify tdp_level\n");
+		isst_display_error_info_message(1, "Invalid command: specify tdp_level", 0, 0);
 		exit(1);
 	}
 	isst_ctdp_display_information_start(outf);
@@ -1033,7 +1297,7 @@ static void clx_n_dump_pbf_config_for_cpu(int cpu, void *arg1, void *arg2,
 
 	ret = clx_n_config(cpu);
 	if (ret) {
-		perror("isst_get_process_ctdp");
+		isst_display_error_info_message(1, "clx_n_config failed", 0, 0);
 	} else {
 		struct isst_pkg_ctdp_level_info *ctdp_level;
 		struct isst_pbf_info *pbf_info;
@@ -1054,7 +1318,9 @@ static void dump_pbf_config_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 
 	ret = isst_get_pbf_info(cpu, tdp_level, &pbf_info);
 	if (ret) {
-		perror("isst_get_pbf_info");
+		isst_display_error_info_message(1, "Failed to get base-freq info at this level", 1, tdp_level);
+		isst_ctdp_display_information_end(outf);
+		exit(1);
 	} else {
 		isst_pbf_display_information(cpu, outf, tdp_level, &pbf_info);
 		isst_get_pbf_info_complete(&pbf_info);
@@ -1074,7 +1340,7 @@ static void dump_pbf_config(int arg)
 	}
 
 	if (tdp_level == 0xff) {
-		fprintf(outf, "Invalid command: specify tdp_level\n");
+		isst_display_error_info_message(1, "Invalid command: specify tdp_level", 0, 0);
 		exit(1);
 	}
 
@@ -1100,7 +1366,7 @@ static int set_clos_param(int cpu, int clos, int epp, int wt, int min, int max)
 
 	ret = isst_pm_get_clos(cpu, clos, &clos_config);
 	if (ret) {
-		perror("isst_pm_get_clos");
+		isst_display_error_info_message(1, "isst_pm_get_clos failed", 0, 0);
 		return ret;
 	}
 	clos_config.clos_min = min;
@@ -1109,7 +1375,7 @@ static int set_clos_param(int cpu, int clos, int epp, int wt, int min, int max)
 	clos_config.clos_prop_prio = wt;
 	ret = isst_set_clos(cpu, clos, &clos_config);
 	if (ret) {
-		perror("isst_pm_set_clos");
+		isst_display_error_info_message(1, "isst_set_clos failed", 0, 0);
 		return ret;
 	}
 
@@ -1153,7 +1419,7 @@ static int set_clx_pbf_cpufreq_scaling_min_max(int cpu)
 
 	ret = clx_n_config(cpu);
 	if (ret) {
-		perror("set_clx_pbf_cpufreq_scaling_min_max");
+		debug_printf("cpufreq_scaling_min_max failed for CLX");
 		return ret;
 	}
 
@@ -1316,7 +1582,7 @@ static int set_core_priority_and_min(int cpu, int mask_size,
 		debug_printf("Associate cpu: %d clos: %d\n", i, clos);
 		ret = isst_clos_associate(i, clos);
 		if (ret) {
-			perror("isst_clos_associate");
+			isst_display_error_info_message(1, "isst_clos_associate failed", 0, 0);
 			return ret;
 		}
 	}
@@ -1332,14 +1598,14 @@ static int set_pbf_core_power(int cpu)
 
 	ret = isst_get_ctdp_levels(cpu, &pkg_dev);
 	if (ret) {
-		perror("isst_get_ctdp_levels");
+		debug_printf("isst_get_ctdp_levels failed");
 		return ret;
 	}
 	debug_printf("Current_level: %d\n", pkg_dev.current_level);
 
 	ret = isst_get_pbf_info(cpu, pkg_dev.current_level, &pbf_info);
 	if (ret) {
-		perror("isst_get_pbf_info");
+		debug_printf("isst_get_pbf_info failed");
 		return ret;
 	}
 	debug_printf("p1_high: %d p1_low: %d\n", pbf_info.p1_high,
@@ -1349,13 +1615,13 @@ static int set_pbf_core_power(int cpu)
 					pbf_info.core_cpumask,
 					pbf_info.p1_high, pbf_info.p1_low);
 	if (ret) {
-		perror("set_core_priority_and_min");
+		debug_printf("set_core_priority_and_min failed");
 		return ret;
 	}
 
 	ret = isst_pm_qos_config(cpu, 1, 1);
 	if (ret) {
-		perror("isst_pm_qos_config");
+		debug_printf("isst_pm_qos_config failed");
 		return ret;
 	}
 
@@ -1369,17 +1635,13 @@ static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 	int status = *(int *)arg4;
 
 	if (is_clx_n_platform()) {
+		ret = 0;
 		if (status) {
-			ret = 0;
-			if (auto_mode)
-				set_clx_pbf_cpufreq_scaling_min_max(cpu);
+			set_clx_pbf_cpufreq_scaling_min_max(cpu);
 
 		} else {
-			ret = -1;
-			if (auto_mode) {
-				set_scaling_max_to_cpuinfo_max(cpu);
-				set_scaling_min_to_cpuinfo_min(cpu);
-			}
+			set_scaling_max_to_cpuinfo_max(cpu);
+			set_scaling_min_to_cpuinfo_min(cpu);
 		}
 		goto disp_result;
 	}
@@ -1392,7 +1654,7 @@ static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 
 	ret = isst_set_pbf_fact_status(cpu, 1, status);
 	if (ret) {
-		perror("isst_set_pbf");
+		debug_printf("isst_set_pbf_fact_status failed");
 		if (auto_mode)
 			isst_pm_qos_config(cpu, 0, 0);
 	} else {
@@ -1405,7 +1667,7 @@ static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 	}
 
 	if (auto_mode && !status)
-		isst_pm_qos_config(cpu, 0, 0);
+		isst_pm_qos_config(cpu, 0, 1);
 
 disp_result:
 	if (status)
@@ -1424,10 +1686,25 @@ static void set_pbf_enable(int arg)
 		if (enable) {
 			fprintf(stderr,
 				"Enable Intel Speed Select Technology base frequency feature\n");
+			if (is_clx_n_platform()) {
+				fprintf(stderr,
+					"\tOn this platform this command doesn't enable feature in the hardware.\n");
+				fprintf(stderr,
+					"\tIt updates the cpufreq scaling_min_freq to match cpufreq base_frequency.\n");
+				exit(0);
+
+			}
 			fprintf(stderr,
 				"\tOptional Arguments: -a|--auto : Use priority of cores to set core-power associations\n");
 		} else {
 
+			if (is_clx_n_platform()) {
+				fprintf(stderr,
+					"\tOn this platform this command doesn't disable feature in the hardware.\n");
+				fprintf(stderr,
+					"\tIt updates the cpufreq scaling_min_freq to match cpuinfo_min_freq\n");
+				exit(0);
+			}
 			fprintf(stderr,
 				"Disable Intel Speed Select Technology base frequency feature\n");
 			fprintf(stderr,
@@ -1452,12 +1729,15 @@ static void dump_fact_config_for_cpu(int cpu, void *arg1, void *arg2,
 	struct isst_fact_info fact_info;
 	int ret;
 
-	ret = isst_get_fact_info(cpu, tdp_level, &fact_info);
-	if (ret)
-		perror("isst_get_fact_bucket_info");
-	else
+	ret = isst_get_fact_info(cpu, tdp_level, fact_bucket, &fact_info);
+	if (ret) {
+		isst_display_error_info_message(1, "Failed to get turbo-freq info at this level", 1, tdp_level);
+		isst_ctdp_display_information_end(outf);
+		exit(1);
+	} else {
 		isst_fact_display_information(cpu, outf, tdp_level, fact_bucket,
 					      fact_avx, &fact_info);
+	}
 }
 
 static void dump_fact_config(int arg)
@@ -1475,7 +1755,7 @@ static void dump_fact_config(int arg)
 	}
 
 	if (tdp_level == 0xff) {
-		fprintf(outf, "Invalid command: specify tdp_level\n");
+		isst_display_error_info_message(1, "Invalid command: specify tdp_level\n", 0, 0);
 		exit(1);
 	}
 
@@ -1503,7 +1783,7 @@ static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 
 	ret = isst_set_pbf_fact_status(cpu, 0, status);
 	if (ret) {
-		perror("isst_set_fact");
+		debug_printf("isst_set_pbf_fact_status failed");
 		if (auto_mode)
 			isst_pm_qos_config(cpu, 0, 0);
 
@@ -1527,6 +1807,8 @@ static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 disp_results:
 	if (status) {
 		isst_display_result(cpu, outf, "turbo-freq", "enable", ret);
+		if (ret)
+			fact_enable_fail = ret;
 	} else {
 		/* Since we modified TRL during Fact enable, restore it */
 		isst_set_trl_from_current_tdp(cpu, fact_trl);
@@ -1568,7 +1850,7 @@ static void set_fact_enable(int arg)
 					       NULL, &enable);
 	isst_ctdp_display_information_end(outf);
 
-	if (enable && auto_mode) {
+	if (!fact_enable_fail && enable && auto_mode) {
 		/*
 		 * When we adjust CLOS param, we have to set for siblings also.
 		 * So for the each user specified CPU, also add the sibling
@@ -1652,9 +1934,12 @@ static void enable_clos_qos_config(int cpu, void *arg1, void *arg2, void *arg3,
 	int ret;
 	int status = *(int *)arg4;
 
+	if (is_skx_based_platform())
+		clos_priority_type = 1;
+
 	ret = isst_pm_qos_config(cpu, status, clos_priority_type);
 	if (ret)
-		perror("isst_pm_qos_config");
+		isst_display_error_info_message(1, "isst_pm_qos_config failed", 0, 0);
 
 	if (status)
 		isst_display_result(cpu, outf, "core-power", "enable",
@@ -1672,9 +1957,11 @@ static void set_clos_enable(int arg)
 		if (enable) {
 			fprintf(stderr,
 				"Enable core-power for a package/die\n");
-			fprintf(stderr,
-				"\tClos Enable: Specify priority type with [--priority|-p]\n");
-			fprintf(stderr, "\t\t 0: Proportional, 1: Ordered\n");
+			if (!is_skx_based_platform()) {
+				fprintf(stderr,
+					"\tClos Enable: Specify priority type with [--priority|-p]\n");
+				fprintf(stderr, "\t\t 0: Proportional, 1: Ordered\n");
+			}
 		} else {
 			fprintf(stderr,
 				"Disable core-power: [No command arguments are required]\n");
@@ -1705,7 +1992,7 @@ static void dump_clos_config_for_cpu(int cpu, void *arg1, void *arg2,
 
 	ret = isst_pm_get_clos(cpu, current_clos, &clos_config);
 	if (ret)
-		perror("isst_pm_get_clos");
+		isst_display_error_info_message(1, "isst_pm_get_clos failed", 0, 0);
 	else
 		isst_clos_display_information(cpu, outf, current_clos,
 					      &clos_config);
@@ -1721,7 +2008,8 @@ static void dump_clos_config(int arg)
 		exit(0);
 	}
 	if (current_clos < 0 || current_clos > 3) {
-		fprintf(stderr, "Invalid clos id\n");
+		isst_display_error_info_message(1, "Invalid clos id\n", 0, 0);
+		isst_ctdp_display_information_end(outf);
 		exit(0);
 	}
 
@@ -1742,9 +2030,14 @@ static void get_clos_info_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 
 	ret = isst_clos_get_clos_information(cpu, &enable, &prio_type);
 	if (ret)
-		perror("isst_clos_get_info");
-	else
-		isst_clos_display_clos_information(cpu, outf, enable, prio_type);
+		isst_display_error_info_message(1, "isst_clos_get_info failed", 0, 0);
+	else {
+		int cp_state, cp_cap;
+
+		isst_read_pm_config(cpu, &cp_state, &cp_cap);
+		isst_clos_display_clos_information(cpu, outf, enable, prio_type,
+						   cp_state, cp_cap);
+	}
 }
 
 static void dump_clos_info(int arg)
@@ -1752,19 +2045,17 @@ static void dump_clos_info(int arg)
 	if (cmd_help) {
 		fprintf(stderr,
 			"Print Intel Speed Select Technology core power information\n");
-		fprintf(stderr, "\tSpecify targeted cpu id with [--cpu|-c]\n");
-		exit(0);
-	}
-
-	if (!max_target_cpus) {
-		fprintf(stderr,
-			"Invalid target cpu. Specify with [-c|--cpu]\n");
+		fprintf(stderr, "\t Optionally specify targeted cpu id with [--cpu|-c]\n");
 		exit(0);
 	}
 
 	isst_ctdp_display_information_start(outf);
-	for_each_online_target_cpu_in_set(get_clos_info_for_cpu, NULL,
-					  NULL, NULL, NULL);
+	if (max_target_cpus)
+		for_each_online_target_cpu_in_set(get_clos_info_for_cpu, NULL,
+						  NULL, NULL, NULL);
+	else
+		for_each_online_package_in_set(get_clos_info_for_cpu, NULL,
+					       NULL, NULL, NULL);
 	isst_ctdp_display_information_end(outf);
 
 }
@@ -1785,7 +2076,7 @@ static void set_clos_config_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 	clos_config.clos_desired = clos_desired;
 	ret = isst_set_clos(cpu, current_clos, &clos_config);
 	if (ret)
-		perror("isst_set_clos");
+		isst_display_error_info_message(1, "isst_set_clos failed", 0, 0);
 	else
 		isst_display_result(cpu, outf, "core-power", "config", ret);
 }
@@ -1797,26 +2088,27 @@ static void set_clos_config(int arg)
 			"Set core-power configuration for one of the four clos ids\n");
 		fprintf(stderr,
 			"\tSpecify targeted clos id with [--clos|-c]\n");
-		fprintf(stderr, "\tSpecify clos EPP with [--epp|-e]\n");
-		fprintf(stderr,
-			"\tSpecify clos Proportional Priority [--weight|-w]\n");
+		if (!is_skx_based_platform()) {
+			fprintf(stderr, "\tSpecify clos EPP with [--epp|-e]\n");
+			fprintf(stderr,
+				"\tSpecify clos Proportional Priority [--weight|-w]\n");
+		}
 		fprintf(stderr, "\tSpecify clos min in MHz with [--min|-n]\n");
 		fprintf(stderr, "\tSpecify clos max in MHz with [--max|-m]\n");
-		fprintf(stderr, "\tSpecify clos desired in MHz with [--desired|-d]\n");
 		exit(0);
 	}
 
 	if (current_clos < 0 || current_clos > 3) {
-		fprintf(stderr, "Invalid clos id\n");
+		isst_display_error_info_message(1, "Invalid clos id\n", 0, 0);
 		exit(0);
 	}
-	if (clos_epp < 0 || clos_epp > 0x0F) {
-		fprintf(stderr, "clos epp is not specified, default: 0\n");
+	if (!is_skx_based_platform() && (clos_epp < 0 || clos_epp > 0x0F)) {
+		fprintf(stderr, "clos epp is not specified or invalid, default: 0\n");
 		clos_epp = 0;
 	}
-	if (clos_prop_prio < 0 || clos_prop_prio > 0x0F) {
+	if (!is_skx_based_platform() && (clos_prop_prio < 0 || clos_prop_prio > 0x0F)) {
 		fprintf(stderr,
-			"clos frequency weight is not specified, default: 0\n");
+			"clos frequency weight is not specified or invalid, default: 0\n");
 		clos_prop_prio = 0;
 	}
 	if (clos_min < 0) {
@@ -1824,11 +2116,11 @@ static void set_clos_config(int arg)
 		clos_min = 0;
 	}
 	if (clos_max < 0) {
-		fprintf(stderr, "clos max is not specified, default: 25500 MHz\n");
+		fprintf(stderr, "clos max is not specified, default: Max frequency (ratio 0xff)\n");
 		clos_max = 0xff;
 	}
-	if (clos_desired < 0) {
-		fprintf(stderr, "clos desired is not specified, default: 0\n");
+	if (clos_desired) {
+		fprintf(stderr, "clos desired is not supported on this platform\n");
 		clos_desired = 0x00;
 	}
 
@@ -1849,7 +2141,7 @@ static void set_clos_assoc_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 
 	ret = isst_clos_associate(cpu, current_clos);
 	if (ret)
-		perror("isst_clos_associate");
+		debug_printf("isst_clos_associate failed");
 	else
 		isst_display_result(cpu, outf, "core-power", "assoc", ret);
 }
@@ -1860,19 +2152,22 @@ static void set_clos_assoc(int arg)
 		fprintf(stderr, "Associate a clos id to a CPU\n");
 		fprintf(stderr,
 			"\tSpecify targeted clos id with [--clos|-c]\n");
+		fprintf(stderr,
+			"\tFor example to associate clos 1 to CPU 0: issue\n");
+		fprintf(stderr,
+			"\tintel-speed-select --cpu 0 core-power assoc --clos 1\n");
 		exit(0);
 	}
 
 	if (current_clos < 0 || current_clos > 3) {
-		fprintf(stderr, "Invalid clos id\n");
+		isst_display_error_info_message(1, "Invalid clos id\n", 0, 0);
 		exit(0);
 	}
 	if (max_target_cpus)
 		for_each_online_target_cpu_in_set(set_clos_assoc_for_cpu, NULL,
 						  NULL, NULL, NULL);
 	else {
-		fprintf(stderr,
-			"Invalid target cpu. Specify with [-c|--cpu]\n");
+		isst_display_error_info_message(1, "Invalid target cpu. Specify with [-c|--cpu]", 0, 0);
 	}
 }
 
@@ -1883,7 +2178,7 @@ static void get_clos_assoc_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
 
 	ret = isst_clos_get_assoc_status(cpu, &clos);
 	if (ret)
-		perror("isst_clos_get_assoc_status");
+		isst_display_error_info_message(1, "isst_clos_get_assoc_status failed", 0, 0);
 	else
 		isst_clos_display_assoc_information(cpu, outf, clos);
 }
@@ -1897,8 +2192,7 @@ static void get_clos_assoc(int arg)
 	}
 
 	if (!max_target_cpus) {
-		fprintf(stderr,
-			"Invalid target cpu. Specify with [-c|--cpu]\n");
+		isst_display_error_info_message(1, "Invalid target cpu. Specify with [-c|--cpu]", 0, 0);
 		exit(0);
 	}
 
@@ -2035,7 +2329,7 @@ static void parse_cmd_args(int argc, int start, char **argv)
 	option_index = start;
 
 	optind = start + 1;
-	while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:hoa",
+	while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:r:hoa",
 				  long_options, &option_index)) != -1) {
 		switch (opt) {
 		case 'a':
@@ -2061,7 +2355,7 @@ static void parse_cmd_args(int argc, int start, char **argv)
 				fact_avx = 0x01;
 			} else if (!strncmp(optarg, "avx2", 4)) {
 				fact_avx = 0x02;
-			} else if (!strncmp(optarg, "avx512", 4)) {
+			} else if (!strncmp(optarg, "avx512", 6)) {
 				fact_avx = 0x04;
 			} else {
 				fprintf(outf, "Invalid sse,avx options\n");
@@ -2078,6 +2372,10 @@ static void parse_cmd_args(int argc, int start, char **argv)
 			break;
 		case 'e':
 			clos_epp = atoi(optarg);
+			if (is_skx_based_platform()) {
+				isst_display_error_info_message(1, "epp can't be specified on this platform", 0, 0);
+				exit(0);
+			}
 			break;
 		case 'n':
 			clos_min = atoi(optarg);
@@ -2089,14 +2387,25 @@ static void parse_cmd_args(int argc, int start, char **argv)
 			break;
 		case 'p':
 			clos_priority_type = atoi(optarg);
+			if (is_skx_based_platform() && !clos_priority_type) {
+				isst_display_error_info_message(1, "Invalid clos priority type: proportional for this platform", 0, 0);
+				exit(0);
+			}
 			break;
 		case 'w':
 			clos_prop_prio = atoi(optarg);
+			if (is_skx_based_platform()) {
+				isst_display_error_info_message(1, "weight can't be specified on this platform", 0, 0);
+				exit(0);
+			}
 			break;
 		default:
-			printf("no match\n");
+			printf("Unknown option: ignore\n");
 		}
 	}
+
+	if (argv[optind])
+		printf("Garbage at the end of command: ignore\n");
 }
 
 static void isst_help(void)
@@ -2214,11 +2523,18 @@ void process_command(int argc, char **argv,
 
 static void usage(void)
 {
-	printf("Intel(R) Speed Select Technology\n");
+	if (is_clx_n_platform()) {
+		fprintf(stderr, "\nThere is limited support of Intel Speed Select features on this platform.\n");
+		fprintf(stderr, "Everything is pre-configured using BIOS options, this tool can't enable any feature in the hardware.\n\n");
+	}
+
 	printf("\nUsage:\n");
 	printf("intel-speed-select [OPTIONS] FEATURE COMMAND COMMAND_ARGUMENTS\n");
-	printf("\nUse this tool to enumerate and control the Intel Speed Select Technology features,\n");
-	printf("\nFEATURE : [perf-profile|base-freq|turbo-freq|core-power]\n");
+	printf("\nUse this tool to enumerate and control the Intel Speed Select Technology features:\n");
+	if (is_clx_n_platform())
+		printf("\nFEATURE : [perf-profile|base-freq]\n");
+	else
+		printf("\nFEATURE : [perf-profile|base-freq|turbo-freq|core-power]\n");
 	printf("\nFor help on each feature, use -h|--help\n");
 	printf("\tFor example:  intel-speed-select perf-profile -h\n");
 
@@ -2231,17 +2547,29 @@ static void usage(void)
 	printf("\t\tDefault: Die scoped for all dies in the system with multiple dies/package\n");
 	printf("\t\t\t Or Package scoped for all Packages when each package contains one die\n");
 	printf("\t[-d|--debug] : Debug mode\n");
+	printf("\t[-f|--format] : output format [json|text]. Default: text\n");
 	printf("\t[-h|--help] : Print help\n");
 	printf("\t[-i|--info] : Print platform information\n");
 	printf("\t[-o|--out] : Output file\n");
 	printf("\t\t\tDefault : stderr\n");
-	printf("\t[-f|--format] : output format [json|text]. Default: text\n");
 	printf("\t[-v|--version] : Print version\n");
 
 	printf("\nResult format\n");
 	printf("\tResult display uses a common format for each command:\n");
 	printf("\tResults are formatted in text/JSON with\n");
 	printf("\t\tPackage, Die, CPU, and command specific results.\n");
+
+	printf("\nExamples\n");
+	printf("\tTo get platform information:\n");
+	printf("\t\tintel-speed-select --info\n");
+	printf("\tTo get full perf-profile information dump:\n");
+	printf("\t\tintel-speed-select perf-profile info\n");
+	printf("\tTo get full base-freq information dump:\n");
+	printf("\t\tintel-speed-select base-freq info -l 0\n");
+	if (!is_clx_n_platform()) {
+		printf("\tTo get full turbo-freq information dump:\n");
+		printf("\t\tintel-speed-select turbo-freq info -l 0\n");
+	}
 	exit(1);
 }
 
@@ -2254,6 +2582,8 @@ static void print_version(void)
 
 static void cmdline(int argc, char **argv)
 {
+	const char *pathname = "/dev/isst_interface";
+	FILE *fp;
 	int opt;
 	int option_index = 0;
 	int ret;
@@ -2269,6 +2599,28 @@ static void cmdline(int argc, char **argv)
 		{ 0, 0, 0, 0 }
 	};
 
+	if (geteuid() != 0) {
+		fprintf(stderr, "Must run as root\n");
+		exit(0);
+	}
+
+	ret = update_cpu_model();
+	if (ret)
+		err(-1, "Invalid CPU model (%d)\n", cpu_model);
+	printf("Intel(R) Speed Select Technology\n");
+	printf("Executing on CPU model:%d[0x%x]\n", cpu_model, cpu_model);
+
+	if (!is_clx_n_platform()) {
+		fp = fopen(pathname, "rb");
+		if (!fp) {
+			fprintf(stderr, "Intel speed select drivers are not loaded on this system.\n");
+			fprintf(stderr, "Verify that kernel config includes CONFIG_INTEL_SPEED_SELECT_INTERFACE.\n");
+			fprintf(stderr, "If the config is included then this is not a supported platform.\n");
+			exit(0);
+		}
+		fclose(fp);
+	}
+
 	progname = argv[0];
 	while ((opt = getopt_long_only(argc, argv, "+c:df:hio:v", long_options,
 				       &option_index)) != -1) {
@@ -2303,21 +2655,12 @@ static void cmdline(int argc, char **argv)
 		}
 	}
 
-	if (geteuid() != 0) {
-		fprintf(stderr, "Must run as root\n");
-		exit(0);
-	}
-
 	if (optind > (argc - 2)) {
-		fprintf(stderr, "Feature name and|or command not specified\n");
+		usage();
 		exit(0);
 	}
-	ret = update_cpu_model();
-	if (ret)
-		err(-1, "Invalid CPU model (%d)\n", cpu_model);
-	printf("Intel(R) Speed Select Technology\n");
-	printf("Executing on CPU model:%d[0x%x]\n", cpu_model, cpu_model);
 	set_max_cpu_num();
+	store_cpu_topology();
 	set_cpu_present_cpu_mask();
 	set_cpu_target_cpu_mask();
 
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index 81a119f688a3..67c9b1139631 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -114,8 +114,10 @@ int isst_get_tdp_info(int cpu, int config_index,
 
 	ret = isst_send_mbox_command(cpu, CONFIG_TDP, CONFIG_TDP_GET_TDP_INFO,
 				     0, config_index, &resp);
-	if (ret)
+	if (ret) {
+		isst_display_error_info_message(1, "Invalid level, Can't get TDP information at level", 1, config_index);
 		return ret;
+	}
 
 	ctdp_level->pkg_tdp = resp & GENMASK(14, 0);
 	ctdp_level->tdp_ratio = (resp & GENMASK(23, 16)) >> 16;
@@ -352,7 +354,7 @@ int isst_set_tdp_level_msr(int cpu, int tdp_level)
 	debug_printf("cpu: tdp_level via MSR %d\n", cpu, tdp_level);
 
 	if (isst_get_config_tdp_lock_status(cpu)) {
-		debug_printf("cpu: tdp_locked %d\n", cpu);
+		isst_display_error_info_message(1, "tdp_locked", 0, 0);
 		return -1;
 	}
 
@@ -373,19 +375,50 @@ int isst_set_tdp_level(int cpu, int tdp_level)
 	unsigned int resp;
 	int ret;
 
+
+	if (isst_get_config_tdp_lock_status(cpu)) {
+		isst_display_error_info_message(1, "TDP is locked", 0, 0);
+		return -1;
+
+	}
+
 	ret = isst_send_mbox_command(cpu, CONFIG_TDP, CONFIG_TDP_SET_LEVEL, 0,
 				     tdp_level, &resp);
-	if (ret)
-		return isst_set_tdp_level_msr(cpu, tdp_level);
+	if (ret) {
+		isst_display_error_info_message(1, "Set TDP level failed for level", 1, tdp_level);
+		return ret;
+	}
 
 	return 0;
 }
 
 int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info)
 {
+	struct isst_pkg_ctdp_level_info ctdp_level;
+	struct isst_pkg_ctdp pkg_dev;
 	int i, ret, core_cnt, max;
 	unsigned int req, resp;
 
+	ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+	if (ret) {
+		isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
+		return ret;
+	}
+
+	if (level > pkg_dev.levels) {
+		isst_display_error_info_message(1, "Invalid level", 1, level);
+		return -1;
+	}
+
+	ret = isst_get_ctdp_control(cpu, level, &ctdp_level);
+	if (ret)
+		return ret;
+
+	if (!ctdp_level.pbf_support) {
+		isst_display_error_info_message(1, "base-freq feature is not present at this level", 1, level);
+		return -1;
+	}
+
 	pbf_info->core_cpumask_size = alloc_cpu_set(&pbf_info->core_cpumask);
 
 	core_cnt = get_core_count(get_physical_package_id(cpu), get_physical_die_id(cpu));
@@ -481,6 +514,10 @@ int isst_set_pbf_fact_status(int cpu, int pbf, int enable)
 		else
 			req &= ~BIT(17);
 	} else {
+
+		if (enable && !ctdp_level.sst_cp_enabled)
+			isst_display_error_info_message(0, "Make sure to execute before: core-power enable", 0, 0);
+
 		if (ctdp_level.pbf_enabled)
 			req = BIT(17);
 
@@ -566,10 +603,32 @@ int isst_get_fact_bucket_info(int cpu, int level,
 	return 0;
 }
 
-int isst_get_fact_info(int cpu, int level, struct isst_fact_info *fact_info)
+int isst_get_fact_info(int cpu, int level, int fact_bucket, struct isst_fact_info *fact_info)
 {
+	struct isst_pkg_ctdp_level_info ctdp_level;
+	struct isst_pkg_ctdp pkg_dev;
 	unsigned int resp;
-	int ret;
+	int j, ret, print;
+
+	ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+	if (ret) {
+		isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
+		return ret;
+	}
+
+	if (level > pkg_dev.levels) {
+		isst_display_error_info_message(1, "Invalid level", 1, level);
+		return -1;
+	}
+
+	ret = isst_get_ctdp_control(cpu, level, &ctdp_level);
+	if (ret)
+		return ret;
+
+	if (!ctdp_level.fact_support) {
+		isst_display_error_info_message(1, "turbo-freq feature is not present at this level", 1, level);
+		return -1;
+	}
 
 	ret = isst_send_mbox_command(cpu, CONFIG_TDP,
 				     CONFIG_TDP_GET_FACT_LP_CLIPPING_RATIO, 0,
@@ -585,8 +644,25 @@ int isst_get_fact_info(int cpu, int level, struct isst_fact_info *fact_info)
 	fact_info->lp_clipping_ratio_license_avx512 = (resp >> 16) & 0xff;
 
 	ret = isst_get_fact_bucket_info(cpu, level, fact_info->bucket_info);
+	if (ret)
+		return ret;
 
-	return ret;
+	print = 0;
+	for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) {
+		if (fact_bucket != 0xff && fact_bucket != j)
+			continue;
+
+		if (!fact_info->bucket_info[j].high_priority_cores_count)
+			break;
+
+		print = 1;
+	}
+	if (!print) {
+		isst_display_error_info_message(1, "Invalid bucket", 0, 0);
+		return -1;
+	}
+
+	return 0;
 }
 
 int isst_set_trl(int cpu, unsigned long long trl)
@@ -671,7 +747,7 @@ void isst_get_process_ctdp_complete(int cpu, struct isst_pkg_ctdp *pkg_dev)
 
 int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
 {
-	int i, ret;
+	int i, ret, valid = 0;
 
 	if (pkg_dev->processed)
 		return 0;
@@ -684,6 +760,14 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
 		     cpu, pkg_dev->enabled, pkg_dev->current_level,
 		     pkg_dev->levels);
 
+	if (tdp_level != 0xff && tdp_level > pkg_dev->levels) {
+		isst_display_error_info_message(1, "Invalid level", 0, 0);
+		return -1;
+	}
+
+	if (!pkg_dev->enabled)
+		isst_display_error_info_message(0, "perf-profile feature is not supported, just base-config level 0 is valid", 0, 0);
+
 	for (i = 0; i <= pkg_dev->levels; ++i) {
 		struct isst_pkg_ctdp_level_info *ctdp_level;
 
@@ -703,6 +787,7 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
 		if (ret)
 			continue;
 
+		valid = 1;
 		pkg_dev->processed = 1;
 		ctdp_level->processed = 1;
 
@@ -713,7 +798,7 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
 		}
 
 		if (ctdp_level->fact_support) {
-			ret = isst_get_fact_info(cpu, i,
+			ret = isst_get_fact_info(cpu, i, 0xff,
 						 &ctdp_level->fact_info);
 			if (ret)
 				return ret;
@@ -775,6 +860,9 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
 		isst_get_uncore_mem_freq(cpu, i, ctdp_level);
 	}
 
+	if (!valid)
+		isst_display_error_info_message(0, "Invalid level, Can't get TDP control information at specified levels on cpu", 1, cpu);
+
 	return 0;
 }
 
@@ -829,17 +917,19 @@ int isst_pm_qos_config(int cpu, int enable_clos, int priority_type)
 		}
 		ret = isst_write_pm_config(cpu, 0);
 		if (ret)
-			perror("isst_write_pm_config\n");
+			isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error\n", 0, 0);
 	} else {
 		ret = isst_write_pm_config(cpu, 1);
 		if (ret)
-			perror("isst_write_pm_config\n");
+			isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error\n", 0, 0);
 	}
 
 	ret = isst_send_mbox_command(cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0,
 				     &resp);
-	if (ret)
+	if (ret) {
+		isst_display_error_info_message(1, "CLOS_PM_QOS_CONFIG command failed", 0, 0);
 		return ret;
+	}
 
 	debug_printf("cpu:%d CLOS_PM_QOS_CONFIG resp:%x\n", cpu, resp);
 
@@ -850,6 +940,9 @@ int isst_pm_qos_config(int cpu, int enable_clos, int priority_type)
 	else
 		req = req & ~BIT(1);
 
+	if (priority_type > 1)
+		isst_display_error_info_message(1, "Invalid priority type: Changing type to ordered", 0, 0);
+
 	if (priority_type)
 		req = req | BIT(2);
 	else
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 4fb0c1d49d64..51dbaa5f02ec 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -158,10 +158,17 @@ static void format_and_print(FILE *outf, int level, char *header, char *value)
 	last_level = level;
 }
 
-static void print_package_info(int cpu, FILE *outf)
+static int print_package_info(int cpu, FILE *outf)
 {
 	char header[256];
 
+	if (out_format_is_json()) {
+		snprintf(header, sizeof(header), "package-%d:die-%d:cpu-%d",
+			 get_physical_package_id(cpu), get_physical_die_id(cpu),
+			 cpu);
+		format_and_print(outf, 1, header, NULL);
+		return 1;
+	}
 	snprintf(header, sizeof(header), "package-%d",
 		 get_physical_package_id(cpu));
 	format_and_print(outf, 1, header, NULL);
@@ -169,6 +176,8 @@ static void print_package_info(int cpu, FILE *outf)
 	format_and_print(outf, 2, header, NULL);
 	snprintf(header, sizeof(header), "cpu-%d", cpu);
 	format_and_print(outf, 3, header, NULL);
+
+	return 3;
 }
 
 static void _isst_pbf_display_information(int cpu, FILE *outf, int level,
@@ -178,7 +187,7 @@ static void _isst_pbf_display_information(int cpu, FILE *outf, int level,
 	char header[256];
 	char value[256];
 
-	snprintf(header, sizeof(header), "speed-select-base-freq");
+	snprintf(header, sizeof(header), "speed-select-base-freq-properties");
 	format_and_print(outf, disp_level, header, NULL);
 
 	snprintf(header, sizeof(header), "high-priority-base-frequency(MHz)");
@@ -222,9 +231,23 @@ static void _isst_fact_display_information(int cpu, FILE *outf, int level,
 	struct isst_fact_bucket_info *bucket_info = fact_info->bucket_info;
 	char header[256];
 	char value[256];
-	int j;
+	int print = 0, j;
+
+	for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) {
+		if (fact_bucket != 0xff && fact_bucket != j)
+			continue;
+
+		if (!bucket_info[j].high_priority_cores_count)
+			break;
 
-	snprintf(header, sizeof(header), "speed-select-turbo-freq");
+		print = 1;
+	}
+	if (!print) {
+		fprintf(stderr, "Invalid bucket\n");
+		return;
+	}
+
+	snprintf(header, sizeof(header), "speed-select-turbo-freq-properties");
 	format_and_print(outf, base_level, header, NULL);
 	for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) {
 		if (fact_bucket != 0xff && fact_bucket != j)
@@ -289,7 +312,7 @@ static void _isst_fact_display_information(int cpu, FILE *outf, int level,
 }
 
 void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
-				 unsigned int val)
+				 unsigned int val, char *str0, char *str1)
 {
 	char header[256];
 	char value[256];
@@ -301,8 +324,12 @@ void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
 	format_and_print(outf, 2, header, NULL);
 	snprintf(header, sizeof(header), "cpu-%d", cpu);
 	format_and_print(outf, 3, header, NULL);
-
-	snprintf(value, sizeof(value), "%u", val);
+	if (str0 && !val)
+		snprintf(value, sizeof(value), "%s", str0);
+	else if (str1 && val)
+		snprintf(value, sizeof(value), "%s", str1);
+	else
+		snprintf(value, sizeof(value), "%u", val);
 	format_and_print(outf, 4, prefix, value);
 
 	format_and_print(outf, 1, NULL, NULL);
@@ -313,10 +340,11 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
 {
 	char header[256];
 	char value[256];
-	int i, base_level = 1;
+	static int level;
+	int i;
 
 	if (pkg_dev->processed)
-		print_package_info(cpu, outf);
+		level = print_package_info(cpu, outf);
 
 	for (i = 0; i <= pkg_dev->levels; ++i) {
 		struct isst_pkg_ctdp_level_info *ctdp_level;
@@ -328,72 +356,80 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
 
 		snprintf(header, sizeof(header), "perf-profile-level-%d",
 			 ctdp_level->level);
-		format_and_print(outf, base_level + 3, header, NULL);
+		format_and_print(outf, level + 1, header, NULL);
 
 		snprintf(header, sizeof(header), "cpu-count");
 		j = get_cpu_count(get_physical_die_id(cpu),
 				  get_physical_die_id(cpu));
 		snprintf(value, sizeof(value), "%d", j);
-		format_and_print(outf, base_level + 4, header, value);
+		format_and_print(outf, level + 2, header, value);
+
+		j = CPU_COUNT_S(ctdp_level->core_cpumask_size,
+				ctdp_level->core_cpumask);
+		if (j) {
+			snprintf(header, sizeof(header), "enable-cpu-count");
+			snprintf(value, sizeof(value), "%d", j);
+			format_and_print(outf, level + 2, header, value);
+		}
 
 		if (ctdp_level->core_cpumask_size) {
 			snprintf(header, sizeof(header), "enable-cpu-mask");
 			printcpumask(sizeof(value), value,
 				     ctdp_level->core_cpumask_size,
 				     ctdp_level->core_cpumask);
-			format_and_print(outf, base_level + 4, header, value);
+			format_and_print(outf, level + 2, header, value);
 
 			snprintf(header, sizeof(header), "enable-cpu-list");
 			printcpulist(sizeof(value), value,
 				     ctdp_level->core_cpumask_size,
 				     ctdp_level->core_cpumask);
-			format_and_print(outf, base_level + 4, header, value);
+			format_and_print(outf, level + 2, header, value);
 		}
 
 		snprintf(header, sizeof(header), "thermal-design-power-ratio");
 		snprintf(value, sizeof(value), "%d", ctdp_level->tdp_ratio);
-		format_and_print(outf, base_level + 4, header, value);
+		format_and_print(outf, level + 2, header, value);
 
 		snprintf(header, sizeof(header), "base-frequency(MHz)");
 		if (!ctdp_level->sse_p1)
 			ctdp_level->sse_p1 = ctdp_level->tdp_ratio;
 		snprintf(value, sizeof(value), "%d",
 			  ctdp_level->sse_p1 * DISP_FREQ_MULTIPLIER);
-		format_and_print(outf, base_level + 4, header, value);
+		format_and_print(outf, level + 2, header, value);
 
 		if (ctdp_level->avx2_p1) {
 			snprintf(header, sizeof(header), "base-frequency-avx2(MHz)");
 			snprintf(value, sizeof(value), "%d",
 				 ctdp_level->avx2_p1 * DISP_FREQ_MULTIPLIER);
-			format_and_print(outf, base_level + 4, header, value);
+			format_and_print(outf, level + 2, header, value);
 		}
 
 		if (ctdp_level->avx512_p1) {
 			snprintf(header, sizeof(header), "base-frequency-avx512(MHz)");
 			snprintf(value, sizeof(value), "%d",
 				 ctdp_level->avx512_p1 * DISP_FREQ_MULTIPLIER);
-			format_and_print(outf, base_level + 4, header, value);
+			format_and_print(outf, level + 2, header, value);
 		}
 
 		if (ctdp_level->uncore_p1) {
 			snprintf(header, sizeof(header), "uncore-frequency-min(MHz)");
 			snprintf(value, sizeof(value), "%d",
 				 ctdp_level->uncore_p1 * DISP_FREQ_MULTIPLIER);
-			format_and_print(outf, base_level + 4, header, value);
+			format_and_print(outf, level + 2, header, value);
 		}
 
 		if (ctdp_level->uncore_p0) {
 			snprintf(header, sizeof(header), "uncore-frequency-max(MHz)");
 			snprintf(value, sizeof(value), "%d",
 				 ctdp_level->uncore_p0 * DISP_FREQ_MULTIPLIER);
-			format_and_print(outf, base_level + 4, header, value);
+			format_and_print(outf, level + 2, header, value);
 		}
 
 		if (ctdp_level->mem_freq) {
 			snprintf(header, sizeof(header), "mem-frequency(MHz)");
 			snprintf(value, sizeof(value), "%d",
 				 ctdp_level->mem_freq * DISP_FREQ_MULTIPLIER);
-			format_and_print(outf, base_level + 4, header, value);
+			format_and_print(outf, level + 2, header, value);
 		}
 
 		snprintf(header, sizeof(header),
@@ -405,7 +441,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
 				snprintf(value, sizeof(value), "disabled");
 		} else
 			snprintf(value, sizeof(value), "unsupported");
-		format_and_print(outf, base_level + 4, header, value);
+		format_and_print(outf, level + 2, header, value);
 
 		snprintf(header, sizeof(header),
 			 "speed-select-base-freq");
@@ -416,7 +452,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
 				snprintf(value, sizeof(value), "disabled");
 		} else
 			snprintf(value, sizeof(value), "unsupported");
-		format_and_print(outf, base_level + 4, header, value);
+		format_and_print(outf, level + 2, header, value);
 
 		snprintf(header, sizeof(header),
 			 "speed-select-core-power");
@@ -427,110 +463,115 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
 				snprintf(value, sizeof(value), "disabled");
 		} else
 			snprintf(value, sizeof(value), "unsupported");
-		format_and_print(outf, base_level + 4, header, value);
+		format_and_print(outf, level + 2, header, value);
 
 		if (is_clx_n_platform()) {
 			if (ctdp_level->pbf_support)
 				_isst_pbf_display_information(cpu, outf,
 							      tdp_level,
 							  &ctdp_level->pbf_info,
-							      base_level + 4);
+							      level + 1);
 			continue;
 		}
 
 		if (ctdp_level->pkg_tdp) {
 			snprintf(header, sizeof(header), "thermal-design-power(W)");
 			snprintf(value, sizeof(value), "%d", ctdp_level->pkg_tdp);
-			format_and_print(outf, base_level + 4, header, value);
+			format_and_print(outf, level + 2, header, value);
 		}
 
 		if (ctdp_level->t_proc_hot) {
 			snprintf(header, sizeof(header), "tjunction-max(C)");
 			snprintf(value, sizeof(value), "%d", ctdp_level->t_proc_hot);
-			format_and_print(outf, base_level + 4, header, value);
+			format_and_print(outf, level + 2, header, value);
 		}
 
 		snprintf(header, sizeof(header), "turbo-ratio-limits-sse");
-		format_and_print(outf, base_level + 4, header, NULL);
+		format_and_print(outf, level + 2, header, NULL);
 		for (j = 0; j < 8; ++j) {
 			snprintf(header, sizeof(header), "bucket-%d", j);
-			format_and_print(outf, base_level + 5, header, NULL);
+			format_and_print(outf, level + 3, header, NULL);
 
 			snprintf(header, sizeof(header), "core-count");
 			snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff);
-			format_and_print(outf, base_level + 6, header, value);
+			format_and_print(outf, level + 4, header, value);
 
 			snprintf(header, sizeof(header),
 				"max-turbo-frequency(MHz)");
 			snprintf(value, sizeof(value), "%d",
 				 ctdp_level->trl_sse_active_cores[j] *
 				  DISP_FREQ_MULTIPLIER);
-			format_and_print(outf, base_level + 6, header, value);
+			format_and_print(outf, level + 4, header, value);
 		}
 
 		if (ctdp_level->trl_avx_active_cores[0]) {
 			snprintf(header, sizeof(header), "turbo-ratio-limits-avx2");
-			format_and_print(outf, base_level + 4, header, NULL);
+			format_and_print(outf, level + 2, header, NULL);
 			for (j = 0; j < 8; ++j) {
 				snprintf(header, sizeof(header), "bucket-%d", j);
-				format_and_print(outf, base_level + 5, header, NULL);
+				format_and_print(outf, level + 3, header, NULL);
 
 				snprintf(header, sizeof(header), "core-count");
 				snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff);
-				format_and_print(outf, base_level + 6, header, value);
+				format_and_print(outf, level + 4, header, value);
 
 				snprintf(header, sizeof(header), "max-turbo-frequency(MHz)");
 				snprintf(value, sizeof(value), "%d", ctdp_level->trl_avx_active_cores[j] * DISP_FREQ_MULTIPLIER);
-				format_and_print(outf, base_level + 6, header, value);
+				format_and_print(outf, level + 4, header, value);
 			}
 		}
 
 		if (ctdp_level->trl_avx_512_active_cores[0]) {
 			snprintf(header, sizeof(header), "turbo-ratio-limits-avx512");
-			format_and_print(outf, base_level + 4, header, NULL);
+			format_and_print(outf, level + 2, header, NULL);
 			for (j = 0; j < 8; ++j) {
 				snprintf(header, sizeof(header), "bucket-%d", j);
-				format_and_print(outf, base_level + 5, header, NULL);
+				format_and_print(outf, level + 3, header, NULL);
 
 				snprintf(header, sizeof(header), "core-count");
 				snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff);
-				format_and_print(outf, base_level + 6, header, value);
+				format_and_print(outf, level + 4, header, value);
 
 				snprintf(header, sizeof(header), "max-turbo-frequency(MHz)");
 				snprintf(value, sizeof(value), "%d", ctdp_level->trl_avx_512_active_cores[j] * DISP_FREQ_MULTIPLIER);
-			format_and_print(outf, base_level + 6, header, value);
+				format_and_print(outf, level + 4, header, value);
 			}
 		}
 
 		if (ctdp_level->pbf_support)
 			_isst_pbf_display_information(cpu, outf, i,
 						      &ctdp_level->pbf_info,
-						      base_level + 4);
+						      level + 2);
 		if (ctdp_level->fact_support)
 			_isst_fact_display_information(cpu, outf, i, 0xff, 0xff,
 						       &ctdp_level->fact_info,
-						       base_level + 4);
+						       level + 2);
 	}
 
 	format_and_print(outf, 1, NULL, NULL);
 }
 
+static int start;
 void isst_ctdp_display_information_start(FILE *outf)
 {
 	last_level = 0;
 	format_and_print(outf, 0, "start", NULL);
+	start = 1;
 }
 
 void isst_ctdp_display_information_end(FILE *outf)
 {
 	format_and_print(outf, 0, NULL, NULL);
+	start = 0;
 }
 
 void isst_pbf_display_information(int cpu, FILE *outf, int level,
 				  struct isst_pbf_info *pbf_info)
 {
-	print_package_info(cpu, outf);
-	_isst_pbf_display_information(cpu, outf, level, pbf_info, 4);
+	int _level;
+
+	_level = print_package_info(cpu, outf);
+	_isst_pbf_display_information(cpu, outf, level, pbf_info, _level + 1);
 	format_and_print(outf, 1, NULL, NULL);
 }
 
@@ -538,9 +579,11 @@ void isst_fact_display_information(int cpu, FILE *outf, int level,
 				   int fact_bucket, int fact_avx,
 				   struct isst_fact_info *fact_info)
 {
-	print_package_info(cpu, outf);
+	int _level;
+
+	_level = print_package_info(cpu, outf);
 	_isst_fact_display_information(cpu, outf, level, fact_bucket, fact_avx,
-				       fact_info, 4);
+				       fact_info, _level + 1);
 	format_and_print(outf, 1, NULL, NULL);
 }
 
@@ -549,94 +592,103 @@ void isst_clos_display_information(int cpu, FILE *outf, int clos,
 {
 	char header[256];
 	char value[256];
+	int level;
 
-	snprintf(header, sizeof(header), "package-%d",
-		 get_physical_package_id(cpu));
-	format_and_print(outf, 1, header, NULL);
-	snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
-	format_and_print(outf, 2, header, NULL);
-	snprintf(header, sizeof(header), "cpu-%d", cpu);
-	format_and_print(outf, 3, header, NULL);
+	level = print_package_info(cpu, outf);
 
 	snprintf(header, sizeof(header), "core-power");
-	format_and_print(outf, 4, header, NULL);
+	format_and_print(outf, level + 1, header, NULL);
 
 	snprintf(header, sizeof(header), "clos");
 	snprintf(value, sizeof(value), "%d", clos);
-	format_and_print(outf, 5, header, value);
+	format_and_print(outf, level + 2, header, value);
 
 	snprintf(header, sizeof(header), "epp");
 	snprintf(value, sizeof(value), "%d", clos_config->epp);
-	format_and_print(outf, 5, header, value);
+	format_and_print(outf, level + 2, header, value);
 
 	snprintf(header, sizeof(header), "clos-proportional-priority");
 	snprintf(value, sizeof(value), "%d", clos_config->clos_prop_prio);
-	format_and_print(outf, 5, header, value);
+	format_and_print(outf, level + 2, header, value);
 
 	snprintf(header, sizeof(header), "clos-min");
 	snprintf(value, sizeof(value), "%d MHz", clos_config->clos_min * DISP_FREQ_MULTIPLIER);
-	format_and_print(outf, 5, header, value);
+	format_and_print(outf, level + 2, header, value);
 
 	snprintf(header, sizeof(header), "clos-max");
-	snprintf(value, sizeof(value), "%d MHz", clos_config->clos_max * DISP_FREQ_MULTIPLIER);
-	format_and_print(outf, 5, header, value);
+	if (clos_config->clos_max == 0xff)
+		snprintf(value, sizeof(value), "Max Turbo frequency");
+	else
+		snprintf(value, sizeof(value), "%d MHz", clos_config->clos_max * DISP_FREQ_MULTIPLIER);
+	format_and_print(outf, level + 2, header, value);
 
 	snprintf(header, sizeof(header), "clos-desired");
 	snprintf(value, sizeof(value), "%d MHz", clos_config->clos_desired * DISP_FREQ_MULTIPLIER);
-	format_and_print(outf, 5, header, value);
+	format_and_print(outf, level + 2, header, value);
 
-	format_and_print(outf, 1, NULL, NULL);
+	format_and_print(outf, level, NULL, NULL);
 }
 
 void isst_clos_display_clos_information(int cpu, FILE *outf,
-					int clos_enable, int type)
+					int clos_enable, int type,
+					int state, int cap)
 {
 	char header[256];
 	char value[256];
+	int level;
 
-	snprintf(header, sizeof(header), "package-%d",
-		 get_physical_package_id(cpu));
-	format_and_print(outf, 1, header, NULL);
-	snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
-	format_and_print(outf, 2, header, NULL);
-	snprintf(header, sizeof(header), "cpu-%d", cpu);
-	format_and_print(outf, 3, header, NULL);
+	level = print_package_info(cpu, outf);
 
 	snprintf(header, sizeof(header), "core-power");
-	format_and_print(outf, 4, header, NULL);
+	format_and_print(outf, level + 1, header, NULL);
+
+	snprintf(header, sizeof(header), "support-status");
+	if (cap)
+		snprintf(value, sizeof(value), "supported");
+	else
+		snprintf(value, sizeof(value), "unsupported");
+	format_and_print(outf, level + 2, header, value);
 
 	snprintf(header, sizeof(header), "enable-status");
-	snprintf(value, sizeof(value), "%d", clos_enable);
-	format_and_print(outf, 5, header, value);
+	if (state)
+		snprintf(value, sizeof(value), "enabled");
+	else
+		snprintf(value, sizeof(value), "disabled");
+	format_and_print(outf, level + 2, header, value);
+
+	snprintf(header, sizeof(header), "clos-enable-status");
+	if (clos_enable)
+		snprintf(value, sizeof(value), "enabled");
+	else
+		snprintf(value, sizeof(value), "disabled");
+	format_and_print(outf, level + 2, header, value);
 
 	snprintf(header, sizeof(header), "priority-type");
-	snprintf(value, sizeof(value), "%d", type);
-	format_and_print(outf, 5, header, value);
+	if (type)
+		snprintf(value, sizeof(value), "ordered");
+	else
+		snprintf(value, sizeof(value), "proportional");
+	format_and_print(outf, level + 2, header, value);
 
-	format_and_print(outf, 1, NULL, NULL);
+	format_and_print(outf, level, NULL, NULL);
 }
 
 void isst_clos_display_assoc_information(int cpu, FILE *outf, int clos)
 {
 	char header[256];
 	char value[256];
+	int level;
 
-	snprintf(header, sizeof(header), "package-%d",
-		 get_physical_package_id(cpu));
-	format_and_print(outf, 1, header, NULL);
-	snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
-	format_and_print(outf, 2, header, NULL);
-	snprintf(header, sizeof(header), "cpu-%d", cpu);
-	format_and_print(outf, 3, header, NULL);
+	level = print_package_info(cpu, outf);
 
 	snprintf(header, sizeof(header), "get-assoc");
-	format_and_print(outf, 4, header, NULL);
+	format_and_print(outf, level + 1, header, NULL);
 
 	snprintf(header, sizeof(header), "clos");
 	snprintf(value, sizeof(value), "%d", clos);
-	format_and_print(outf, 5, header, value);
+	format_and_print(outf, level + 2, header, value);
 
-	format_and_print(outf, 1, NULL, NULL);
+	format_and_print(outf, level, NULL, NULL);
 }
 
 void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
@@ -644,24 +696,60 @@ void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
 {
 	char header[256];
 	char value[256];
+	int level = 3;
+
+	if (cpu >= 0)
+		level = print_package_info(cpu, outf);
 
-	if (cpu >= 0) {
-		snprintf(header, sizeof(header), "package-%d",
-			 get_physical_package_id(cpu));
-		format_and_print(outf, 1, header, NULL);
-		snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
-		format_and_print(outf, 2, header, NULL);
-		snprintf(header, sizeof(header), "cpu-%d", cpu);
-		format_and_print(outf, 3, header, NULL);
-	}
 	snprintf(header, sizeof(header), "%s", feature);
-	format_and_print(outf, 4, header, NULL);
+	format_and_print(outf, level + 1, header, NULL);
 	snprintf(header, sizeof(header), "%s", cmd);
 	if (!result)
 		snprintf(value, sizeof(value), "success");
 	else
 		snprintf(value, sizeof(value), "failed(error %d)", result);
-	format_and_print(outf, 5, header, value);
+	format_and_print(outf, level + 2, header, value);
+
+	format_and_print(outf, level, NULL, NULL);
+}
+
+void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg)
+{
+	FILE *outf = get_output_file();
+	static int error_index;
+	char header[256];
+	char value[256];
+
+	if (!out_format_is_json()) {
+		if (arg_valid)
+			snprintf(value, sizeof(value), "%s %d", msg, arg);
+		else
+			snprintf(value, sizeof(value), "%s", msg);
+
+		if (error)
+			fprintf(outf, "Error: %s\n", value);
+		else
+			fprintf(outf, "Information: %s\n", value);
+		return;
+	}
+
+	if (!start)
+		format_and_print(outf, 0, "start", NULL);
+
+	if (error)
+		snprintf(header, sizeof(header), "Error%d", error_index++);
+	else
+		snprintf(header, sizeof(header), "Information:%d", error_index++);
+	format_and_print(outf, 1, header, NULL);
+
+	snprintf(header, sizeof(header), "message");
+	if (arg_valid)
+		snprintf(value, sizeof(value), "%s %d", msg, arg);
+	else
+		snprintf(value, sizeof(value), "%s", msg);
 
+	format_and_print(outf, 2, header, value);
 	format_and_print(outf, 1, NULL, NULL);
+	if (!start)
+		format_and_print(outf, 0, NULL, NULL);
 }
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index ad5aa6341d0f..2e1afd856a78 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -172,6 +172,7 @@ extern int get_cpu_count(int pkg_id, int die_id);
 extern int get_core_count(int pkg_id, int die_id);
 
 /* Common interfaces */
+FILE *get_output_file(void);
 extern void debug_printf(const char *format, ...);
 extern int out_format_is_json(void);
 extern int get_physical_package_id(int cpu);
@@ -196,6 +197,8 @@ extern int isst_send_msr_command(unsigned int cpu, unsigned int command,
 				 int write, unsigned long long *req_resp);
 
 extern int isst_get_ctdp_levels(int cpu, struct isst_pkg_ctdp *pkg_dev);
+extern int isst_get_ctdp_control(int cpu, int config_index,
+				 struct isst_pkg_ctdp_level_info *ctdp_level);
 extern int isst_get_coremask_info(int cpu, int config_index,
 			   struct isst_pkg_ctdp_level_info *ctdp_level);
 extern int isst_get_process_ctdp(int cpu, int tdp_level,
@@ -205,7 +208,7 @@ extern void isst_get_process_ctdp_complete(int cpu,
 extern void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
 					  struct isst_pkg_ctdp *pkg_dev);
 extern void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
-					unsigned int val);
+					unsigned int val, char *str0, char *str1);
 extern void isst_ctdp_display_information_start(FILE *outf);
 extern void isst_ctdp_display_information_end(FILE *outf);
 extern void isst_pbf_display_information(int cpu, FILE *outf, int level,
@@ -216,7 +219,7 @@ extern int isst_set_pbf_fact_status(int cpu, int pbf, int enable);
 extern int isst_get_pbf_info(int cpu, int level,
 			     struct isst_pbf_info *pbf_info);
 extern void isst_get_pbf_info_complete(struct isst_pbf_info *pbf_info);
-extern int isst_get_fact_info(int cpu, int level,
+extern int isst_get_fact_info(int cpu, int level, int fact_bucket,
 			      struct isst_fact_info *fact_info);
 extern int isst_get_fact_bucket_info(int cpu, int level,
 				     struct isst_fact_bucket_info *bucket_info);
@@ -245,7 +248,10 @@ extern void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
 
 extern int isst_clos_get_clos_information(int cpu, int *enable, int *type);
 extern void isst_clos_display_clos_information(int cpu, FILE *outf,
-					       int clos_enable, int type);
+					       int clos_enable, int type,
+					       int state, int cap);
 extern int is_clx_n_platform(void);
 extern int get_cpufreq_base_freq(int cpu);
+extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap);
+extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg);
 #endif
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index 256199c7a182..3c47865bb247 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -235,7 +235,6 @@ def plot_duration_cpu():
     output_png = 'all_cpu_durations.png'
     g_plot = common_all_gnuplot_settings(output_png)
 #   autoscale this one, no set y range
-    g_plot('set ytics 0, 500')
     g_plot('set ylabel "Timer Duration (MilliSeconds)"')
     g_plot('set title "{} : cpu durations : {:%F %H:%M}"'.format(testname, datetime.now()))
 
diff --git a/tools/power/x86/turbostat/.gitignore b/tools/power/x86/turbostat/.gitignore
index 7521370d3568..e13109b43cd1 100644
--- a/tools/power/x86/turbostat/.gitignore
+++ b/tools/power/x86/turbostat/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 turbostat
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index 13f1e8b9ac52..2b6551269e43 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -16,7 +16,7 @@ override CFLAGS +=	-D_FORTIFY_SOURCE=2
 
 %: %.c
 	@mkdir -p $(BUILD_OUTPUT)
-	$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS)
+	$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap
 
 .PHONY : clean
 clean :
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 31c1ca0bb3ee..33b370865d16 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -30,7 +30,7 @@
 #include <sched.h>
 #include <time.h>
 #include <cpuid.h>
-#include <linux/capability.h>
+#include <sys/capability.h>
 #include <errno.h>
 #include <math.h>
 
@@ -304,6 +304,10 @@ int *irqs_per_cpu;		/* indexed by cpu_num */
 
 void setup_all_buffers(void);
 
+char *sys_lpi_file;
+char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
+char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
+
 int cpu_is_not_present(int cpu)
 {
 	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
@@ -2916,8 +2920,6 @@ int snapshot_gfx_mhz(void)
  *
  * record snapshot of
  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
- *
- * return 1 if config change requires a restart, else return 0
  */
 int snapshot_cpu_lpi_us(void)
 {
@@ -2941,17 +2943,14 @@ int snapshot_cpu_lpi_us(void)
 /*
  * snapshot_sys_lpi()
  *
- * record snapshot of
- * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
- *
- * return 1 if config change requires a restart, else return 0
+ * record snapshot of sys_lpi_file
  */
 int snapshot_sys_lpi_us(void)
 {
 	FILE *fp;
 	int retval;
 
-	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
+	fp = fopen_or_die(sys_lpi_file, "r");
 
 	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
 	if (retval != 1) {
@@ -3151,28 +3150,42 @@ void check_dev_msr()
 			err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
 }
 
-void check_permissions()
+/*
+ * check for CAP_SYS_RAWIO
+ * return 0 on success
+ * return 1 on fail
+ */
+int check_for_cap_sys_rawio(void)
 {
-	struct __user_cap_header_struct cap_header_data;
-	cap_user_header_t cap_header = &cap_header_data;
-	struct __user_cap_data_struct cap_data_data;
-	cap_user_data_t cap_data = &cap_data_data;
-	extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
-	int do_exit = 0;
-	char pathname[32];
+	cap_t caps;
+	cap_flag_value_t cap_flag_value;
 
-	/* check for CAP_SYS_RAWIO */
-	cap_header->pid = getpid();
-	cap_header->version = _LINUX_CAPABILITY_VERSION;
-	if (capget(cap_header, cap_data) < 0)
-		err(-6, "capget(2) failed");
+	caps = cap_get_proc();
+	if (caps == NULL)
+		err(-6, "cap_get_proc\n");
 
-	if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
-		do_exit++;
+	if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
+		err(-6, "cap_get\n");
+
+	if (cap_flag_value != CAP_SET) {
 		warnx("capget(CAP_SYS_RAWIO) failed,"
 			" try \"# setcap cap_sys_rawio=ep %s\"", progname);
+		return 1;
 	}
 
+	if (cap_free(caps) == -1)
+		err(-6, "cap_free\n");
+
+	return 0;
+}
+void check_permissions(void)
+{
+	int do_exit = 0;
+	char pathname[32];
+
+	/* check for CAP_SYS_RAWIO */
+	do_exit += check_for_cap_sys_rawio();
+
 	/* test file permissions */
 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
 	if (euidaccess(pathname, R_OK)) {
@@ -3265,6 +3278,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
+	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
 		pkg_cstate_limits = glm_pkg_cstate_limits;
 		break;
 	default:
@@ -3336,6 +3350,17 @@ int is_skx(unsigned int family, unsigned int model)
 	}
 	return 0;
 }
+int is_ehl(unsigned int family, unsigned int model)
+{
+	if (!genuine_intel)
+		return 0;
+
+	switch (model) {
+	case INTEL_FAM6_ATOM_TREMONT:
+		return 1;
+	}
+	return 0;
+}
 
 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
 {
@@ -3478,6 +3503,23 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
 	dump_nhm_cst_cfg();
 }
 
+static void dump_sysfs_file(char *path)
+{
+	FILE *input;
+	char cpuidle_buf[64];
+
+	input = fopen(path, "r");
+	if (input == NULL) {
+		if (debug)
+			fprintf(outf, "NSFOD %s\n", path);
+		return;
+	}
+	if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
+		err(1, "%s: failed to read file", path);
+	fclose(input);
+
+	fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
+}
 static void
 dump_sysfs_cstate_config(void)
 {
@@ -3491,6 +3533,15 @@ dump_sysfs_cstate_config(void)
 	if (!DO_BIC(BIC_sysfs))
 		return;
 
+	if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
+		fprintf(outf, "cpuidle not loaded\n");
+		return;
+	}
+
+	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
+	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
+	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
+
 	for (state = 0; state < 10; ++state) {
 
 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
@@ -3894,6 +3945,20 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
 		else
 			BIC_PRESENT(BIC_PkgWatt);
 		break;
+	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
+		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
+		if (rapl_joules) {
+			BIC_PRESENT(BIC_Pkg_J);
+			BIC_PRESENT(BIC_Cor_J);
+			BIC_PRESENT(BIC_RAM_J);
+			BIC_PRESENT(BIC_GFX_J);
+		} else {
+			BIC_PRESENT(BIC_PkgWatt);
+			BIC_PRESENT(BIC_CorWatt);
+			BIC_PRESENT(BIC_RAMWatt);
+			BIC_PRESENT(BIC_GFXWatt);
+		}
+		break;
 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
@@ -4295,6 +4360,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_ATOM_GOLDMONT:		/* BXT */
 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
+	case INTEL_FAM6_ATOM_TREMONT:		/* EHL */
 		return 1;
 	}
 	return 0;
@@ -4324,6 +4390,7 @@ int has_c8910_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
 		return 1;
 	}
 	return 0;
@@ -4610,14 +4677,24 @@ unsigned int intel_model_duplicates(unsigned int model)
 	case INTEL_FAM6_SKYLAKE:
 	case INTEL_FAM6_KABYLAKE_L:
 	case INTEL_FAM6_KABYLAKE:
+	case INTEL_FAM6_COMETLAKE_L:
+	case INTEL_FAM6_COMETLAKE:
 		return INTEL_FAM6_SKYLAKE_L;
 
 	case INTEL_FAM6_ICELAKE_L:
 	case INTEL_FAM6_ICELAKE_NNPI:
+	case INTEL_FAM6_TIGERLAKE_L:
+	case INTEL_FAM6_TIGERLAKE:
 		return INTEL_FAM6_CANNONLAKE_L;
 
 	case INTEL_FAM6_ATOM_TREMONT_D:
 		return INTEL_FAM6_ATOM_GOLDMONT_D;
+
+	case INTEL_FAM6_ATOM_TREMONT_L:
+		return INTEL_FAM6_ATOM_TREMONT;
+
+	case INTEL_FAM6_ICELAKE_X:
+		return INTEL_FAM6_SKYLAKE_X;
 	}
 	return model;
 }
@@ -4872,7 +4949,8 @@ void process_cpuid()
 	do_slm_cstates = is_slm(family, model);
 	do_knl_cstates  = is_knl(family, model);
 
-	if (do_slm_cstates || do_knl_cstates || is_cnl(family, model))
+	if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) ||
+	    is_ehl(family, model))
 		BIC_NOT_PRESENT(BIC_CPU_c3);
 
 	if (!quiet)
@@ -4907,10 +4985,16 @@ void process_cpuid()
 	else
 		BIC_NOT_PRESENT(BIC_CPU_LPI);
 
-	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
+	if (!access(sys_lpi_file_sysfs, R_OK)) {
+		sys_lpi_file = sys_lpi_file_sysfs;
 		BIC_PRESENT(BIC_SYS_LPI);
-	else
+	} else if (!access(sys_lpi_file_debugfs, R_OK)) {
+		sys_lpi_file = sys_lpi_file_debugfs;
+		BIC_PRESENT(BIC_SYS_LPI);
+	} else {
+		sys_lpi_file_sysfs = NULL;
 		BIC_NOT_PRESENT(BIC_SYS_LPI);
+	}
 
 	if (!quiet)
 		decode_misc_feature_control();
@@ -5306,7 +5390,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-	fprintf(outf, "turbostat version 19.08.31"
+	fprintf(outf, "turbostat version 20.03.20"
 		" - Len Brown <lenb@kernel.org>\n");
 }
 
@@ -5323,9 +5407,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
 	}
 
 	msrp->msr_num = msr_num;
-	strncpy(msrp->name, name, NAME_BYTES);
+	strncpy(msrp->name, name, NAME_BYTES - 1);
 	if (path)
-		strncpy(msrp->path, path, PATH_BYTES);
+		strncpy(msrp->path, path, PATH_BYTES - 1);
 	msrp->width = width;
 	msrp->type = type;
 	msrp->format = format;
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index ded7a950dc40..a7974638561c 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 ifneq ($(O),)
 ifeq ($(origin O), command line)
-	dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
-	ABSOLUTE_O := $(shell cd $(O) ; pwd)
+	dummy := $(if $(shell cd $(PWD); test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
+	ABSOLUTE_O := $(shell cd $(PWD); cd $(O) ; pwd)
 	OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/)
 	COMMAND_O := O=$(ABSOLUTE_O)
 ifeq ($(objtree),)
@@ -106,6 +106,7 @@ ifneq ($(silent),1)
   ifneq ($(V),1)
 	QUIET_CC       = @echo '  CC       '$@;
 	QUIET_CC_FPIC  = @echo '  CC FPIC  '$@;
+	QUIET_CLANG    = @echo '  CLANG    '$@;
 	QUIET_AR       = @echo '  AR       '$@;
 	QUIET_LINK     = @echo '  LINK     '$@;
 	QUIET_MKDIR    = @echo '  MKDIR    '$@;
diff --git a/tools/spi/.gitignore b/tools/spi/.gitignore
index 4280576397e8..14ddba3d2195 100644
--- a/tools/spi/.gitignore
+++ b/tools/spi/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 spidev_fdx
 spidev_test
diff --git a/tools/spi/Makefile b/tools/spi/Makefile
index 5c342e655e55..2249a1546cc1 100644
--- a/tools/spi/Makefile
+++ b/tools/spi/Makefile
@@ -51,7 +51,7 @@ $(OUTPUT)spidev_fdx: $(SPIDEV_FDX_IN)
 
 clean:
 	rm -f $(ALL_PROGRAMS)
-	rm -f $(OUTPUT)include/linux/spi/spidev.h
+	rm -rf $(OUTPUT)include/
 	find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
 
 install: $(ALL_PROGRAMS)
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
index 3559e7646256..27967dd90f8f 100644
--- a/tools/spi/spidev_test.c
+++ b/tools/spi/spidev_test.c
@@ -13,6 +13,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <errno.h>
 #include <getopt.h>
 #include <fcntl.h>
 #include <time.h>
@@ -26,7 +27,11 @@
 
 static void pabort(const char *s)
 {
-	perror(s);
+	if (errno != 0)
+		perror(s);
+	else
+		printf("%s\n", s);
+
 	abort();
 }
 
@@ -283,7 +288,6 @@ static void parse_opts(int argc, char *argv[])
 			break;
 		default:
 			print_usage(argv[0]);
-			break;
 		}
 	}
 	if (mode & SPI_LOOP) {
@@ -405,6 +409,9 @@ int main(int argc, char *argv[])
 
 	parse_opts(argc, argv);
 
+	if (input_tx && input_file)
+		pabort("only one of -p and --input may be selected");
+
 	fd = open(device, O_RDWR);
 	if (fd < 0)
 		pabort("can't open device");
@@ -446,9 +453,6 @@ int main(int argc, char *argv[])
 	printf("bits per word: %d\n", bits);
 	printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000);
 
-	if (input_tx && input_file)
-		pabort("only one of -p and --input may be selected");
-
 	if (input_tx)
 		transfer_escaped_string(fd, input_tx);
 	else if (input_file)
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 220d04f958a6..7570e36d636d 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -30,7 +30,7 @@ my %default = (
     "EMAIL_WHEN_STARTED"	=> 0,
     "NUM_TESTS"			=> 1,
     "TEST_TYPE"			=> "build",
-    "BUILD_TYPE"		=> "randconfig",
+    "BUILD_TYPE"		=> "oldconfig",
     "MAKE_CMD"			=> "make",
     "CLOSE_CONSOLE_SIGNAL"	=> "INT",
     "TIMEOUT"			=> 120,
@@ -1030,7 +1030,7 @@ sub __read_config {
 	    }
 
 	    if (!$skip && $rest !~ /^\s*$/) {
-		die "$name: $.: Gargbage found after $type\n$_";
+		die "$name: $.: Garbage found after $type\n$_";
 	    }
 
 	    if ($skip && $type eq "TEST_START") {
@@ -1063,7 +1063,7 @@ sub __read_config {
 	    }
 
 	    if ($rest !~ /^\s*$/) {
-		die "$name: $.: Gargbage found after DEFAULTS\n$_";
+		die "$name: $.: Garbage found after DEFAULTS\n$_";
 	    }
 
 	} elsif (/^\s*INCLUDE\s+(\S+)/) {
@@ -1154,7 +1154,7 @@ sub __read_config {
 	    # on of these sections that have SKIP defined.
 	    # The save variable can be
 	    # defined multiple times and the new one simply overrides
-	    # the prevous one.
+	    # the previous one.
 	    set_variable($lvalue, $rvalue);
 
 	} else {
@@ -1234,7 +1234,7 @@ sub read_config {
 	foreach my $option (keys %not_used) {
 	    print "$option\n";
 	}
-	print "Set IGRNORE_UNUSED = 1 to have ktest ignore unused variables\n";
+	print "Set IGNORE_UNUSED = 1 to have ktest ignore unused variables\n";
 	if (!read_yn "Do you want to continue?") {
 	    exit -1;
 	}
@@ -1345,7 +1345,7 @@ sub eval_option {
 	# Check for recursive evaluations.
 	# 100 deep should be more than enough.
 	if ($r++ > 100) {
-	    die "Over 100 evaluations accurred with $option\n" .
+	    die "Over 100 evaluations occurred with $option\n" .
 		"Check for recursive variables\n";
 	}
 	$prev = $option;
@@ -1383,7 +1383,7 @@ sub reboot {
 
     } else {
 	# Make sure everything has been written to disk
-	run_ssh("sync");
+	run_ssh("sync", 10);
 
 	if (defined($time)) {
 	    start_monitor;
@@ -1461,7 +1461,7 @@ sub get_test_name() {
 
 sub dodie {
 
-    # avoid recusion
+    # avoid recursion
     return if ($in_die);
     $in_die = 1;
 
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index c3bc933d437b..27666b8007ed 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -10,7 +10,7 @@
 #
 
 # Options set in the beginning of the file are considered to be
-# default options. These options can be overriden by test specific
+# default options. These options can be overridden by test specific
 # options, with the following exceptions:
 #
 #  LOG_FILE
@@ -204,7 +204,7 @@
 #
 # This config file can also contain "config variables".
 # These are assigned with ":=" instead of the ktest option
-# assigment "=".
+# assignment "=".
 #
 # The difference between ktest options and config variables
 # is that config variables can be used multiple times,
@@ -263,7 +263,7 @@
 #### Using options in other options ####
 #
 # Options that are defined in the config file may also be used
-# by other options. All options are evaulated at time of
+# by other options. All options are evaluated at time of
 # use (except that config variables are evaluated at config
 # processing time).
 #
@@ -505,7 +505,7 @@
 #TEST = ssh user@machine /root/run_test
 
 # The build type is any make config type or special command
-#  (default randconfig)
+#  (default oldconfig)
 #   nobuild - skip the clean and build step
 #   useconfig:/path/to/config - use the given config and run
 #              oldconfig on it.
@@ -707,7 +707,7 @@
 
 # Line to define a successful boot up in console output.
 # This is what the line contains, not the entire line. If you need
-# the entire line to match, then use regural expression syntax like:
+# the entire line to match, then use regular expression syntax like:
 #  (do not add any quotes around it)
 #
 #  SUCCESS_LINE = ^MyBox Login:$
@@ -839,7 +839,7 @@
 # (ignored if POWEROFF_ON_SUCCESS is set)
 #REBOOT_ON_SUCCESS = 1
 
-# In case there are isses with rebooting, you can specify this
+# In case there are issues with rebooting, you can specify this
 # to always powercycle after this amount of time after calling
 # reboot.
 # Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just
@@ -848,7 +848,7 @@
 # (default undefined)
 #POWERCYCLE_AFTER_REBOOT = 5
 
-# In case there's isses with halting, you can specify this
+# In case there's issues with halting, you can specify this
 # to always poweroff after this amount of time after calling
 # halt.
 # Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just
@@ -972,7 +972,7 @@
 #
 #  PATCHCHECK_START is required and is the first patch to
 #   test (the SHA1 of the commit). You may also specify anything
-#   that git checkout allows (branch name, tage, HEAD~3).
+#   that git checkout allows (branch name, tag, HEAD~3).
 #
 #  PATCHCHECK_END is the last patch to check (default HEAD)
 #
@@ -994,7 +994,7 @@
 #     IGNORE_WARNINGS is set for the given commit's sha1
 #
 #   IGNORE_WARNINGS can be used to disable the failure of patchcheck
-#     on a particuler commit (SHA1). You can add more than one commit
+#     on a particular commit (SHA1). You can add more than one commit
 #     by adding a list of SHA1s that are space delimited.
 #
 #   If BUILD_NOCLEAN is set, then make mrproper will not be run on
@@ -1093,7 +1093,7 @@
 #   whatever reason. (Can't reboot, want to inspect each iteration)
 #   Doing a BISECT_MANUAL will have the test wait for you to
 #   tell it if the test passed or failed after each iteration.
-#   This is basicall the same as running git bisect yourself
+#   This is basically the same as running git bisect yourself
 #   but ktest will rebuild and install the kernel for you.
 #
 # BISECT_CHECK = 1 (optional, default 0)
@@ -1239,7 +1239,7 @@
 #
 # CONFIG_BISECT_EXEC (optional)
 #  The config bisect is a separate program that comes with ktest.pl.
-#  By befault, it will look for:
+#  By default, it will look for:
 #    `pwd`/config-bisect.pl # the location ktest.pl was executed from.
 #  If it does not find it there, it will look for:
 #    `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl
diff --git a/tools/testing/kunit/.gitattributes b/tools/testing/kunit/.gitattributes
new file mode 100644
index 000000000000..5b7da1fc3b8f
--- /dev/null
+++ b/tools/testing/kunit/.gitattributes
@@ -0,0 +1 @@
+test_data/* binary
diff --git a/tools/testing/kunit/.gitignore b/tools/testing/kunit/.gitignore
index c791ff59a37a..1c63e31f7edf 100644
--- a/tools/testing/kunit/.gitignore
+++ b/tools/testing/kunit/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 \ No newline at end of file
diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config
new file mode 100644
index 000000000000..239b9f03da2c
--- /dev/null
+++ b/tools/testing/kunit/configs/broken_on_uml.config
@@ -0,0 +1,41 @@
+# These are currently broken on UML and prevent allyesconfig from building
+# CONFIG_STATIC_LINK is not set
+# CONFIG_UML_NET_VECTOR is not set
+# CONFIG_UML_NET_VDE is not set
+# CONFIG_UML_NET_PCAP is not set
+# CONFIG_NET_PTP_CLASSIFY is not set
+# CONFIG_IP_VS is not set
+# CONFIG_BRIDGE_EBT_BROUTE is not set
+# CONFIG_BRIDGE_EBT_T_FILTER is not set
+# CONFIG_BRIDGE_EBT_T_NAT is not set
+# CONFIG_MTD_NAND_CADENCE is not set
+# CONFIG_MTD_NAND_NANDSIM is not set
+# CONFIG_BLK_DEV_NULL_BLK is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_SCSI_DEBUG is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+# CONFIG_NULL_TTY is not set
+# CONFIG_PTP_1588_CLOCK is not set
+# CONFIG_PINCTRL_EQUILIBRIUM is not set
+# CONFIG_DMABUF_SELFTESTS is not set
+# CONFIG_COMEDI is not set
+# CONFIG_XIL_AXIS_FIFO is not set
+# CONFIG_EXFAT_FS is not set
+# CONFIG_STM_DUMMY is not set
+# CONFIG_FSI_MASTER_ASPEED is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_UBIFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_CRYPTO_DEV_SAFEXCEL is not set
+# CONFIG_CRYPTO_DEV_AMLOGIC_GXL is not set
+# CONFIG_KCOV is not set
+# CONFIG_LKDTM is not set
+# CONFIG_REED_SOLOMON_TEST is not set
+# CONFIG_TEST_RHASHTABLE is not set
+# CONFIG_TEST_MEMINIT is not set
+# CONFIG_NETWORK_PHY_TIMESTAMPING is not set
+# CONFIG_DEBUG_INFO_BTF is not set
+# CONFIG_PTP_1588_CLOCK_INES is not set
+# CONFIG_QCOM_CPR is not set
+# CONFIG_RESET_BRCMSTB_RESCAL is not set
+# CONFIG_RESET_INTEL_GW is not set
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 180ad1e1b04f..7dca74774dd2 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -22,7 +22,9 @@ import kunit_parser
 
 KunitResult = namedtuple('KunitResult', ['status','result'])
 
-KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', 'build_dir', 'defconfig'])
+KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs',
+					   'build_dir', 'defconfig',
+					   'alltests', 'make_options'])
 
 KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
 
@@ -47,7 +49,7 @@ def get_kernel_root_path():
 def run_tests(linux: kunit_kernel.LinuxSourceTree,
 	      request: KunitRequest) -> KunitResult:
 	config_start = time.time()
-	success = linux.build_reconfig(request.build_dir)
+	success = linux.build_reconfig(request.build_dir, request.make_options)
 	config_end = time.time()
 	if not success:
 		return KunitResult(KunitStatus.CONFIG_FAILURE, 'could not configure kernel')
@@ -55,24 +57,24 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree,
 	kunit_parser.print_with_timestamp('Building KUnit Kernel ...')
 
 	build_start = time.time()
-	success = linux.build_um_kernel(request.jobs, request.build_dir)
+	success = linux.build_um_kernel(request.alltests,
+					request.jobs,
+					request.build_dir,
+					request.make_options)
 	build_end = time.time()
 	if not success:
 		return KunitResult(KunitStatus.BUILD_FAILURE, 'could not build kernel')
 
 	kunit_parser.print_with_timestamp('Starting KUnit Kernel ...')
 	test_start = time.time()
-
-	test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS,
-					      [],
-					      'Tests not Parsed.')
+	kunit_output = linux.run_kernel(
+		timeout=None if request.alltests else request.timeout,
+		build_dir=request.build_dir)
 	if request.raw_output:
-		kunit_parser.raw_output(
-			linux.run_kernel(timeout=request.timeout,
-					 build_dir=request.build_dir))
+		raw_output = kunit_parser.raw_output(kunit_output)
+		isolated = list(kunit_parser.isolate_kunit_output(raw_output))
+		test_result = kunit_parser.parse_test_result(isolated)
 	else:
-		kunit_output = linux.run_kernel(timeout=request.timeout,
-						build_dir=request.build_dir)
 		test_result = kunit_parser.parse_run_tests(kunit_output)
 	test_end = time.time()
 
@@ -120,6 +122,14 @@ def main(argv, linux=None):
 				help='Uses a default .kunitconfig.',
 				action='store_true')
 
+	run_parser.add_argument('--alltests',
+				help='Run all KUnit tests through allyesconfig',
+				action='store_true')
+
+	run_parser.add_argument('--make_options',
+				help='X=Y make option, can be repeated.',
+				action='append')
+
 	cli_args = parser.parse_args(argv)
 
 	if cli_args.subcommand == 'run':
@@ -143,7 +153,9 @@ def main(argv, linux=None):
 				       cli_args.timeout,
 				       cli_args.jobs,
 				       cli_args.build_dir,
-				       cli_args.defconfig)
+				       cli_args.defconfig,
+				       cli_args.alltests,
+				       cli_args.make_options)
 		result = run_tests(linux, request)
 		if result.status != KunitStatus.SUCCESS:
 			sys.exit(1)
diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
index ebf3942b23f5..e75063d603b5 100644
--- a/tools/testing/kunit/kunit_config.py
+++ b/tools/testing/kunit/kunit_config.py
@@ -9,16 +9,18 @@
 import collections
 import re
 
-CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_\w+ is not set$'
-CONFIG_PATTERN = r'^CONFIG_\w+=\S+$'
-
-KconfigEntryBase = collections.namedtuple('KconfigEntry', ['raw_entry'])
+CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$'
+CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+)$'
 
+KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value'])
 
 class KconfigEntry(KconfigEntryBase):
 
 	def __str__(self) -> str:
-		return self.raw_entry
+		if self.value == 'n':
+			return r'# CONFIG_%s is not set' % (self.name)
+		else:
+			return r'CONFIG_%s=%s' % (self.name, self.value)
 
 
 class KconfigParseError(Exception):
@@ -38,7 +40,17 @@ class Kconfig(object):
 		self._entries.append(entry)
 
 	def is_subset_of(self, other: 'Kconfig') -> bool:
-		return self.entries().issubset(other.entries())
+		for a in self.entries():
+			found = False
+			for b in other.entries():
+				if a.name != b.name:
+					continue
+				if a.value != b.value:
+					return False
+				found = True
+			if a.value != 'n' and found == False:
+				return False
+		return True
 
 	def write_to_file(self, path: str) -> None:
 		with open(path, 'w') as f:
@@ -54,9 +66,20 @@ class Kconfig(object):
 			line = line.strip()
 			if not line:
 				continue
-			elif config_matcher.match(line) or is_not_set_matcher.match(line):
-				self._entries.append(KconfigEntry(line))
-			elif line[0] == '#':
+
+			match = config_matcher.match(line)
+			if match:
+				entry = KconfigEntry(match.group(1), match.group(2))
+				self.add_entry(entry)
+				continue
+
+			empty_match = is_not_set_matcher.match(line)
+			if empty_match:
+				entry = KconfigEntry(empty_match.group(1), 'n')
+				self.add_entry(entry)
+				continue
+
+			if line[0] == '#':
 				continue
 			else:
 				raise KconfigParseError('Failed to parse: ' + line)
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index d99ae75ef72f..63dbda2d029f 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -10,11 +10,16 @@
 import logging
 import subprocess
 import os
+import signal
+
+from contextlib import ExitStack
 
 import kunit_config
+import kunit_parser
 
 KCONFIG_PATH = '.config'
 kunitconfig_path = '.kunitconfig'
+BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config'
 
 class ConfigError(Exception):
 	"""Represents an error trying to configure the Linux kernel."""
@@ -35,19 +40,40 @@ class LinuxSourceTreeOperations(object):
 		except subprocess.CalledProcessError as e:
 			raise ConfigError(e.output)
 
-	def make_olddefconfig(self, build_dir):
+	def make_olddefconfig(self, build_dir, make_options):
 		command = ['make', 'ARCH=um', 'olddefconfig']
+		if make_options:
+			command.extend(make_options)
 		if build_dir:
 			command += ['O=' + build_dir]
 		try:
-			subprocess.check_output(command)
+			subprocess.check_output(command, stderr=subprocess.PIPE)
 		except OSError as e:
 			raise ConfigError('Could not call make command: ' + e)
 		except subprocess.CalledProcessError as e:
 			raise ConfigError(e.output)
 
-	def make(self, jobs, build_dir):
+	def make_allyesconfig(self):
+		kunit_parser.print_with_timestamp(
+			'Enabling all CONFIGs for UML...')
+		process = subprocess.Popen(
+			['make', 'ARCH=um', 'allyesconfig'],
+			stdout=subprocess.DEVNULL,
+			stderr=subprocess.STDOUT)
+		process.wait()
+		kunit_parser.print_with_timestamp(
+			'Disabling broken configs to run KUnit tests...')
+		with ExitStack() as es:
+			config = open(KCONFIG_PATH, 'a')
+			disable = open(BROKEN_ALLCONFIG_PATH, 'r').read()
+			config.write(disable)
+		kunit_parser.print_with_timestamp(
+			'Starting Kernel with all configs takes a few minutes...')
+
+	def make(self, jobs, build_dir, make_options):
 		command = ['make', 'ARCH=um', '--jobs=' + str(jobs)]
+		if make_options:
+			command.extend(make_options)
 		if build_dir:
 			command += ['O=' + build_dir]
 		try:
@@ -57,18 +83,16 @@ class LinuxSourceTreeOperations(object):
 		except subprocess.CalledProcessError as e:
 			raise BuildError(e.output)
 
-	def linux_bin(self, params, timeout, build_dir):
+	def linux_bin(self, params, timeout, build_dir, outfile):
 		"""Runs the Linux UML binary. Must be named 'linux'."""
 		linux_bin = './linux'
 		if build_dir:
 			linux_bin = os.path.join(build_dir, 'linux')
-		process = subprocess.Popen(
-			[linux_bin] + params,
-			stdin=subprocess.PIPE,
-			stdout=subprocess.PIPE,
-			stderr=subprocess.PIPE)
-		process.wait(timeout=timeout)
-		return process
+		with open(outfile, 'w') as output:
+			process = subprocess.Popen([linux_bin] + params,
+						   stdout=output,
+						   stderr=subprocess.STDOUT)
+			process.wait(timeout)
 
 
 def get_kconfig_path(build_dir):
@@ -84,6 +108,7 @@ class LinuxSourceTree(object):
 		self._kconfig = kunit_config.Kconfig()
 		self._kconfig.read_from_file(kunitconfig_path)
 		self._ops = LinuxSourceTreeOperations()
+		signal.signal(signal.SIGINT, self.signal_handler)
 
 	def clean(self):
 		try:
@@ -107,19 +132,19 @@ class LinuxSourceTree(object):
 			return False
 		return True
 
-	def build_config(self, build_dir):
+	def build_config(self, build_dir, make_options):
 		kconfig_path = get_kconfig_path(build_dir)
 		if build_dir and not os.path.exists(build_dir):
 			os.mkdir(build_dir)
 		self._kconfig.write_to_file(kconfig_path)
 		try:
-			self._ops.make_olddefconfig(build_dir)
+			self._ops.make_olddefconfig(build_dir, make_options)
 		except ConfigError as e:
 			logging.error(e)
 			return False
 		return self.validate_config(build_dir)
 
-	def build_reconfig(self, build_dir):
+	def build_reconfig(self, build_dir, make_options):
 		"""Creates a new .config if it is not a subset of the .kunitconfig."""
 		kconfig_path = get_kconfig_path(build_dir)
 		if os.path.exists(kconfig_path):
@@ -128,26 +153,33 @@ class LinuxSourceTree(object):
 			if not self._kconfig.is_subset_of(existing_kconfig):
 				print('Regenerating .config ...')
 				os.remove(kconfig_path)
-				return self.build_config(build_dir)
+				return self.build_config(build_dir, make_options)
 			else:
 				return True
 		else:
 			print('Generating .config ...')
-			return self.build_config(build_dir)
+			return self.build_config(build_dir, make_options)
 
-	def build_um_kernel(self, jobs, build_dir):
+	def build_um_kernel(self, alltests, jobs, build_dir, make_options):
+		if alltests:
+			self._ops.make_allyesconfig()
 		try:
-			self._ops.make_olddefconfig(build_dir)
-			self._ops.make(jobs, build_dir)
+			self._ops.make_olddefconfig(build_dir, make_options)
+			self._ops.make(jobs, build_dir, make_options)
 		except (ConfigError, BuildError) as e:
 			logging.error(e)
 			return False
 		return self.validate_config(build_dir)
 
-	def run_kernel(self, args=[], timeout=None, build_dir=''):
-		args.extend(['mem=256M'])
-		process = self._ops.linux_bin(args, timeout, build_dir)
-		with open(os.path.join(build_dir, 'test.log'), 'w') as f:
-			for line in process.stdout:
-				f.write(line.rstrip().decode('ascii') + '\n')
-				yield line.rstrip().decode('ascii')
+	def run_kernel(self, args=[], build_dir='', timeout=None):
+		args.extend(['mem=1G'])
+		outfile = 'test.log'
+		self._ops.linux_bin(args, timeout, build_dir, outfile)
+		subprocess.call(['stty', 'sane'])
+		with open(outfile, 'r') as file:
+			for line in file:
+				yield line
+
+	def signal_handler(self, sig, frame):
+		logging.error('Build interruption occurred. Cleaning console.')
+		subprocess.call(['stty', 'sane'])
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 4ffbae0f6732..64aac9dcd431 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -46,23 +46,26 @@ class TestStatus(Enum):
 	TEST_CRASHED = auto()
 	NO_TESTS = auto()
 
-kunit_start_re = re.compile(r'^TAP version [0-9]+$')
-kunit_end_re = re.compile('List of all partitions:')
+kunit_start_re = re.compile(r'TAP version [0-9]+$')
+kunit_end_re = re.compile('(List of all partitions:|'
+			  'Kernel panic - not syncing: VFS:|reboot: System halted)')
 
 def isolate_kunit_output(kernel_output):
 	started = False
 	for line in kernel_output:
-		if kunit_start_re.match(line):
+		if kunit_start_re.search(line):
+			prefix_len = len(line.split('TAP version')[0])
 			started = True
-			yield line
-		elif kunit_end_re.match(line):
+			yield line[prefix_len:] if prefix_len > 0 else line
+		elif kunit_end_re.search(line):
 			break
 		elif started:
-			yield line
+			yield line[prefix_len:] if prefix_len > 0 else line
 
 def raw_output(kernel_output):
 	for line in kernel_output:
 		print(line)
+		yield line
 
 DIVIDER = '=' * 60
 
@@ -91,7 +94,7 @@ def print_log(log):
 	for m in log:
 		print_with_timestamp(m)
 
-TAP_ENTRIES = re.compile(r'^(TAP|\t?ok|\t?not ok|\t?[0-9]+\.\.[0-9]+|\t?#).*$')
+TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*#).*$')
 
 def consume_non_diagnositic(lines: List[str]) -> None:
 	while lines and not TAP_ENTRIES.match(lines[0]):
@@ -104,22 +107,20 @@ def save_non_diagnositic(lines: List[str], test_case: TestCase) -> None:
 
 OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text'])
 
-OK_NOT_OK_SUBTEST = re.compile(r'^\t(ok|not ok) [0-9]+ - (.*)$')
+OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$')
 
 OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$')
 
-def parse_ok_not_ok_test_case(lines: List[str],
-			      test_case: TestCase,
-			      expecting_test_case: bool) -> bool:
+def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool:
 	save_non_diagnositic(lines, test_case)
 	if not lines:
-		if expecting_test_case:
-			test_case.status = TestStatus.TEST_CRASHED
-			return True
-		else:
-			return False
+		test_case.status = TestStatus.TEST_CRASHED
+		return True
 	line = lines[0]
 	match = OK_NOT_OK_SUBTEST.match(line)
+	while not match and lines:
+		line = lines.pop(0)
+		match = OK_NOT_OK_SUBTEST.match(line)
 	if match:
 		test_case.log.append(lines.pop(0))
 		test_case.name = match.group(2)
@@ -133,7 +134,7 @@ def parse_ok_not_ok_test_case(lines: List[str],
 	else:
 		return False
 
-SUBTEST_DIAGNOSTIC = re.compile(r'^\t# .*?: (.*)$')
+SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# .*?: (.*)$')
 DIAGNOSTIC_CRASH_MESSAGE = 'kunit test case crashed!'
 
 def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
@@ -150,17 +151,17 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
 	else:
 		return False
 
-def parse_test_case(lines: List[str], expecting_test_case: bool) -> TestCase:
+def parse_test_case(lines: List[str]) -> TestCase:
 	test_case = TestCase()
 	save_non_diagnositic(lines, test_case)
 	while parse_diagnostic(lines, test_case):
 		pass
-	if parse_ok_not_ok_test_case(lines, test_case, expecting_test_case):
+	if parse_ok_not_ok_test_case(lines, test_case):
 		return test_case
 	else:
 		return None
 
-SUBTEST_HEADER = re.compile(r'^\t# Subtest: (.*)$')
+SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$')
 
 def parse_subtest_header(lines: List[str]) -> str:
 	consume_non_diagnositic(lines)
@@ -173,7 +174,7 @@ def parse_subtest_header(lines: List[str]) -> str:
 	else:
 		return None
 
-SUBTEST_PLAN = re.compile(r'\t[0-9]+\.\.([0-9]+)')
+SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)')
 
 def parse_subtest_plan(lines: List[str]) -> int:
 	consume_non_diagnositic(lines)
@@ -234,11 +235,11 @@ def parse_test_suite(lines: List[str]) -> TestSuite:
 	expected_test_case_num = parse_subtest_plan(lines)
 	if not expected_test_case_num:
 		return None
-	test_case = parse_test_case(lines, expected_test_case_num > 0)
-	expected_test_case_num -= 1
-	while test_case:
+	while expected_test_case_num > 0:
+		test_case = parse_test_case(lines)
+		if not test_case:
+			break
 		test_suite.cases.append(test_case)
-		test_case = parse_test_case(lines, expected_test_case_num > 0)
 		expected_test_case_num -= 1
 	if parse_ok_not_ok_test_suite(lines, test_suite):
 		test_suite.status = bubble_up_test_case_errors(test_suite)
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index cba97756ac4a..984588d6ba95 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -37,7 +37,7 @@ class KconfigTest(unittest.TestCase):
 		self.assertTrue(kconfig0.is_subset_of(kconfig0))
 
 		kconfig1 = kunit_config.Kconfig()
-		kconfig1.add_entry(kunit_config.KconfigEntry('CONFIG_TEST=y'))
+		kconfig1.add_entry(kunit_config.KconfigEntry('TEST', 'y'))
 		self.assertTrue(kconfig1.is_subset_of(kconfig1))
 		self.assertTrue(kconfig0.is_subset_of(kconfig1))
 		self.assertFalse(kconfig1.is_subset_of(kconfig0))
@@ -51,15 +51,15 @@ class KconfigTest(unittest.TestCase):
 
 		expected_kconfig = kunit_config.Kconfig()
 		expected_kconfig.add_entry(
-			kunit_config.KconfigEntry('CONFIG_UML=y'))
+			kunit_config.KconfigEntry('UML', 'y'))
 		expected_kconfig.add_entry(
-			kunit_config.KconfigEntry('CONFIG_MMU=y'))
+			kunit_config.KconfigEntry('MMU', 'y'))
 		expected_kconfig.add_entry(
-			kunit_config.KconfigEntry('CONFIG_TEST=y'))
+			kunit_config.KconfigEntry('TEST', 'y'))
 		expected_kconfig.add_entry(
-			kunit_config.KconfigEntry('CONFIG_EXAMPLE_TEST=y'))
+			kunit_config.KconfigEntry('EXAMPLE_TEST', 'y'))
 		expected_kconfig.add_entry(
-			kunit_config.KconfigEntry('# CONFIG_MK8 is not set'))
+			kunit_config.KconfigEntry('MK8', 'n'))
 
 		self.assertEqual(kconfig.entries(), expected_kconfig.entries())
 
@@ -68,15 +68,15 @@ class KconfigTest(unittest.TestCase):
 
 		expected_kconfig = kunit_config.Kconfig()
 		expected_kconfig.add_entry(
-			kunit_config.KconfigEntry('CONFIG_UML=y'))
+			kunit_config.KconfigEntry('UML', 'y'))
 		expected_kconfig.add_entry(
-			kunit_config.KconfigEntry('CONFIG_MMU=y'))
+			kunit_config.KconfigEntry('MMU', 'y'))
 		expected_kconfig.add_entry(
-			kunit_config.KconfigEntry('CONFIG_TEST=y'))
+			kunit_config.KconfigEntry('TEST', 'y'))
 		expected_kconfig.add_entry(
-			kunit_config.KconfigEntry('CONFIG_EXAMPLE_TEST=y'))
+			kunit_config.KconfigEntry('EXAMPLE_TEST', 'y'))
 		expected_kconfig.add_entry(
-			kunit_config.KconfigEntry('# CONFIG_MK8 is not set'))
+			kunit_config.KconfigEntry('MK8', 'n'))
 
 		expected_kconfig.write_to_file(kconfig_path)
 
@@ -108,6 +108,36 @@ class KUnitParserTest(unittest.TestCase):
 		self.assertContains('ok 1 - example', result)
 		file.close()
 
+	def test_output_with_prefix_isolated_correctly(self):
+		log_path = get_absolute_path(
+			'test_data/test_pound_sign.log')
+		with open(log_path) as file:
+			result = kunit_parser.isolate_kunit_output(file.readlines())
+		self.assertContains('TAP version 14\n', result)
+		self.assertContains('	# Subtest: kunit-resource-test', result)
+		self.assertContains('	1..5', result)
+		self.assertContains('	ok 1 - kunit_resource_test_init_resources', result)
+		self.assertContains('	ok 2 - kunit_resource_test_alloc_resource', result)
+		self.assertContains('	ok 3 - kunit_resource_test_destroy_resource', result)
+		self.assertContains(' foo bar 	#', result)
+		self.assertContains('	ok 4 - kunit_resource_test_cleanup_resources', result)
+		self.assertContains('	ok 5 - kunit_resource_test_proper_free_ordering', result)
+		self.assertContains('ok 1 - kunit-resource-test', result)
+		self.assertContains(' foo bar 	# non-kunit output', result)
+		self.assertContains('	# Subtest: kunit-try-catch-test', result)
+		self.assertContains('	1..2', result)
+		self.assertContains('	ok 1 - kunit_test_try_catch_successful_try_no_catch',
+				    result)
+		self.assertContains('	ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch',
+				    result)
+		self.assertContains('ok 2 - kunit-try-catch-test', result)
+		self.assertContains('	# Subtest: string-stream-test', result)
+		self.assertContains('	1..3', result)
+		self.assertContains('	ok 1 - string_stream_test_empty_on_creation', result)
+		self.assertContains('	ok 2 - string_stream_test_not_empty_after_add', result)
+		self.assertContains('	ok 3 - string_stream_test_get_string', result)
+		self.assertContains('ok 3 - string-stream-test', result)
+
 	def test_parse_successful_test_log(self):
 		all_passed_log = get_absolute_path(
 			'test_data/test_is_test_passed-all_passed.log')
@@ -150,6 +180,45 @@ class KUnitParserTest(unittest.TestCase):
 			result.status)
 		file.close()
 
+	def test_ignores_prefix_printk_time(self):
+		prefix_log = get_absolute_path(
+			'test_data/test_config_printk_time.log')
+		with open(prefix_log) as file:
+			result = kunit_parser.parse_run_tests(file.readlines())
+		self.assertEqual('kunit-resource-test', result.suites[0].name)
+
+	def test_ignores_multiple_prefixes(self):
+		prefix_log = get_absolute_path(
+			'test_data/test_multiple_prefixes.log')
+		with open(prefix_log) as file:
+			result = kunit_parser.parse_run_tests(file.readlines())
+		self.assertEqual('kunit-resource-test', result.suites[0].name)
+
+	def test_prefix_mixed_kernel_output(self):
+		mixed_prefix_log = get_absolute_path(
+			'test_data/test_interrupted_tap_output.log')
+		with open(mixed_prefix_log) as file:
+			result = kunit_parser.parse_run_tests(file.readlines())
+		self.assertEqual('kunit-resource-test', result.suites[0].name)
+
+	def test_prefix_poundsign(self):
+		pound_log = get_absolute_path('test_data/test_pound_sign.log')
+		with open(pound_log) as file:
+			result = kunit_parser.parse_run_tests(file.readlines())
+		self.assertEqual('kunit-resource-test', result.suites[0].name)
+
+	def test_kernel_panic_end(self):
+		panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log')
+		with open(panic_log) as file:
+			result = kunit_parser.parse_run_tests(file.readlines())
+		self.assertEqual('kunit-resource-test', result.suites[0].name)
+
+	def test_pound_no_prefix(self):
+		pound_log = get_absolute_path('test_data/test_pound_no_prefix.log')
+		with open(pound_log) as file:
+			result = kunit_parser.parse_run_tests(file.readlines())
+		self.assertEqual('kunit-resource-test', result.suites[0].name)
+
 class StrContains(str):
 	def __eq__(self, other):
 		return self in other
@@ -174,7 +243,8 @@ class KUnitMainTest(unittest.TestCase):
 		kunit.main(['run'], self.linux_source_mock)
 		assert self.linux_source_mock.build_reconfig.call_count == 1
 		assert self.linux_source_mock.run_kernel.call_count == 1
-		self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=300)
+		self.linux_source_mock.run_kernel.assert_called_once_with(
+			build_dir='', timeout=300)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_run_passes_args_fail(self):
@@ -189,25 +259,27 @@ class KUnitMainTest(unittest.TestCase):
 
 	def test_run_raw_output(self):
 		self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
-		kunit.main(['run', '--raw_output'], self.linux_source_mock)
+		with self.assertRaises(SystemExit) as e:
+			kunit.main(['run', '--raw_output'], self.linux_source_mock)
+		assert type(e.exception) == SystemExit
+		assert e.exception.code == 1
 		assert self.linux_source_mock.build_reconfig.call_count == 1
 		assert self.linux_source_mock.run_kernel.call_count == 1
-		for kall in self.print_mock.call_args_list:
-			assert kall != mock.call(StrContains('Testing complete.'))
-			assert kall != mock.call(StrContains(' 0 tests run'))
 
 	def test_run_timeout(self):
 		timeout = 3453
 		kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock)
 		assert self.linux_source_mock.build_reconfig.call_count == 1
-		self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=timeout)
+		self.linux_source_mock.run_kernel.assert_called_once_with(
+			build_dir='', timeout=timeout)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_run_builddir(self):
 		build_dir = '.kunit'
 		kunit.main(['run', '--build_dir', build_dir], self.linux_source_mock)
 		assert self.linux_source_mock.build_reconfig.call_count == 1
-		self.linux_source_mock.run_kernel.assert_called_once_with(build_dir=build_dir, timeout=300)
+		self.linux_source_mock.run_kernel.assert_called_once_with(
+			build_dir=build_dir, timeout=300)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 if __name__ == '__main__':
diff --git a/tools/testing/kunit/test_data/test_config_printk_time.log b/tools/testing/kunit/test_data/test_config_printk_time.log
new file mode 100644
index 000000000000..c02ca773946d
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_config_printk_time.log
@@ -0,0 +1,31 @@
+[    0.060000] printk: console [mc-1] enabled
+[    0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[    0.060000] TAP version 14
+[    0.060000] 	# Subtest: kunit-resource-test
+[    0.060000] 	1..5
+[    0.060000] 	ok 1 - kunit_resource_test_init_resources
+[    0.060000] 	ok 2 - kunit_resource_test_alloc_resource
+[    0.060000] 	ok 3 - kunit_resource_test_destroy_resource
+[    0.060000] 	ok 4 - kunit_resource_test_cleanup_resources
+[    0.060000] 	ok 5 - kunit_resource_test_proper_free_ordering
+[    0.060000] ok 1 - kunit-resource-test
+[    0.060000] 	# Subtest: kunit-try-catch-test
+[    0.060000] 	1..2
+[    0.060000] 	ok 1 - kunit_test_try_catch_successful_try_no_catch
+[    0.060000] 	ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[    0.060000] ok 2 - kunit-try-catch-test
+[    0.060000] 	# Subtest: string-stream-test
+[    0.060000] 	1..3
+[    0.060000] 	ok 1 - string_stream_test_empty_on_creation
+[    0.060000] 	ok 2 - string_stream_test_not_empty_after_add
+[    0.060000] 	ok 3 - string_stream_test_get_string
+[    0.060000] ok 3 - string-stream-test
+[    0.060000] List of all partitions:
+[    0.060000] No filesystem could mount root, tried:
+[    0.060000]
+[    0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[    0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[    0.060000] Stack:
+[    0.060000]  602086f8 601bc260 705c0000 705c0000
+[    0.060000]  602086f8 6005fcec 705c0000 6002c6ab
+[    0.060000]  6005fcec 601bc260 705c0000 3000000010
+\ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_interrupted_tap_output.log b/tools/testing/kunit/test_data/test_interrupted_tap_output.log
new file mode 100644
index 000000000000..5c73fb3a1c6f
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_interrupted_tap_output.log
@@ -0,0 +1,37 @@
+[    0.060000] printk: console [mc-1] enabled
+[    0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[    0.060000] TAP version 14
+[    0.060000] 	# Subtest: kunit-resource-test
+[    0.060000] 	1..5
+[    0.060000] 	ok 1 - kunit_resource_test_init_resources
+[    0.060000] 	ok 2 - kunit_resource_test_alloc_resource
+[    0.060000] 	ok 3 - kunit_resource_test_destroy_resource
+[    0.060000] kAFS: Red Hat AFS client v0.1 registering.
+[    0.060000] FS-Cache: Netfs 'afs' registered for caching
+[    0.060000] *** VALIDATE kAFS ***
+[    0.060000] Btrfs loaded, crc32c=crc32c-generic, debug=on, assert=on, integrity-checker=on, ref-verify=on
+[    0.060000] BTRFS: selftest: sectorsize: 4096  nodesize: 4096
+[    0.060000] BTRFS: selftest: running btrfs free space cache tests
+[    0.060000] 	ok 4 - kunit_resource_test_cleanup_resources
+[    0.060000] 	ok 5 - kunit_resource_test_proper_free_ordering
+[    0.060000] ok 1 - kunit-resource-test
+[    0.060000] 	# Subtest: kunit-try-catch-test
+[    0.060000] 	1..2
+[    0.060000] 	ok 1 - kunit_test_try_catch_successful_try_no_catch
+[    0.060000] 	ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[    0.060000] ok 2 - kunit-try-catch-test
+[    0.060000] 	# Subtest: string-stream-test
+[    0.060000] 	1..3
+[    0.060000] 	ok 1 - string_stream_test_empty_on_creation
+[    0.060000] 	ok 2 - string_stream_test_not_empty_after_add
+[    0.060000] 	ok 3 - string_stream_test_get_string
+[    0.060000] ok 3 - string-stream-test
+[    0.060000] List of all partitions:
+[    0.060000] No filesystem could mount root, tried:
+[    0.060000]
+[    0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[    0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[    0.060000] Stack:
+[    0.060000]  602086f8 601bc260 705c0000 705c0000
+[    0.060000]  602086f8 6005fcec 705c0000 6002c6ab
+[    0.060000]  6005fcec 601bc260 705c0000 3000000010
+\ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log
new file mode 100644
index 000000000000..c045eee75f27
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log
@@ -0,0 +1,25 @@
+[    0.060000] printk: console [mc-1] enabled
+[    0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[    0.060000] TAP version 14
+[    0.060000] 	# Subtest: kunit-resource-test
+[    0.060000] 	1..5
+[    0.060000] 	ok 1 - kunit_resource_test_init_resources
+[    0.060000] 	ok 2 - kunit_resource_test_alloc_resource
+[    0.060000] 	ok 3 - kunit_resource_test_destroy_resource
+[    0.060000] 	ok 4 - kunit_resource_test_cleanup_resources
+[    0.060000] 	ok 5 - kunit_resource_test_proper_free_ordering
+[    0.060000] ok 1 - kunit-resource-test
+[    0.060000] 	# Subtest: kunit-try-catch-test
+[    0.060000] 	1..2
+[    0.060000] 	ok 1 - kunit_test_try_catch_successful_try_no_catch
+[    0.060000] 	ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[    0.060000] ok 2 - kunit-try-catch-test
+[    0.060000] 	# Subtest: string-stream-test
+[    0.060000] 	1..3
+[    0.060000] 	ok 1 - string_stream_test_empty_on_creation
+[    0.060000] 	Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[    0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[    0.060000] Stack:
+[    0.060000]  602086f8 601bc260 705c0000 705c0000
+[    0.060000]  602086f8 6005fcec 705c0000 6002c6ab
+[    0.060000]  6005fcec 601bc260 705c0000 3000000010
+\ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_multiple_prefixes.log b/tools/testing/kunit/test_data/test_multiple_prefixes.log
new file mode 100644
index 000000000000..bc48407dcc36
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_multiple_prefixes.log
@@ -0,0 +1,31 @@
+[    0.060000][    T1] printk: console [mc-1] enabled
+[    0.060000][    T1] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[    0.060000][    T1] TAP version 14
+[    0.060000][    T1] 	# Subtest: kunit-resource-test
+[    0.060000][    T1] 	1..5
+[    0.060000][    T1] 	ok 1 - kunit_resource_test_init_resources
+[    0.060000][    T1] 	ok 2 - kunit_resource_test_alloc_resource
+[    0.060000][    T1] 	ok 3 - kunit_resource_test_destroy_resource
+[    0.060000][    T1] 	ok 4 - kunit_resource_test_cleanup_resources
+[    0.060000][    T1] 	ok 5 - kunit_resource_test_proper_free_ordering
+[    0.060000][    T1] ok 1 - kunit-resource-test
+[    0.060000][    T1] 	# Subtest: kunit-try-catch-test
+[    0.060000][    T1] 	1..2
+[    0.060000][    T1] 	ok 1 - kunit_test_try_catch_successful_try_no_catch
+[    0.060000][    T1] 	ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[    0.060000][    T1] ok 2 - kunit-try-catch-test
+[    0.060000][    T1] 	# Subtest: string-stream-test
+[    0.060000][    T1] 	1..3
+[    0.060000][    T1] 	ok 1 - string_stream_test_empty_on_creation
+[    0.060000][    T1] 	ok 2 - string_stream_test_not_empty_after_add
+[    0.060000][    T1] 	ok 3 - string_stream_test_get_string
+[    0.060000][    T1] ok 3 - string-stream-test
+[    0.060000][    T1] List of all partitions:
+[    0.060000][    T1] No filesystem could mount root, tried:
+[    0.060000][    T1]
+[    0.060000][    T1] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[    0.060000][    T1] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[    0.060000][    T1] Stack:
+[    0.060000][    T1]  602086f8 601bc260 705c0000 705c0000
+[    0.060000][    T1]  602086f8 6005fcec 705c0000 6002c6ab
+[    0.060000][    T1]  6005fcec 601bc260 705c0000 3000000010
+\ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_output_with_prefix_isolated_correctly.log b/tools/testing/kunit/test_data/test_output_with_prefix_isolated_correctly.log
new file mode 100644
index 000000000000..0f87cdabebb0
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_output_with_prefix_isolated_correctly.log
@@ -0,0 +1,33 @@
+[    0.060000] printk: console [mc-1] enabled
+[    0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[    0.060000] TAP version 14
+[    0.060000] 	# Subtest: kunit-resource-test
+[    0.060000] 	1..5
+[    0.060000] 	ok 1 - kunit_resource_test_init_resources
+[    0.060000] 	ok 2 - kunit_resource_test_alloc_resource
+[    0.060000] 	ok 3 - kunit_resource_test_destroy_resource
+[    0.060000]  foo bar 	#
+[    0.060000] 	ok 4 - kunit_resource_test_cleanup_resources
+[    0.060000] 	ok 5 - kunit_resource_test_proper_free_ordering
+[    0.060000] ok 1 - kunit-resource-test
+[    0.060000]  foo bar 	# non-kunit output
+[    0.060000] 	# Subtest: kunit-try-catch-test
+[    0.060000] 	1..2
+[    0.060000] 	ok 1 - kunit_test_try_catch_successful_try_no_catch
+[    0.060000] 	ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[    0.060000] ok 2 - kunit-try-catch-test
+[    0.060000] 	# Subtest: string-stream-test
+[    0.060000] 	1..3
+[    0.060000] 	ok 1 - string_stream_test_empty_on_creation
+[    0.060000] 	ok 2 - string_stream_test_not_empty_after_add
+[    0.060000] 	ok 3 - string_stream_test_get_string
+[    0.060000] ok 3 - string-stream-test
+[    0.060000] List of all partitions:
+[    0.060000] No filesystem could mount root, tried:
+[    0.060000]
+[    0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[    0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[    0.060000] Stack:
+[    0.060000]  602086f8 601bc260 705c0000 705c0000
+[    0.060000]  602086f8 6005fcec 705c0000 6002c6ab
+[    0.060000]  6005fcec 601bc260 705c0000 3000000010
+\ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_pound_no_prefix.log b/tools/testing/kunit/test_data/test_pound_no_prefix.log
new file mode 100644
index 000000000000..2ceb360be7d5
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_pound_no_prefix.log
@@ -0,0 +1,33 @@
+ printk: console [mc-1] enabled
+ random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+ TAP version 14
+ 	# Subtest: kunit-resource-test
+ 	1..5
+ 	ok 1 - kunit_resource_test_init_resources
+ 	ok 2 - kunit_resource_test_alloc_resource
+ 	ok 3 - kunit_resource_test_destroy_resource
+  foo bar 	#
+ 	ok 4 - kunit_resource_test_cleanup_resources
+ 	ok 5 - kunit_resource_test_proper_free_ordering
+ ok 1 - kunit-resource-test
+  foo bar 	# non-kunit output
+ 	# Subtest: kunit-try-catch-test
+ 	1..2
+ 	ok 1 - kunit_test_try_catch_successful_try_no_catch
+ 	ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+ ok 2 - kunit-try-catch-test
+ 	# Subtest: string-stream-test
+ 	1..3
+ 	ok 1 - string_stream_test_empty_on_creation
+ 	ok 2 - string_stream_test_not_empty_after_add
+ 	ok 3 - string_stream_test_get_string
+ ok 3 - string-stream-test
+ List of all partitions:
+ No filesystem could mount root, tried:
+
+ Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+ CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+ Stack:
+  602086f8 601bc260 705c0000 705c0000
+  602086f8 6005fcec 705c0000 6002c6ab
+  6005fcec 601bc260 705c0000 3000000010
+\ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_pound_sign.log b/tools/testing/kunit/test_data/test_pound_sign.log
new file mode 100644
index 000000000000..28ffa5ba03bf
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_pound_sign.log
@@ -0,0 +1,33 @@
+[    0.060000] printk: console [mc-1] enabled
+[    0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[    0.060000] TAP version 14
+[    0.060000] 	# Subtest: kunit-resource-test
+[    0.060000] 	1..5
+[    0.060000] 	ok 1 - kunit_resource_test_init_resources
+[    0.060000] 	ok 2 - kunit_resource_test_alloc_resource
+[    0.060000] 	ok 3 - kunit_resource_test_destroy_resource
+[    0.060000]  foo bar 	#
+[    0.060000] 	ok 4 - kunit_resource_test_cleanup_resources
+[    0.060000] 	ok 5 - kunit_resource_test_proper_free_ordering
+[    0.060000] ok 1 - kunit-resource-test
+[    0.060000]  foo bar 	# non-kunit output
+[    0.060000] 	# Subtest: kunit-try-catch-test
+[    0.060000] 	1..2
+[    0.060000] 	ok 1 - kunit_test_try_catch_successful_try_no_catch
+[    0.060000] 	ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[    0.060000] ok 2 - kunit-try-catch-test
+[    0.060000] 	# Subtest: string-stream-test
+[    0.060000] 	1..3
+[    0.060000] 	ok 1 - string_stream_test_empty_on_creation
+[    0.060000] 	ok 2 - string_stream_test_not_empty_after_add
+[    0.060000] 	ok 3 - string_stream_test_get_string
+[    0.060000] ok 3 - string-stream-test
+[    0.060000] List of all partitions:
+[    0.060000] No filesystem could mount root, tried:
+[    0.060000]
+[    0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[    0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[    0.060000] Stack:
+[    0.060000]  602086f8 601bc260 705c0000 705c0000
+[    0.060000]  602086f8 6005fcec 705c0000 6002c6ab
+[    0.060000]  6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index 3834899b6693..d971516401e6 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 generated/map-shift.h
 idr.c
 idr-test
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 397d6b612502..aa6abfe0749c 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -7,8 +7,8 @@ LDLIBS+= -lpthread -lurcu
 TARGETS = main idr-test multiorder xarray
 CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
-	 regression4.o \
-	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
+	 regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \
+	 iteration_check_2.o benchmark.o
 
 ifndef SHIFT
 	SHIFT=3
diff --git a/tools/testing/radix-tree/iteration_check_2.c b/tools/testing/radix-tree/iteration_check_2.c
new file mode 100644
index 000000000000..aac5c50a3674
--- /dev/null
+++ b/tools/testing/radix-tree/iteration_check_2.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * iteration_check_2.c: Check that deleting a tagged entry doesn't cause
+ * an RCU walker to finish early.
+ * Copyright (c) 2020 Oracle
+ * Author: Matthew Wilcox <willy@infradead.org>
+ */
+#include <pthread.h>
+#include "test.h"
+
+static volatile bool test_complete;
+
+static void *iterator(void *arg)
+{
+	XA_STATE(xas, arg, 0);
+	void *entry;
+
+	rcu_register_thread();
+
+	while (!test_complete) {
+		xas_set(&xas, 0);
+		rcu_read_lock();
+		xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
+			;
+		rcu_read_unlock();
+		assert(xas.xa_index >= 100);
+	}
+
+	rcu_unregister_thread();
+	return NULL;
+}
+
+static void *throbber(void *arg)
+{
+	struct xarray *xa = arg;
+
+	rcu_register_thread();
+
+	while (!test_complete) {
+		int i;
+
+		for (i = 0; i < 100; i++) {
+			xa_store(xa, i, xa_mk_value(i), GFP_KERNEL);
+			xa_set_mark(xa, i, XA_MARK_0);
+		}
+		for (i = 0; i < 100; i++)
+			xa_erase(xa, i);
+	}
+
+	rcu_unregister_thread();
+	return NULL;
+}
+
+void iteration_test2(unsigned test_duration)
+{
+	pthread_t threads[2];
+	DEFINE_XARRAY(array);
+	int i;
+
+	printv(1, "Running iteration test 2 for %d seconds\n", test_duration);
+
+	test_complete = false;
+
+	xa_store(&array, 100, xa_mk_value(100), GFP_KERNEL);
+	xa_set_mark(&array, 100, XA_MARK_0);
+
+	if (pthread_create(&threads[0], NULL, iterator, &array)) {
+		perror("create iterator thread");
+		exit(1);
+	}
+	if (pthread_create(&threads[1], NULL, throbber, &array)) {
+		perror("create throbber thread");
+		exit(1);
+	}
+
+	sleep(test_duration);
+	test_complete = true;
+
+	for (i = 0; i < 2; i++) {
+		if (pthread_join(threads[i], NULL)) {
+			perror("pthread_join");
+			exit(1);
+		}
+	}
+
+	xa_destroy(&array);
+}
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 44a0d1ad4408..2d9c59df60de 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -19,37 +19,44 @@ int test_verbose;
 
 struct kmem_cache {
 	pthread_mutex_t lock;
-	int size;
+	unsigned int size;
+	unsigned int align;
 	int nr_objs;
 	void *objs;
 	void (*ctor)(void *);
 };
 
-void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
+void *kmem_cache_alloc(struct kmem_cache *cachep, int gfp)
 {
-	struct radix_tree_node *node;
+	void *p;
 
-	if (!(flags & __GFP_DIRECT_RECLAIM))
+	if (!(gfp & __GFP_DIRECT_RECLAIM))
 		return NULL;
 
 	pthread_mutex_lock(&cachep->lock);
 	if (cachep->nr_objs) {
+		struct radix_tree_node *node = cachep->objs;
 		cachep->nr_objs--;
-		node = cachep->objs;
 		cachep->objs = node->parent;
 		pthread_mutex_unlock(&cachep->lock);
 		node->parent = NULL;
+		p = node;
 	} else {
 		pthread_mutex_unlock(&cachep->lock);
-		node = malloc(cachep->size);
+		if (cachep->align)
+			posix_memalign(&p, cachep->align, cachep->size);
+		else
+			p = malloc(cachep->size);
 		if (cachep->ctor)
-			cachep->ctor(node);
+			cachep->ctor(p);
+		else if (gfp & __GFP_ZERO)
+			memset(p, 0, cachep->size);
 	}
 
 	uatomic_inc(&nr_allocated);
 	if (kmalloc_verbose)
-		printf("Allocating %p from slab\n", node);
-	return node;
+		printf("Allocating %p from slab\n", p);
+	return p;
 }
 
 void kmem_cache_free(struct kmem_cache *cachep, void *objp)
@@ -59,7 +66,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 	if (kmalloc_verbose)
 		printf("Freeing %p to slab\n", objp);
 	pthread_mutex_lock(&cachep->lock);
-	if (cachep->nr_objs > 10) {
+	if (cachep->nr_objs > 10 || cachep->align) {
 		memset(objp, POISON_FREE, cachep->size);
 		free(objp);
 	} else {
@@ -98,13 +105,14 @@ void kfree(void *p)
 }
 
 struct kmem_cache *
-kmem_cache_create(const char *name, size_t size, size_t offset,
-	unsigned long flags, void (*ctor)(void *))
+kmem_cache_create(const char *name, unsigned int size, unsigned int align,
+		unsigned int flags, void (*ctor)(void *))
 {
 	struct kmem_cache *ret = malloc(sizeof(*ret));
 
 	pthread_mutex_init(&ret->lock, NULL);
 	ret->size = size;
+	ret->align = align;
 	ret->nr_objs = 0;
 	ret->objs = NULL;
 	ret->ctor = ctor;
diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h
index a037def0dec6..2958830ce4d7 100644
--- a/tools/testing/radix-tree/linux/slab.h
+++ b/tools/testing/radix-tree/linux/slab.h
@@ -20,8 +20,8 @@ static inline void *kzalloc(size_t size, gfp_t gfp)
 void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
 void kmem_cache_free(struct kmem_cache *cachep, void *objp);
 
-struct kmem_cache *
-kmem_cache_create(const char *name, size_t size, size_t offset,
-	unsigned long flags, void (*ctor)(void *));
+struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
+			unsigned int align, unsigned int flags,
+			void (*ctor)(void *));
 
 #endif		/* SLAB_H */
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index 7a22d6e3732e..f2cbc8e5b97c 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -311,6 +311,7 @@ int main(int argc, char **argv)
 	regression4_test();
 	iteration_test(0, 10 + 90 * long_run);
 	iteration_test(7, 10 + 90 * long_run);
+	iteration_test2(10 + 90 * long_run);
 	single_thread_tests(long_run);
 
 	/* Free any remaining preallocated nodes */
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 1ee4b2c0ad10..34dab4d18744 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -34,6 +34,7 @@ void xarray_tests(void);
 void tag_check(void);
 void multiorder_checks(void);
 void iteration_test(unsigned order, unsigned duration);
+void iteration_test2(unsigned duration);
 void benchmark(void);
 void idr_checks(void);
 void ida_tests(void);
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
index 61df01cdf0b2..055a5019b13c 100644
--- a/tools/testing/selftests/.gitignore
+++ b/tools/testing/selftests/.gitignore
@@ -1,6 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
 gpiogpio-event-mon
 gpiogpio-hammer
 gpioinclude/
 gpiolsgpio
 tpm2/SpaceTest.log
-tpm2/*.pyc
+
+# Python bytecode and cache
+__pycache__/
+*.py[cod]
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 6ec503912bea..2ff68702fd41 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -33,11 +33,12 @@ TARGETS += memory-hotplug
 TARGETS += mount
 TARGETS += mqueue
 TARGETS += net
+TARGETS += net/forwarding
 TARGETS += net/mptcp
 TARGETS += netfilter
-TARGETS += networking/timestamping
 TARGETS += nsfs
 TARGETS += pidfd
+TARGETS += pid_namespace
 TARGETS += powerpc
 TARGETS += proc
 TARGETS += pstore
@@ -91,7 +92,7 @@ override LDFLAGS =
 override MAKEFLAGS =
 endif
 
-# Append kselftest to KBUILD_OUTPUT to avoid cluttering
+# Append kselftest to KBUILD_OUTPUT and O to avoid cluttering
 # KBUILD_OUTPUT with selftest objects and headers installed
 # by selftests Makefile or lib.mk.
 ifdef building_out_of_srctree
@@ -99,7 +100,7 @@ override LDFLAGS =
 endif
 
 ifneq ($(O),)
-	BUILD := $(O)
+	BUILD := $(O)/kselftest
 else
 	ifneq ($(KBUILD_OUTPUT),)
 		BUILD := $(KBUILD_OUTPUT)/kselftest
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
index 7c462714b418..9258306cafe9 100644
--- a/tools/testing/selftests/android/Makefile
+++ b/tools/testing/selftests/android/Makefile
@@ -21,7 +21,7 @@ all:
 
 override define INSTALL_RULE
 	mkdir -p $(INSTALL_PATH)
-	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)  $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 
 	@for SUBDIR in $(SUBDIRS); do \
 		BUILD_TARGET=$(OUTPUT)/$$SUBDIR;	\
diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore
index 95e8f4561474..78eae9972bb1 100644
--- a/tools/testing/selftests/android/ion/.gitignore
+++ b/tools/testing/selftests/android/ion/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 ionapp_export
 ionapp_import
 ionmap_test
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
index 0eb7ab626e1c..42b71f005332 100644
--- a/tools/testing/selftests/android/ion/Makefile
+++ b/tools/testing/selftests/android/ion/Makefile
@@ -17,4 +17,4 @@ include ../../lib.mk
 
 $(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
 $(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
-$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c
+$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c ipcsocket.c
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 3c5b4e8ff894..78c902045ca7 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 mangle_*
 fake_sigreturn_*
 !*.[ch]
diff --git a/tools/testing/selftests/arm64/tags/.gitignore b/tools/testing/selftests/arm64/tags/.gitignore
index e8fae8d61ed6..f4f6c5112463 100644
--- a/tools/testing/selftests/arm64/tags/.gitignore
+++ b/tools/testing/selftests/arm64/tags/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 tags_test
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index ec464859c6b6..c30079c86998 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 test_verifier
 test_maps
 test_lru_map
@@ -31,6 +32,7 @@ test_tcp_check_syncookie_user
 test_sysctl
 test_hashmap
 test_btf_dump
+test_current_pid_tgid_new_ns
 xdping
 test_cpp
 *.skel.h
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 257a1aaaa37d..7729892e0b04 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -20,8 +20,9 @@ CLANG		?= clang
 LLC		?= llc
 LLVM_OBJCOPY	?= llvm-objcopy
 BPF_GCC		?= $(shell command -v bpf-gcc;)
-CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR)		\
+CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR)		\
 	  -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR)	\
+	  -I$(APIDIR)							\
 	  -Dbpf_prog_load=bpf_prog_test_load				\
 	  -Dbpf_load_program=bpf_test_load_program
 LDLIBS += -lcap -lelf -lz -lrt -lpthread
@@ -32,7 +33,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
-	test_progs-no_alu32
+	test_progs-no_alu32 \
+	test_current_pid_tgid_new_ns
 
 # Also test bpf-gcc, if present
 ifneq ($(BPF_GCC),)
@@ -62,7 +64,8 @@ TEST_PROGS := test_kmod.sh \
 	test_tc_tunnel.sh \
 	test_tc_edt.sh \
 	test_xdping.sh \
-	test_bpftool_build.sh
+	test_bpftool_build.sh \
+	test_bpftool.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
@@ -128,10 +131,13 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
 	$(call msg,CC,,$@)
 	$(CC) -c $(CFLAGS) -o $@ $<
 
-VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux)			\
-			       /sys/kernel/btf/vmlinux			\
-			       /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))
+VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)				\
+		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
+		     ../../../../vmlinux				\
+		     /sys/kernel/btf/vmlinux				\
+		     /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
 $(OUTPUT)/runqslower: $(BPFOBJ)
 	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower	\
 		    OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF)   \
@@ -171,6 +177,10 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
 	$(call msg,MKDIR,,$@)
 	mkdir -p $@
 
+$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+	$(call msg,GEN,,$@)
+	$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+
 # Get Clang's default includes on this system, as opposed to those seen by
 # '-target bpf'. This fixes "missing" files on some architectures/distros,
 # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
@@ -189,8 +199,8 @@ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
 
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
 BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) 			\
-	     -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi	\
-	     -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include)
+	     -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)			\
+	     -I$(abspath $(OUTPUT)/../usr/include)
 
 CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
 	       -Wno-compare-distinct-pointer-types
@@ -209,7 +219,7 @@ define CLANG_BPF_BUILD_RULE
 	$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
 	($(CLANG) $3 -O2 -target bpf -emit-llvm				\
 		-c $1 -o - || echo "BPF obj compilation failed") | 	\
-	$(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2
+	$(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
 endef
 # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
 define CLANG_NOALU32_BPF_BUILD_RULE
@@ -223,7 +233,7 @@ define CLANG_NATIVE_BPF_BUILD_RULE
 	$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
 	($(CLANG) $3 -O2 -emit-llvm					\
 		-c $1 -o - || echo "BPF obj compilation failed") | 	\
-	$(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2
+	$(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
 endef
 # Build BPF object using GCC
 define GCC_BPF_BUILD_RULE
@@ -279,6 +289,7 @@ $(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y
 $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 		     $(TRUNNER_BPF_PROGS_DIR)/%.c			\
 		     $(TRUNNER_BPF_PROGS_DIR)/*.h			\
+		     $$(INCLUDE_DIR)/vmlinux.h				\
 		     $$(BPFOBJ) | $(TRUNNER_OUTPUT)
 	$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,			\
 					  $(TRUNNER_BPF_CFLAGS),	\
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 8f21965ffc6c..5bf2fe9b1efa 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -6,7 +6,7 @@
 #include <linux/types.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_core_read.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 #define BPF_STRUCT_OPS(name, args...) \
 SEC("struct_ops/"#name) \
diff --git a/tools/testing/selftests/bpf/bpf_trace_helpers.h b/tools/testing/selftests/bpf/bpf_trace_helpers.h
deleted file mode 100644
index c6f1354d93fb..000000000000
--- a/tools/testing/selftests/bpf/bpf_trace_helpers.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __BPF_TRACE_HELPERS_H
-#define __BPF_TRACE_HELPERS_H
-
-#include <bpf/bpf_helpers.h>
-
-#define ___bpf_concat(a, b) a ## b
-#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
-#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
-#define ___bpf_narg(...) \
-	___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#define ___bpf_empty(...) \
-	___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
-
-#define ___bpf_ctx_cast0() ctx
-#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
-#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
-#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
-#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
-#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
-#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
-#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
-#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
-#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
-#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
-#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
-#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
-#define ___bpf_ctx_cast(args...) \
-	___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
-
-/*
- * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
- * similar kinds of BPF programs, that accept input arguments as a single
- * pointer to untyped u64 array, where each u64 can actually be a typed
- * pointer or integer of different size. Instead of requring user to write
- * manual casts and work with array elements by index, BPF_PROG macro
- * allows user to declare a list of named and typed input arguments in the
- * same syntax as for normal C function. All the casting is hidden and
- * performed transparently, while user code can just assume working with
- * function arguments of specified type and name.
- *
- * Original raw context argument is preserved as well as 'ctx' argument.
- * This is useful when using BPF helpers that expect original context
- * as one of the parameters (e.g., for bpf_perf_event_output()).
- */
-#define BPF_PROG(name, args...)						    \
-name(unsigned long long *ctx);						    \
-static __always_inline typeof(name(0))					    \
-____##name(unsigned long long *ctx, ##args);				    \
-typeof(name(0)) name(unsigned long long *ctx)				    \
-{									    \
-	_Pragma("GCC diagnostic push")					    \
-	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
-	return ____##name(___bpf_ctx_cast(args));			    \
-	_Pragma("GCC diagnostic pop")					    \
-}									    \
-static __always_inline typeof(name(0))					    \
-____##name(unsigned long long *ctx, ##args)
-
-struct pt_regs;
-
-#define ___bpf_kprobe_args0() ctx
-#define ___bpf_kprobe_args1(x) \
-	___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
-#define ___bpf_kprobe_args2(x, args...) \
-	___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
-#define ___bpf_kprobe_args3(x, args...) \
-	___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
-#define ___bpf_kprobe_args4(x, args...) \
-	___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
-#define ___bpf_kprobe_args5(x, args...) \
-	___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
-#define ___bpf_kprobe_args(args...) \
-	___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
-
-/*
- * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
- * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
- * low-level way of getting kprobe input arguments from struct pt_regs, and
- * provides a familiar typed and named function arguments syntax and
- * semantics of accessing kprobe input paremeters.
- *
- * Original struct pt_regs* context is preserved as 'ctx' argument. This might
- * be necessary when using BPF helpers like bpf_perf_event_output().
- */
-#define BPF_KPROBE(name, args...)					    \
-name(struct pt_regs *ctx);						    \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
-typeof(name(0)) name(struct pt_regs *ctx)				    \
-{									    \
-	_Pragma("GCC diagnostic push")					    \
-	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
-	return ____##name(___bpf_kprobe_args(args));			    \
-	_Pragma("GCC diagnostic pop")					    \
-}									    \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
-
-#define ___bpf_kretprobe_args0() ctx
-#define ___bpf_kretprobe_argsN(x, args...) \
-	___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx)
-#define ___bpf_kretprobe_args(args...) \
-	___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args)
-
-/*
- * BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all
- * input kprobe arguments, one last extra argument has to be specified, which
- * captures kprobe return value.
- */
-#define BPF_KRETPROBE(name, args...)					    \
-name(struct pt_regs *ctx);						    \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
-typeof(name(0)) name(struct pt_regs *ctx)				    \
-{									    \
-	_Pragma("GCC diagnostic push")					    \
-	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
-	return ____##name(___bpf_kretprobe_args(args));			    \
-	_Pragma("GCC diagnostic pop")					    \
-}									    \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
-#endif
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 5dc109f4c097..60e3ae5d4e48 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -35,3 +35,5 @@ CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_IPV6_SIT=m
 CONFIG_BPF_JIT=y
+CONFIG_BPF_LSM=y
+CONFIG_SECURITY=y
diff --git a/tools/testing/selftests/bpf/map_tests/.gitignore b/tools/testing/selftests/bpf/map_tests/.gitignore
index 45984a364647..89c4a3d37544 100644
--- a/tools/testing/selftests/bpf/map_tests/.gitignore
+++ b/tools/testing/selftests/bpf/map_tests/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 tests.h
diff --git a/tools/testing/selftests/bpf/prog_tests/.gitignore b/tools/testing/selftests/bpf/prog_tests/.gitignore
index 45984a364647..89c4a3d37544 100644
--- a/tools/testing/selftests/bpf/prog_tests/.gitignore
+++ b/tools/testing/selftests/bpf/prog_tests/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 tests.h
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 8482bbc67eec..9a8f47fc0b91 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -11,6 +11,7 @@
 static const unsigned int total_bytes = 10 * 1024 * 1024;
 static const struct timeval timeo_sec = { .tv_sec = 10 };
 static const size_t timeo_optlen = sizeof(timeo_sec);
+static int expected_stg = 0xeB9F;
 static int stop, duration;
 
 static int settimeo(int fd)
@@ -88,7 +89,7 @@ done:
 	return NULL;
 }
 
-static void do_test(const char *tcp_ca)
+static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
 {
 	struct sockaddr_in6 sa6 = {};
 	ssize_t nr_recv = 0, bytes = 0;
@@ -126,14 +127,34 @@ static void do_test(const char *tcp_ca)
 	err = listen(lfd, 1);
 	if (CHECK(err == -1, "listen", "errno:%d\n", errno))
 		goto done;
-	err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
-	if (CHECK(err != 0, "pthread_create", "err:%d\n", err))
-		goto done;
+
+	if (sk_stg_map) {
+		err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd,
+					  &expected_stg, BPF_NOEXIST);
+		if (CHECK(err, "bpf_map_update_elem(sk_stg_map)",
+			  "err:%d errno:%d\n", err, errno))
+			goto done;
+	}
 
 	/* connect to server */
 	err = connect(fd, (struct sockaddr *)&sa6, addrlen);
 	if (CHECK(err == -1, "connect", "errno:%d\n", errno))
-		goto wait_thread;
+		goto done;
+
+	if (sk_stg_map) {
+		int tmp_stg;
+
+		err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd,
+					  &tmp_stg);
+		if (CHECK(!err || errno != ENOENT,
+			  "bpf_map_lookup_elem(sk_stg_map)",
+			  "err:%d errno:%d\n", err, errno))
+			goto done;
+	}
+
+	err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
+	if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno))
+		goto done;
 
 	/* recv total_bytes */
 	while (bytes < total_bytes && !READ_ONCE(stop)) {
@@ -149,7 +170,6 @@ static void do_test(const char *tcp_ca)
 	CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n",
 	      bytes, total_bytes, nr_recv, errno);
 
-wait_thread:
 	WRITE_ONCE(stop, 1);
 	pthread_join(srv_thread, &thread_ret);
 	CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld",
@@ -175,7 +195,7 @@ static void test_cubic(void)
 		return;
 	}
 
-	do_test("bpf_cubic");
+	do_test("bpf_cubic", NULL);
 
 	bpf_link__destroy(link);
 	bpf_cubic__destroy(cubic_skel);
@@ -197,7 +217,10 @@ static void test_dctcp(void)
 		return;
 	}
 
-	do_test("bpf_dctcp");
+	do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map);
+	CHECK(dctcp_skel->bss->stg_result != expected_stg,
+	      "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n",
+	      dctcp_skel->bss->stg_result, expected_stg);
 
 	bpf_link__destroy(link);
 	bpf_dctcp__destroy(dctcp_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 7390d3061065..cb33a7ee4e04 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -125,6 +125,6 @@ void test_btf_dump() {
 		if (!test__start_subtest(t->name))
 			continue;
 
-		 test_btf_dump_case(i, &btf_dump_test_cases[i]);
+		test_btf_dump_case(i, &btf_dump_test_cases[i]);
 	}
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
index 5b13f2c6c402..70e94e783070 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
@@ -6,7 +6,7 @@
 
 #define PING_CMD	"ping -q -c1 -w1 127.0.0.1 > /dev/null"
 
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
 
 static int prog_load(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 2ff21dbce179..139f8e82c7c6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -6,7 +6,7 @@
 
 #define PING_CMD	"ping -q -c1 -w1 127.0.0.1 > /dev/null"
 
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
 
 static int map_fd = -1;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
index 9d8cb48b99de..9e96f8d87fea 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
@@ -8,7 +8,7 @@
 #define BAR		"/foo/bar/"
 #define PING_CMD	"ping -q -c1 -w1 127.0.0.1 > /dev/null"
 
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
 
 static int prog_load(int verdict)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
new file mode 100644
index 000000000000..6e04f8d1d15b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "test_cgroup_link.skel.h"
+
+static __u32 duration = 0;
+#define PING_CMD	"ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+static struct test_cgroup_link *skel = NULL;
+
+int ping_and_check(int exp_calls, int exp_alt_calls)
+{
+	skel->bss->calls = 0;
+	skel->bss->alt_calls = 0;
+	CHECK_FAIL(system(PING_CMD));
+	if (CHECK(skel->bss->calls != exp_calls, "call_cnt",
+		  "exp %d, got %d\n", exp_calls, skel->bss->calls))
+		return -EINVAL;
+	if (CHECK(skel->bss->alt_calls != exp_alt_calls, "alt_call_cnt",
+		  "exp %d, got %d\n", exp_alt_calls, skel->bss->alt_calls))
+		return -EINVAL;
+	return 0;
+}
+
+void test_cgroup_link(void)
+{
+	struct {
+		const char *path;
+		int fd;
+	} cgs[] = {
+		{ "/cg1" },
+		{ "/cg1/cg2" },
+		{ "/cg1/cg2/cg3" },
+		{ "/cg1/cg2/cg3/cg4" },
+	};
+	int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts);
+	struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link;
+	__u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags;
+	int i = 0, err, prog_fd;
+	bool detach_legacy = false;
+
+	skel = test_cgroup_link__open_and_load();
+	if (CHECK(!skel, "skel_open_load", "failed to open/load skeleton\n"))
+		return;
+	prog_fd = bpf_program__fd(skel->progs.egress);
+
+	err = setup_cgroup_environment();
+	if (CHECK(err, "cg_init", "failed: %d\n", err))
+		goto cleanup;
+
+	for (i = 0; i < cg_nr; i++) {
+		cgs[i].fd = create_and_get_cgroup(cgs[i].path);
+		if (CHECK(cgs[i].fd < 0, "cg_create", "fail: %d\n", cgs[i].fd))
+			goto cleanup;
+	}
+
+	err = join_cgroup(cgs[last_cg].path);
+	if (CHECK(err, "cg_join", "fail: %d\n", err))
+		goto cleanup;
+
+	for (i = 0; i < cg_nr; i++) {
+		links[i] = bpf_program__attach_cgroup(skel->progs.egress,
+						      cgs[i].fd);
+		if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
+				 i, PTR_ERR(links[i])))
+			goto cleanup;
+	}
+
+	ping_and_check(cg_nr, 0);
+
+	/* query the number of effective progs and attach flags in root cg */
+	err = bpf_prog_query(cgs[0].fd, BPF_CGROUP_INET_EGRESS,
+			     BPF_F_QUERY_EFFECTIVE, &attach_flags, NULL,
+			     &prog_cnt);
+	CHECK_FAIL(err);
+	CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
+	if (CHECK(prog_cnt != 1, "effect_cnt", "exp %d, got %d\n", 1, prog_cnt))
+		goto cleanup;
+
+	/* query the number of effective progs in last cg */
+	err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
+			     BPF_F_QUERY_EFFECTIVE, NULL, NULL,
+			     &prog_cnt);
+	CHECK_FAIL(err);
+	CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
+	if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
+		  cg_nr, prog_cnt))
+		goto cleanup;
+
+	/* query the effective prog IDs in last cg */
+	err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
+			     BPF_F_QUERY_EFFECTIVE, &attach_flags,
+			     prog_ids, &prog_cnt);
+	CHECK_FAIL(err);
+	CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
+	if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
+		  cg_nr, prog_cnt))
+		goto cleanup;
+	for (i = 1; i < prog_cnt; i++) {
+		CHECK(prog_ids[i - 1] != prog_ids[i], "prog_id_check",
+		      "idx %d, prev id %d, cur id %d\n",
+		      i, prog_ids[i - 1], prog_ids[i]);
+	}
+
+	/* detach bottom program and ping again */
+	bpf_link__destroy(links[last_cg]);
+	links[last_cg] = NULL;
+
+	ping_and_check(cg_nr - 1, 0);
+
+	/* mix in with non link-based multi-attachments */
+	err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+			      BPF_CGROUP_INET_EGRESS, BPF_F_ALLOW_MULTI);
+	if (CHECK(err, "cg_attach_legacy", "errno=%d\n", errno))
+		goto cleanup;
+	detach_legacy = true;
+
+	links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
+						    cgs[last_cg].fd);
+	if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
+		  PTR_ERR(links[last_cg])))
+		goto cleanup;
+
+	ping_and_check(cg_nr + 1, 0);
+
+	/* detach link */
+	bpf_link__destroy(links[last_cg]);
+	links[last_cg] = NULL;
+
+	/* detach legacy */
+	err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+	if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno))
+		goto cleanup;
+	detach_legacy = false;
+
+	/* attach legacy exclusive prog attachment */
+	err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+			      BPF_CGROUP_INET_EGRESS, 0);
+	if (CHECK(err, "cg_attach_exclusive", "errno=%d\n", errno))
+		goto cleanup;
+	detach_legacy = true;
+
+	/* attempt to mix in with multi-attach bpf_link */
+	tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
+					      cgs[last_cg].fd);
+	if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+		bpf_link__destroy(tmp_link);
+		goto cleanup;
+	}
+
+	ping_and_check(cg_nr, 0);
+
+	/* detach */
+	err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+	if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno))
+		goto cleanup;
+	detach_legacy = false;
+
+	ping_and_check(cg_nr - 1, 0);
+
+	/* attach back link-based one */
+	links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
+						    cgs[last_cg].fd);
+	if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
+		  PTR_ERR(links[last_cg])))
+		goto cleanup;
+
+	ping_and_check(cg_nr, 0);
+
+	/* check legacy exclusive prog can't be attached */
+	err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+			      BPF_CGROUP_INET_EGRESS, 0);
+	if (CHECK(!err, "cg_attach_exclusive", "unexpected success")) {
+		bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+		goto cleanup;
+	}
+
+	/* replace BPF programs inside their links for all but first link */
+	for (i = 1; i < cg_nr; i++) {
+		err = bpf_link__update_program(links[i], skel->progs.egress_alt);
+		if (CHECK(err, "prog_upd", "link #%d\n", i))
+			goto cleanup;
+	}
+
+	ping_and_check(1, cg_nr - 1);
+
+	/* Attempt program update with wrong expected BPF program */
+	link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress_alt);
+	link_upd_opts.flags = BPF_F_REPLACE;
+	err = bpf_link_update(bpf_link__fd(links[0]),
+			      bpf_program__fd(skel->progs.egress_alt),
+			      &link_upd_opts);
+	if (CHECK(err == 0 || errno != EPERM, "prog_cmpxchg1",
+		  "unexpectedly succeeded, err %d, errno %d\n", err, -errno))
+		goto cleanup;
+
+	/* Compare-exchange single link program from egress to egress_alt */
+	link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress);
+	link_upd_opts.flags = BPF_F_REPLACE;
+	err = bpf_link_update(bpf_link__fd(links[0]),
+			      bpf_program__fd(skel->progs.egress_alt),
+			      &link_upd_opts);
+	if (CHECK(err, "prog_cmpxchg2", "errno %d\n", -errno))
+		goto cleanup;
+
+	/* ping */
+	ping_and_check(0, cg_nr);
+
+	/* close cgroup FDs before detaching links */
+	for (i = 0; i < cg_nr; i++) {
+		if (cgs[i].fd > 0) {
+			close(cgs[i].fd);
+			cgs[i].fd = -1;
+		}
+	}
+
+	/* BPF programs should still get called */
+	ping_and_check(0, cg_nr);
+
+	/* leave cgroup and remove them, don't detach programs */
+	cleanup_cgroup_environment();
+
+	/* BPF programs should have been auto-detached */
+	ping_and_check(0, 0);
+
+cleanup:
+	if (detach_legacy)
+		bpf_prog_detach2(prog_fd, cgs[last_cg].fd,
+				 BPF_CGROUP_INET_EGRESS);
+
+	for (i = 0; i < cg_nr; i++) {
+		if (!IS_ERR(links[i]))
+			bpf_link__destroy(links[i]);
+	}
+	test_cgroup_link__destroy(skel);
+
+	for (i = 0; i < cg_nr; i++) {
+		if (cgs[i].fd > 0)
+			close(cgs[i].fd);
+	}
+	cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 235ac4f67f5b..83493bd5745c 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -1,22 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
-#include "test_pkt_access.skel.h"
 #include "fentry_test.skel.h"
 #include "fexit_test.skel.h"
 
 void test_fentry_fexit(void)
 {
-	struct test_pkt_access *pkt_skel = NULL;
 	struct fentry_test *fentry_skel = NULL;
 	struct fexit_test *fexit_skel = NULL;
 	__u64 *fentry_res, *fexit_res;
 	__u32 duration = 0, retval;
-	int err, pkt_fd, i;
+	int err, prog_fd, i;
 
-	pkt_skel = test_pkt_access__open_and_load();
-	if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
-		return;
 	fentry_skel = fentry_test__open_and_load();
 	if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
 		goto close_prog;
@@ -31,8 +26,8 @@ void test_fentry_fexit(void)
 	if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
 		goto close_prog;
 
-	pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
-	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+	prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
 	CHECK(err || retval, "ipv6",
 	      "err %d errno %d retval %d duration %d\n",
@@ -49,7 +44,6 @@ void test_fentry_fexit(void)
 	}
 
 close_prog:
-	test_pkt_access__destroy(pkt_skel);
 	fentry_test__destroy(fentry_skel);
 	fexit_test__destroy(fexit_skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 5cc06021f27d..04ebbf1cb390 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -1,20 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
-#include "test_pkt_access.skel.h"
 #include "fentry_test.skel.h"
 
 void test_fentry_test(void)
 {
-	struct test_pkt_access *pkt_skel = NULL;
 	struct fentry_test *fentry_skel = NULL;
-	int err, pkt_fd, i;
+	int err, prog_fd, i;
 	__u32 duration = 0, retval;
 	__u64 *result;
 
-	pkt_skel = test_pkt_access__open_and_load();
-	if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
-		return;
 	fentry_skel = fentry_test__open_and_load();
 	if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
 		goto cleanup;
@@ -23,10 +18,10 @@ void test_fentry_test(void)
 	if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
 		goto cleanup;
 
-	pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
-	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+	prog_fd = bpf_program__fd(fentry_skel->progs.test1);
+	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "ipv6",
+	CHECK(err || retval, "test_run",
 	      "err %d errno %d retval %d duration %d\n",
 	      err, errno, retval, duration);
 
@@ -39,5 +34,4 @@ void test_fentry_test(void)
 
 cleanup:
 	fentry_test__destroy(fentry_skel);
-	test_pkt_access__destroy(pkt_skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index d2c3655dd7a3..78d7a2765c27 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -1,64 +1,37 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
+#include "fexit_test.skel.h"
 
 void test_fexit_test(void)
 {
-	struct bpf_prog_load_attr attr = {
-		.file = "./fexit_test.o",
-	};
-
-	char prog_name[] = "fexit/bpf_fentry_testX";
-	struct bpf_object *obj = NULL, *pkt_obj;
-	int err, pkt_fd, kfree_skb_fd, i;
-	struct bpf_link *link[6] = {};
-	struct bpf_program *prog[6];
+	struct fexit_test *fexit_skel = NULL;
+	int err, prog_fd, i;
 	__u32 duration = 0, retval;
-	struct bpf_map *data_map;
-	const int zero = 0;
-	u64 result[6];
+	__u64 *result;
 
-	err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
-			    &pkt_obj, &pkt_fd);
-	if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
-		return;
-	err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd);
-	if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
-		goto close_prog;
+	fexit_skel = fexit_test__open_and_load();
+	if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+		goto cleanup;
 
-	for (i = 0; i < 6; i++) {
-		prog_name[sizeof(prog_name) - 2] = '1' + i;
-		prog[i] = bpf_object__find_program_by_title(obj, prog_name);
-		if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name))
-			goto close_prog;
-		link[i] = bpf_program__attach_trace(prog[i]);
-		if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
-			goto close_prog;
-	}
-	data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss");
-	if (CHECK(!data_map, "find_data_map", "data map not found\n"))
-		goto close_prog;
+	err = fexit_test__attach(fexit_skel);
+	if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+		goto cleanup;
 
-	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+	prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "ipv6",
+	CHECK(err || retval, "test_run",
 	      "err %d errno %d retval %d duration %d\n",
 	      err, errno, retval, duration);
 
-	err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
-	if (CHECK(err, "get_result",
-		  "failed to get output data: %d\n", err))
-		goto close_prog;
-
-	for (i = 0; i < 6; i++)
-		if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
-			  i + 1, result[i]))
-			goto close_prog;
+	result = (__u64 *)fexit_skel->bss;
+	for (i = 0; i < 6; i++) {
+		if (CHECK(result[i] != 1, "result",
+			  "fexit_test%d failed err %lld\n", i + 1, result[i]))
+			goto cleanup;
+	}
 
-close_prog:
-	for (i = 0; i < 6; i++)
-		if (!IS_ERR_OR_NULL(link[i]))
-			bpf_link__destroy(link[i]);
-	bpf_object__close(obj);
-	bpf_object__close(pkt_obj);
+cleanup:
+	fexit_test__destroy(fexit_skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index eba9a970703b..925722217edf 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -82,6 +82,7 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
 void test_get_stack_raw_tp(void)
 {
 	const char *file = "./test_get_stack_rawtp.o";
+	const char *file_err = "./test_get_stack_rawtp_err.o";
 	const char *prog_name = "raw_tracepoint/sys_enter";
 	int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP;
 	struct perf_buffer_opts pb_opts = {};
@@ -93,6 +94,10 @@ void test_get_stack_raw_tp(void)
 	struct bpf_map *map;
 	cpu_set_t cpu_set;
 
+	err = bpf_prog_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err >= 0, "prog_load raw tp", "err %d errno %d\n", err, errno))
+		return;
+
 	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
new file mode 100644
index 000000000000..3bdaa5a40744
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_global_data_init(void)
+{
+	const char *file = "./test_global_data.o";
+	int err = -ENOMEM, map_fd, zero = 0;
+	__u8 *buff = NULL, *newval = NULL;
+	struct bpf_object *obj;
+	struct bpf_map *map;
+        __u32 duration = 0;
+	size_t sz;
+
+	obj = bpf_object__open_file(file, NULL);
+	if (CHECK_FAIL(!obj))
+		return;
+
+	map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
+	if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
+		goto out;
+
+	sz = bpf_map__def(map)->value_size;
+	newval = malloc(sz);
+	if (CHECK_FAIL(!newval))
+		goto out;
+
+	memset(newval, 0, sz);
+	/* wrong size, should fail */
+	err = bpf_map__set_initial_value(map, newval, sz - 1);
+	if (CHECK(!err, "reject set initial value wrong size", "err %d\n", err))
+		goto out;
+
+	err = bpf_map__set_initial_value(map, newval, sz);
+	if (CHECK(err, "set initial value", "err %d\n", err))
+		goto out;
+
+	err = bpf_object__load(obj);
+	if (CHECK_FAIL(err))
+		goto out;
+
+	map_fd = bpf_map__fd(map);
+	if (CHECK_FAIL(map_fd < 0))
+		goto out;
+
+	buff = malloc(sz);
+	if (buff)
+		err = bpf_map_lookup_elem(map_fd, &zero, buff);
+	if (CHECK(!buff || err || memcmp(buff, newval, sz),
+		  "compare .rodata map data override",
+		  "err %d errno %d\n", err, errno))
+		goto out;
+
+	memset(newval, 1, sz);
+	/* object loaded - should fail */
+	err = bpf_map__set_initial_value(map, newval, sz);
+	CHECK(!err, "reject set initial value after load", "err %d\n", err);
+out:
+	free(buff);
+	free(newval);
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
new file mode 100644
index 000000000000..a743288cf384
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <sys/stat.h>
+
+#include "test_link_pinning.skel.h"
+
+static int duration = 0;
+
+void test_link_pinning_subtest(struct bpf_program *prog,
+			       struct test_link_pinning__bss *bss)
+{
+	const char *link_pin_path = "/sys/fs/bpf/pinned_link_test";
+	struct stat statbuf = {};
+	struct bpf_link *link;
+	int err, i;
+
+	link = bpf_program__attach(prog);
+	if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
+		goto cleanup;
+
+	bss->in = 1;
+	usleep(1);
+	CHECK(bss->out != 1, "res_check1", "exp %d, got %d\n", 1, bss->out);
+
+	/* pin link */
+	err = bpf_link__pin(link, link_pin_path);
+	if (CHECK(err, "link_pin", "err: %d\n", err))
+		goto cleanup;
+
+	CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path1",
+	      "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link));
+
+	/* check that link was pinned */
+	err = stat(link_pin_path, &statbuf);
+	if (CHECK(err, "stat_link", "err %d errno %d\n", err, errno))
+		goto cleanup;
+
+	bss->in = 2;
+	usleep(1);
+	CHECK(bss->out != 2, "res_check2", "exp %d, got %d\n", 2, bss->out);
+
+	/* destroy link, pinned link should keep program attached */
+	bpf_link__destroy(link);
+	link = NULL;
+
+	bss->in = 3;
+	usleep(1);
+	CHECK(bss->out != 3, "res_check3", "exp %d, got %d\n", 3, bss->out);
+
+	/* re-open link from BPFFS */
+	link = bpf_link__open(link_pin_path);
+	if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
+		goto cleanup;
+
+	CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
+	      "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link));
+
+	/* unpin link from BPFFS, program still attached */
+	err = bpf_link__unpin(link);
+	if (CHECK(err, "link_unpin", "err: %d\n", err))
+		goto cleanup;
+
+	/* still active, as we have FD open now */
+	bss->in = 4;
+	usleep(1);
+	CHECK(bss->out != 4, "res_check4", "exp %d, got %d\n", 4, bss->out);
+
+	bpf_link__destroy(link);
+	link = NULL;
+
+	/* Validate it's finally detached.
+	 * Actual detachment might get delayed a bit, so there is no reliable
+	 * way to validate it immediately here, let's count up for long enough
+	 * and see if eventually output stops being updated
+	 */
+	for (i = 5; i < 10000; i++) {
+		bss->in = i;
+		usleep(1);
+		if (bss->out == i - 1)
+			break;
+	}
+	CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
+
+cleanup:
+	if (!IS_ERR(link))
+		bpf_link__destroy(link);
+}
+
+void test_link_pinning(void)
+{
+	struct test_link_pinning* skel;
+
+	skel = test_link_pinning__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+
+	if (test__start_subtest("pin_raw_tp"))
+		test_link_pinning_subtest(skel->progs.raw_tp_prog, skel->bss);
+	if (test__start_subtest("pin_tp_btf"))
+		test_link_pinning_subtest(skel->progs.tp_btf_prog, skel->bss);
+
+	test_link_pinning__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c
new file mode 100644
index 000000000000..97fec70c600b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include "modify_return.skel.h"
+
+#define LOWER(x) ((x) & 0xffff)
+#define UPPER(x) ((x) >> 16)
+
+
+static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret)
+{
+	struct modify_return *skel = NULL;
+	int err, prog_fd;
+	__u32 duration = 0, retval;
+	__u16 side_effect;
+	__s16 ret;
+
+	skel = modify_return__open_and_load();
+	if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n"))
+		goto cleanup;
+
+	err = modify_return__attach(skel);
+	if (CHECK(err, "modify_return", "attach failed: %d\n", err))
+		goto cleanup;
+
+	skel->bss->input_retval = input_retval;
+	prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+	err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0,
+				&retval, &duration);
+
+	CHECK(err, "test_run", "err %d errno %d\n", err, errno);
+
+	side_effect = UPPER(retval);
+	ret  = LOWER(retval);
+
+	CHECK(ret != want_ret, "test_run",
+	      "unexpected ret: %d, expected: %d\n", ret, want_ret);
+	CHECK(side_effect != want_side_effect, "modify_return",
+	      "unexpected side_effect: %d\n", side_effect);
+
+	CHECK(skel->bss->fentry_result != 1, "modify_return",
+	      "fentry failed\n");
+	CHECK(skel->bss->fexit_result != 1, "modify_return",
+	      "fexit failed\n");
+	CHECK(skel->bss->fmod_ret_result != 1, "modify_return",
+	      "fmod_ret failed\n");
+
+cleanup:
+	modify_return__destroy(skel);
+}
+
+void test_modify_return(void)
+{
+	run_test(0 /* input_retval */,
+		 1 /* want_side_effect */,
+		 4 /* want_ret */);
+	run_test(-EINVAL /* input_retval */,
+		 0 /* want_side_effect */,
+		 -EINVAL /* want_ret */);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
new file mode 100644
index 000000000000..542240e16564
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
+#include <test_progs.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+struct bss {
+	__u64 dev;
+	__u64 ino;
+	__u64 pid_tgid;
+	__u64 user_pid_tgid;
+};
+
+void test_ns_current_pid_tgid(void)
+{
+	const char *probe_name = "raw_tracepoint/sys_enter";
+	const char *file = "test_ns_current_pid_tgid.o";
+	int err, key = 0, duration = 0;
+	struct bpf_link *link = NULL;
+	struct bpf_program *prog;
+	struct bpf_map *bss_map;
+	struct bpf_object *obj;
+	struct bss bss;
+	struct stat st;
+	__u64 id;
+
+	obj = bpf_object__open_file(file, NULL);
+	if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+		return;
+
+	err = bpf_object__load(obj);
+	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+		goto cleanup;
+
+	bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
+	if (CHECK(!bss_map, "find_bss_map", "failed\n"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_title(obj, probe_name);
+	if (CHECK(!prog, "find_prog", "prog '%s' not found\n",
+		  probe_name))
+		goto cleanup;
+
+	memset(&bss, 0, sizeof(bss));
+	pid_t tid = syscall(SYS_gettid);
+	pid_t pid = getpid();
+
+	id = (__u64) tid << 32 | pid;
+	bss.user_pid_tgid = id;
+
+	if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) {
+		perror("Failed to stat /proc/self/ns/pid");
+		goto cleanup;
+	}
+
+	bss.dev = st.st_dev;
+	bss.ino = st.st_ino;
+
+	err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
+	if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err))
+		goto cleanup;
+
+	link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+	if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
+		  PTR_ERR(link))) {
+		link = NULL;
+		goto cleanup;
+	}
+
+	/* trigger some syscalls */
+	usleep(1);
+
+	err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
+	if (CHECK(err, "set_bss", "failed to get bss : %d\n", err))
+		goto cleanup;
+
+	if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
+		  "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
+		goto cleanup;
+cleanup:
+	if (!link) {
+		bpf_link__destroy(link);
+		link = NULL;
+	}
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
new file mode 100644
index 000000000000..e35c444902a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include "bpf/libbpf_internal.h"
+#include "test_perf_branches.skel.h"
+
+static void check_good_sample(struct test_perf_branches *skel)
+{
+	int written_global = skel->bss->written_global_out;
+	int required_size = skel->bss->required_size_out;
+	int written_stack = skel->bss->written_stack_out;
+	int pbe_size = sizeof(struct perf_branch_entry);
+	int duration = 0;
+
+	if (CHECK(!skel->bss->valid, "output not valid",
+		 "no valid sample from prog"))
+		return;
+
+	/*
+	 * It's hard to validate the contents of the branch entries b/c it
+	 * would require some kind of disassembler and also encoding the
+	 * valid jump instructions for supported architectures. So just check
+	 * the easy stuff for now.
+	 */
+	CHECK(required_size <= 0, "read_branches_size", "err %d\n", required_size);
+	CHECK(written_stack < 0, "read_branches_stack", "err %d\n", written_stack);
+	CHECK(written_stack % pbe_size != 0, "read_branches_stack",
+	      "stack bytes written=%d not multiple of struct size=%d\n",
+	      written_stack, pbe_size);
+	CHECK(written_global < 0, "read_branches_global", "err %d\n", written_global);
+	CHECK(written_global % pbe_size != 0, "read_branches_global",
+	      "global bytes written=%d not multiple of struct size=%d\n",
+	      written_global, pbe_size);
+	CHECK(written_global < written_stack, "read_branches_size",
+	      "written_global=%d < written_stack=%d\n", written_global, written_stack);
+}
+
+static void check_bad_sample(struct test_perf_branches *skel)
+{
+	int written_global = skel->bss->written_global_out;
+	int required_size = skel->bss->required_size_out;
+	int written_stack = skel->bss->written_stack_out;
+	int duration = 0;
+
+	if (CHECK(!skel->bss->valid, "output not valid",
+		 "no valid sample from prog"))
+		return;
+
+	CHECK((required_size != -EINVAL && required_size != -ENOENT),
+	      "read_branches_size", "err %d\n", required_size);
+	CHECK((written_stack != -EINVAL && written_stack != -ENOENT),
+	      "read_branches_stack", "written %d\n", written_stack);
+	CHECK((written_global != -EINVAL && written_global != -ENOENT),
+	      "read_branches_global", "written %d\n", written_global);
+}
+
+static void test_perf_branches_common(int perf_fd,
+				      void (*cb)(struct test_perf_branches *))
+{
+	struct test_perf_branches *skel;
+	int err, i, duration = 0;
+	bool detached = false;
+	struct bpf_link *link;
+	volatile int j = 0;
+	cpu_set_t cpu_set;
+
+	skel = test_perf_branches__open_and_load();
+	if (CHECK(!skel, "test_perf_branches_load",
+		  "perf_branches skeleton failed\n"))
+		return;
+
+	/* attach perf_event */
+	link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
+	if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
+		goto out_destroy_skel;
+
+	/* generate some branches on cpu 0 */
+	CPU_ZERO(&cpu_set);
+	CPU_SET(0, &cpu_set);
+	err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+	if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
+		goto out_destroy;
+	/* spin the loop for a while (random high number) */
+	for (i = 0; i < 1000000; ++i)
+		++j;
+
+	test_perf_branches__detach(skel);
+	detached = true;
+
+	cb(skel);
+out_destroy:
+	bpf_link__destroy(link);
+out_destroy_skel:
+	if (!detached)
+		test_perf_branches__detach(skel);
+	test_perf_branches__destroy(skel);
+}
+
+static void test_perf_branches_hw(void)
+{
+	struct perf_event_attr attr = {0};
+	int duration = 0;
+	int pfd;
+
+	/* create perf event */
+	attr.size = sizeof(attr);
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.config = PERF_COUNT_HW_CPU_CYCLES;
+	attr.freq = 1;
+	attr.sample_freq = 4000;
+	attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
+	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
+	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+
+	/*
+	 * Some setups don't support branch records (virtual machines, !x86),
+	 * so skip test in this case.
+	 */
+	if (pfd == -1) {
+		if (errno == ENOENT || errno == EOPNOTSUPP) {
+			printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
+			       __func__);
+			test__skip();
+			return;
+		}
+		if (CHECK(pfd < 0, "perf_event_open", "err %d errno %d\n",
+			  pfd, errno))
+			return;
+	}
+
+	test_perf_branches_common(pfd, check_good_sample);
+
+	close(pfd);
+}
+
+/*
+ * Tests negative case -- run bpf_read_branch_records() on improperly configured
+ * perf event.
+ */
+static void test_perf_branches_no_hw(void)
+{
+	struct perf_event_attr attr = {0};
+	int duration = 0;
+	int pfd;
+
+	/* create perf event */
+	attr.size = sizeof(attr);
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.config = PERF_COUNT_SW_CPU_CLOCK;
+	attr.freq = 1;
+	attr.sample_freq = 4000;
+	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+	if (CHECK(pfd < 0, "perf_event_open", "err %d\n", pfd))
+		return;
+
+	test_perf_branches_common(pfd, check_bad_sample);
+
+	close(pfd);
+}
+
+void test_perf_branches(void)
+{
+	if (test__start_subtest("perf_branches_hw"))
+		test_perf_branches_hw();
+	if (test__start_subtest("perf_branches_no_hw"))
+		test_perf_branches_no_hw();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 0800036ed654..821b4146b7b6 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -36,6 +36,7 @@ static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
 static __u32 expected_results[NR_RESULTS];
 static int sk_fds[REUSEPORT_ARRAY_SIZE];
 static int reuseport_array = -1, outer_map = -1;
+static enum bpf_map_type inner_map_type;
 static int select_by_skb_data_prog;
 static int saved_tcp_syncookie = -1;
 static struct bpf_object *obj;
@@ -63,13 +64,15 @@ static union sa46 {
 	}								\
 })
 
-static int create_maps(void)
+static int create_maps(enum bpf_map_type inner_type)
 {
 	struct bpf_create_map_attr attr = {};
 
+	inner_map_type = inner_type;
+
 	/* Creating reuseport_array */
 	attr.name = "reuseport_array";
-	attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
+	attr.map_type = inner_type;
 	attr.key_size = sizeof(__u32);
 	attr.value_size = sizeof(__u32);
 	attr.max_entries = REUSEPORT_ARRAY_SIZE;
@@ -506,11 +509,6 @@ static void test_syncookie(int type, sa_family_t family)
 		.pass_on_failure = 0,
 	};
 
-	if (type != SOCK_STREAM) {
-		test__skip();
-		return;
-	}
-
 	/*
 	 * +1 for TCP-SYN and
 	 * +1 for the TCP-ACK (ack the syncookie)
@@ -728,12 +726,36 @@ static void cleanup_per_test(bool no_inner_map)
 
 static void cleanup(void)
 {
-	if (outer_map != -1)
+	if (outer_map != -1) {
 		close(outer_map);
-	if (reuseport_array != -1)
+		outer_map = -1;
+	}
+
+	if (reuseport_array != -1) {
 		close(reuseport_array);
-	if (obj)
+		reuseport_array = -1;
+	}
+
+	if (obj) {
 		bpf_object__close(obj);
+		obj = NULL;
+	}
+
+	memset(expected_results, 0, sizeof(expected_results));
+}
+
+static const char *maptype_str(enum bpf_map_type type)
+{
+	switch (type) {
+	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+		return "reuseport_sockarray";
+	case BPF_MAP_TYPE_SOCKMAP:
+		return "sockmap";
+	case BPF_MAP_TYPE_SOCKHASH:
+		return "sockhash";
+	default:
+		return "unknown";
+	}
 }
 
 static const char *family_str(sa_family_t family)
@@ -760,7 +782,7 @@ static const char *sotype_str(int sotype)
 	}
 }
 
-#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ }
+#define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ }
 
 static void test_config(int sotype, sa_family_t family, bool inany)
 {
@@ -768,12 +790,15 @@ static void test_config(int sotype, sa_family_t family, bool inany)
 		void (*fn)(int sotype, sa_family_t family);
 		const char *name;
 		bool no_inner_map;
+		int need_sotype;
 	} tests[] = {
-		TEST_INIT(test_err_inner_map, true /* no_inner_map */),
+		TEST_INIT(test_err_inner_map,
+			  .no_inner_map = true),
 		TEST_INIT(test_err_skb_data),
 		TEST_INIT(test_err_sk_select_port),
 		TEST_INIT(test_pass),
-		TEST_INIT(test_syncookie),
+		TEST_INIT(test_syncookie,
+			  .need_sotype = SOCK_STREAM),
 		TEST_INIT(test_pass_on_err),
 		TEST_INIT(test_detach_bpf),
 	};
@@ -781,7 +806,11 @@ static void test_config(int sotype, sa_family_t family, bool inany)
 	const struct test *t;
 
 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
-		snprintf(s, sizeof(s), "%s/%s %s %s",
+		if (t->need_sotype && t->need_sotype != sotype)
+			continue; /* test not compatible with socket type */
+
+		snprintf(s, sizeof(s), "%s %s/%s %s %s",
+			 maptype_str(inner_map_type),
 			 family_str(family), sotype_str(sotype),
 			 inany ? "INANY" : "LOOPBACK", t->name);
 
@@ -816,13 +845,20 @@ static void test_all(void)
 		test_config(c->sotype, c->family, c->inany);
 }
 
-void test_select_reuseport(void)
+void test_map_type(enum bpf_map_type mt)
 {
-	if (create_maps())
+	if (create_maps(mt))
 		goto out;
 	if (prepare_bpf_obj())
 		goto out;
 
+	test_all();
+out:
+	cleanup();
+}
+
+void test_select_reuseport(void)
+{
 	saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
 	if (saved_tcp_fo < 0)
 		goto out;
@@ -835,8 +871,9 @@ void test_select_reuseport(void)
 	if (disable_syncookie())
 		goto out;
 
-	test_all();
+	test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+	test_map_type(BPF_MAP_TYPE_SOCKMAP);
+	test_map_type(BPF_MAP_TYPE_SOCKHASH);
 out:
-	cleanup();
 	restore_sysctls();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
new file mode 100644
index 000000000000..189a34a7addb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "test_send_signal_kern.skel.h"
+
+static void sigusr1_handler(int signum)
+{
+}
+
+#define THREAD_COUNT 100
+
+static void *worker(void *p)
+{
+	int i;
+
+	for ( i = 0; i < 1000; i++)
+		usleep(1);
+
+	return NULL;
+}
+
+void test_send_signal_sched_switch(void)
+{
+	struct test_send_signal_kern *skel;
+	pthread_t threads[THREAD_COUNT];
+	u32 duration = 0;
+	int i, err;
+
+	signal(SIGUSR1, sigusr1_handler);
+
+	skel = test_send_signal_kern__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+		return;
+
+	skel->bss->pid = getpid();
+	skel->bss->sig = SIGUSR1;
+
+	err = test_send_signal_kern__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
+		goto destroy_skel;
+
+	for (i = 0; i < THREAD_COUNT; i++) {
+		err = pthread_create(threads + i, NULL, worker, NULL);
+		if (CHECK(err, "pthread_create", "Error creating thread, %s\n",
+			  strerror(errno)))
+			goto destroy_skel;
+	}
+
+	for (i = 0; i < THREAD_COUNT; i++)
+		pthread_join(threads[i], NULL);
+
+destroy_skel:
+	test_send_signal_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
new file mode 100644
index 000000000000..d572e1a2c297
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+// Copyright (c) 2019 Cloudflare
+// Copyright (c) 2020 Isovalent, Inc.
+/*
+ * Test that the socket assign program is able to redirect traffic towards a
+ * socket, regardless of whether the port or address destination of the traffic
+ * matches the port.
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "test_progs.h"
+
+#define BIND_PORT 1234
+#define CONNECT_PORT 4321
+#define TEST_DADDR (0xC0A80203)
+#define NS_SELF "/proc/self/ns/net"
+
+static const struct timeval timeo_sec = { .tv_sec = 3 };
+static const size_t timeo_optlen = sizeof(timeo_sec);
+static int stop, duration;
+
+static bool
+configure_stack(void)
+{
+	char tc_cmd[BUFSIZ];
+
+	/* Move to a new networking namespace */
+	if (CHECK_FAIL(unshare(CLONE_NEWNET)))
+		return false;
+
+	/* Configure necessary links, routes */
+	if (CHECK_FAIL(system("ip link set dev lo up")))
+		return false;
+	if (CHECK_FAIL(system("ip route add local default dev lo")))
+		return false;
+	if (CHECK_FAIL(system("ip -6 route add local default dev lo")))
+		return false;
+
+	/* Load qdisc, BPF program */
+	if (CHECK_FAIL(system("tc qdisc add dev lo clsact")))
+		return false;
+	sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
+		       "direct-action object-file ./test_sk_assign.o",
+		       "section classifier/sk_assign_test",
+		       (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "");
+	if (CHECK(system(tc_cmd), "BPF load failed;",
+		  "run with -vv for more info\n"))
+		return false;
+
+	return true;
+}
+
+static int
+start_server(const struct sockaddr *addr, socklen_t len, int type)
+{
+	int fd;
+
+	fd = socket(addr->sa_family, type, 0);
+	if (CHECK_FAIL(fd == -1))
+		goto out;
+	if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
+				  timeo_optlen)))
+		goto close_out;
+	if (CHECK_FAIL(bind(fd, addr, len) == -1))
+		goto close_out;
+	if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
+		goto close_out;
+
+	goto out;
+close_out:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+static int
+connect_to_server(const struct sockaddr *addr, socklen_t len, int type)
+{
+	int fd = -1;
+
+	fd = socket(addr->sa_family, type, 0);
+	if (CHECK_FAIL(fd == -1))
+		goto out;
+	if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
+				  timeo_optlen)))
+		goto close_out;
+	if (CHECK_FAIL(connect(fd, addr, len)))
+		goto close_out;
+
+	goto out;
+close_out:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+static in_port_t
+get_port(int fd)
+{
+	struct sockaddr_storage ss;
+	socklen_t slen = sizeof(ss);
+	in_port_t port = 0;
+
+	if (CHECK_FAIL(getsockname(fd, (struct sockaddr *)&ss, &slen)))
+		return port;
+
+	switch (ss.ss_family) {
+	case AF_INET:
+		port = ((struct sockaddr_in *)&ss)->sin_port;
+		break;
+	case AF_INET6:
+		port = ((struct sockaddr_in6 *)&ss)->sin6_port;
+		break;
+	default:
+		CHECK(1, "Invalid address family", "%d\n", ss.ss_family);
+	}
+	return port;
+}
+
+static ssize_t
+rcv_msg(int srv_client, int type)
+{
+	struct sockaddr_storage ss;
+	char buf[BUFSIZ];
+	socklen_t slen;
+
+	if (type == SOCK_STREAM)
+		return read(srv_client, &buf, sizeof(buf));
+	else
+		return recvfrom(srv_client, &buf, sizeof(buf), 0,
+				(struct sockaddr *)&ss, &slen);
+}
+
+static int
+run_test(int server_fd, const struct sockaddr *addr, socklen_t len, int type)
+{
+	int client = -1, srv_client = -1;
+	char buf[] = "testing";
+	in_port_t port;
+	int ret = 1;
+
+	client = connect_to_server(addr, len, type);
+	if (client == -1) {
+		perror("Cannot connect to server");
+		goto out;
+	}
+
+	if (type == SOCK_STREAM) {
+		srv_client = accept(server_fd, NULL, NULL);
+		if (CHECK_FAIL(srv_client == -1)) {
+			perror("Can't accept connection");
+			goto out;
+		}
+	} else {
+		srv_client = server_fd;
+	}
+	if (CHECK_FAIL(write(client, buf, sizeof(buf)) != sizeof(buf))) {
+		perror("Can't write on client");
+		goto out;
+	}
+	if (CHECK_FAIL(rcv_msg(srv_client, type) != sizeof(buf))) {
+		perror("Can't read on server");
+		goto out;
+	}
+
+	port = get_port(srv_client);
+	if (CHECK_FAIL(!port))
+		goto out;
+	/* SOCK_STREAM is connected via accept(), so the server's local address
+	 * will be the CONNECT_PORT rather than the BIND port that corresponds
+	 * to the listen socket. SOCK_DGRAM on the other hand is connectionless
+	 * so we can't really do the same check there; the server doesn't ever
+	 * create a socket with CONNECT_PORT.
+	 */
+	if (type == SOCK_STREAM &&
+	    CHECK(port != htons(CONNECT_PORT), "Expected", "port %u but got %u",
+		  CONNECT_PORT, ntohs(port)))
+		goto out;
+	else if (type == SOCK_DGRAM &&
+		 CHECK(port != htons(BIND_PORT), "Expected",
+		       "port %u but got %u", BIND_PORT, ntohs(port)))
+		goto out;
+
+	ret = 0;
+out:
+	close(client);
+	if (srv_client != server_fd)
+		close(srv_client);
+	if (ret)
+		WRITE_ONCE(stop, 1);
+	return ret;
+}
+
+static void
+prepare_addr(struct sockaddr *addr, int family, __u16 port, bool rewrite_addr)
+{
+	struct sockaddr_in *addr4;
+	struct sockaddr_in6 *addr6;
+
+	switch (family) {
+	case AF_INET:
+		addr4 = (struct sockaddr_in *)addr;
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = family;
+		addr4->sin_port = htons(port);
+		if (rewrite_addr)
+			addr4->sin_addr.s_addr = htonl(TEST_DADDR);
+		else
+			addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		break;
+	case AF_INET6:
+		addr6 = (struct sockaddr_in6 *)addr;
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = family;
+		addr6->sin6_port = htons(port);
+		addr6->sin6_addr = in6addr_loopback;
+		if (rewrite_addr)
+			addr6->sin6_addr.s6_addr32[3] = htonl(TEST_DADDR);
+		break;
+	default:
+		fprintf(stderr, "Invalid family %d", family);
+	}
+}
+
+struct test_sk_cfg {
+	const char *name;
+	int family;
+	struct sockaddr *addr;
+	socklen_t len;
+	int type;
+	bool rewrite_addr;
+};
+
+#define TEST(NAME, FAMILY, TYPE, REWRITE)				\
+{									\
+	.name = NAME,							\
+	.family = FAMILY,						\
+	.addr = (FAMILY == AF_INET) ? (struct sockaddr *)&addr4		\
+				    : (struct sockaddr *)&addr6,	\
+	.len = (FAMILY == AF_INET) ? sizeof(addr4) : sizeof(addr6),	\
+	.type = TYPE,							\
+	.rewrite_addr = REWRITE,					\
+}
+
+void test_sk_assign(void)
+{
+	struct sockaddr_in addr4;
+	struct sockaddr_in6 addr6;
+	struct test_sk_cfg tests[] = {
+		TEST("ipv4 tcp port redir", AF_INET, SOCK_STREAM, false),
+		TEST("ipv4 tcp addr redir", AF_INET, SOCK_STREAM, true),
+		TEST("ipv6 tcp port redir", AF_INET6, SOCK_STREAM, false),
+		TEST("ipv6 tcp addr redir", AF_INET6, SOCK_STREAM, true),
+		TEST("ipv4 udp port redir", AF_INET, SOCK_DGRAM, false),
+		TEST("ipv4 udp addr redir", AF_INET, SOCK_DGRAM, true),
+		TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false),
+		TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true),
+	};
+	int server = -1;
+	int self_net;
+
+	self_net = open(NS_SELF, O_RDONLY);
+	if (CHECK_FAIL(self_net < 0)) {
+		perror("Unable to open "NS_SELF);
+		return;
+	}
+
+	if (!configure_stack()) {
+		perror("configure_stack");
+		goto cleanup;
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
+		struct test_sk_cfg *test = &tests[i];
+		const struct sockaddr *addr;
+
+		if (!test__start_subtest(test->name))
+			continue;
+		prepare_addr(test->addr, test->family, BIND_PORT, false);
+		addr = (const struct sockaddr *)test->addr;
+		server = start_server(addr, test->len, test->type);
+		if (server == -1)
+			goto cleanup;
+
+		/* connect to unbound ports */
+		prepare_addr(test->addr, test->family, CONNECT_PORT,
+			     test->rewrite_addr);
+		if (run_test(server, addr, test->len, test->type))
+			goto close;
+
+		close(server);
+		server = -1;
+	}
+
+close:
+	close(server);
+cleanup:
+	if (CHECK_FAIL(setns(self_net, CLONE_NEWNET)))
+		perror("Failed to setns("NS_SELF")");
+	close(self_net);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index c6d6b685a946..4538bd08203f 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -14,6 +14,7 @@ void test_skb_ctx(void)
 		.wire_len = 100,
 		.gso_segs = 8,
 		.mark = 9,
+		.gso_size = 10,
 	};
 	struct bpf_prog_test_run_attr tattr = {
 		.data_in = &pkt_v4,
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
new file mode 100644
index 000000000000..06b86addc181
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+/*
+ * Tests for sockmap/sockhash holding kTLS sockets.
+ */
+
+#include "test_progs.h"
+
+#define MAX_TEST_NAME 80
+#define TCP_ULP 31
+
+static int tcp_server(int family)
+{
+	int err, s;
+
+	s = socket(family, SOCK_STREAM, 0);
+	if (CHECK_FAIL(s == -1)) {
+		perror("socket");
+		return -1;
+	}
+
+	err = listen(s, SOMAXCONN);
+	if (CHECK_FAIL(err)) {
+		perror("listen");
+		return -1;
+	}
+
+	return s;
+}
+
+static int disconnect(int fd)
+{
+	struct sockaddr unspec = { AF_UNSPEC };
+
+	return connect(fd, &unspec, sizeof(unspec));
+}
+
+/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */
+static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
+{
+	struct sockaddr_storage addr = {0};
+	socklen_t len = sizeof(addr);
+	int err, cli, srv, zero = 0;
+
+	srv = tcp_server(family);
+	if (srv == -1)
+		return;
+
+	err = getsockname(srv, (struct sockaddr *)&addr, &len);
+	if (CHECK_FAIL(err)) {
+		perror("getsockopt");
+		goto close_srv;
+	}
+
+	cli = socket(family, SOCK_STREAM, 0);
+	if (CHECK_FAIL(cli == -1)) {
+		perror("socket");
+		goto close_srv;
+	}
+
+	err = connect(cli, (struct sockaddr *)&addr, len);
+	if (CHECK_FAIL(err)) {
+		perror("connect");
+		goto close_cli;
+	}
+
+	err = bpf_map_update_elem(map, &zero, &cli, 0);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_map_update_elem");
+		goto close_cli;
+	}
+
+	err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+	if (CHECK_FAIL(err)) {
+		perror("setsockopt(TCP_ULP)");
+		goto close_cli;
+	}
+
+	err = bpf_map_delete_elem(map, &zero);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_map_delete_elem");
+		goto close_cli;
+	}
+
+	err = disconnect(cli);
+	if (CHECK_FAIL(err))
+		perror("disconnect");
+
+close_cli:
+	close(cli);
+close_srv:
+	close(srv);
+}
+
+static void run_tests(int family, enum bpf_map_type map_type)
+{
+	char test_name[MAX_TEST_NAME];
+	int map;
+
+	map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
+	if (CHECK_FAIL(map == -1)) {
+		perror("bpf_map_create");
+		return;
+	}
+
+	snprintf(test_name, MAX_TEST_NAME,
+		 "sockmap_ktls disconnect_after_delete %s %s",
+		 family == AF_INET ? "IPv4" : "IPv6",
+		 map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH");
+	if (!test__start_subtest(test_name))
+		return;
+
+	test_sockmap_ktls_disconnect_after_delete(family, map);
+
+	close(map);
+}
+
+void test_sockmap_ktls(void)
+{
+	run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP);
+	run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH);
+	run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP);
+	run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
new file mode 100644
index 000000000000..d7d65a700799
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -0,0 +1,1635 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+/*
+ * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
+ * Covers:
+ *  1. BPF map operations - bpf_map_{update,lookup delete}_elem
+ *  2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
+ *  3. BPF reuseport helper - bpf_sk_select_reuseport
+ */
+
+#include <linux/compiler.h>
+#include <errno.h>
+#include <error.h>
+#include <limits.h>
+#include <netinet/in.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/select.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "test_progs.h"
+#include "test_sockmap_listen.skel.h"
+
+#define IO_TIMEOUT_SEC 30
+#define MAX_STRERR_LEN 256
+#define MAX_TEST_NAME 80
+
+#define _FAIL(errnum, fmt...)                                                  \
+	({                                                                     \
+		error_at_line(0, (errnum), __func__, __LINE__, fmt);           \
+		CHECK_FAIL(true);                                              \
+	})
+#define FAIL(fmt...) _FAIL(0, fmt)
+#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
+#define FAIL_LIBBPF(err, msg)                                                  \
+	({                                                                     \
+		char __buf[MAX_STRERR_LEN];                                    \
+		libbpf_strerror((err), __buf, sizeof(__buf));                  \
+		FAIL("%s: %s", (msg), __buf);                                  \
+	})
+
+/* Wrappers that fail the test on error and report it. */
+
+#define xaccept_nonblock(fd, addr, len)                                        \
+	({                                                                     \
+		int __ret =                                                    \
+			accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC);   \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("accept");                                  \
+		__ret;                                                         \
+	})
+
+#define xbind(fd, addr, len)                                                   \
+	({                                                                     \
+		int __ret = bind((fd), (addr), (len));                         \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("bind");                                    \
+		__ret;                                                         \
+	})
+
+#define xclose(fd)                                                             \
+	({                                                                     \
+		int __ret = close((fd));                                       \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("close");                                   \
+		__ret;                                                         \
+	})
+
+#define xconnect(fd, addr, len)                                                \
+	({                                                                     \
+		int __ret = connect((fd), (addr), (len));                      \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("connect");                                 \
+		__ret;                                                         \
+	})
+
+#define xgetsockname(fd, addr, len)                                            \
+	({                                                                     \
+		int __ret = getsockname((fd), (addr), (len));                  \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("getsockname");                             \
+		__ret;                                                         \
+	})
+
+#define xgetsockopt(fd, level, name, val, len)                                 \
+	({                                                                     \
+		int __ret = getsockopt((fd), (level), (name), (val), (len));   \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("getsockopt(" #name ")");                   \
+		__ret;                                                         \
+	})
+
+#define xlisten(fd, backlog)                                                   \
+	({                                                                     \
+		int __ret = listen((fd), (backlog));                           \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("listen");                                  \
+		__ret;                                                         \
+	})
+
+#define xsetsockopt(fd, level, name, val, len)                                 \
+	({                                                                     \
+		int __ret = setsockopt((fd), (level), (name), (val), (len));   \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("setsockopt(" #name ")");                   \
+		__ret;                                                         \
+	})
+
+#define xsend(fd, buf, len, flags)                                             \
+	({                                                                     \
+		ssize_t __ret = send((fd), (buf), (len), (flags));             \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("send");                                    \
+		__ret;                                                         \
+	})
+
+#define xrecv_nonblock(fd, buf, len, flags)                                    \
+	({                                                                     \
+		ssize_t __ret = recv_timeout((fd), (buf), (len), (flags),      \
+					     IO_TIMEOUT_SEC);                  \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("recv");                                    \
+		__ret;                                                         \
+	})
+
+#define xsocket(family, sotype, flags)                                         \
+	({                                                                     \
+		int __ret = socket(family, sotype, flags);                     \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("socket");                                  \
+		__ret;                                                         \
+	})
+
+#define xbpf_map_delete_elem(fd, key)                                          \
+	({                                                                     \
+		int __ret = bpf_map_delete_elem((fd), (key));                  \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("map_delete");                              \
+		__ret;                                                         \
+	})
+
+#define xbpf_map_lookup_elem(fd, key, val)                                     \
+	({                                                                     \
+		int __ret = bpf_map_lookup_elem((fd), (key), (val));           \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("map_lookup");                              \
+		__ret;                                                         \
+	})
+
+#define xbpf_map_update_elem(fd, key, val, flags)                              \
+	({                                                                     \
+		int __ret = bpf_map_update_elem((fd), (key), (val), (flags));  \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("map_update");                              \
+		__ret;                                                         \
+	})
+
+#define xbpf_prog_attach(prog, target, type, flags)                            \
+	({                                                                     \
+		int __ret =                                                    \
+			bpf_prog_attach((prog), (target), (type), (flags));    \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("prog_attach(" #type ")");                  \
+		__ret;                                                         \
+	})
+
+#define xbpf_prog_detach2(prog, target, type)                                  \
+	({                                                                     \
+		int __ret = bpf_prog_detach2((prog), (target), (type));        \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("prog_detach2(" #type ")");                 \
+		__ret;                                                         \
+	})
+
+#define xpthread_create(thread, attr, func, arg)                               \
+	({                                                                     \
+		int __ret = pthread_create((thread), (attr), (func), (arg));   \
+		errno = __ret;                                                 \
+		if (__ret)                                                     \
+			FAIL_ERRNO("pthread_create");                          \
+		__ret;                                                         \
+	})
+
+#define xpthread_join(thread, retval)                                          \
+	({                                                                     \
+		int __ret = pthread_join((thread), (retval));                  \
+		errno = __ret;                                                 \
+		if (__ret)                                                     \
+			FAIL_ERRNO("pthread_join");                            \
+		__ret;                                                         \
+	})
+
+static int poll_read(int fd, unsigned int timeout_sec)
+{
+	struct timeval timeout = { .tv_sec = timeout_sec };
+	fd_set rfds;
+	int r;
+
+	FD_ZERO(&rfds);
+	FD_SET(fd, &rfds);
+
+	r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+	if (r == 0)
+		errno = ETIME;
+
+	return r == 1 ? 0 : -1;
+}
+
+static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+			  unsigned int timeout_sec)
+{
+	if (poll_read(fd, timeout_sec))
+		return -1;
+
+	return accept(fd, addr, len);
+}
+
+static int recv_timeout(int fd, void *buf, size_t len, int flags,
+			unsigned int timeout_sec)
+{
+	if (poll_read(fd, timeout_sec))
+		return -1;
+
+	return recv(fd, buf, len, flags);
+}
+
+static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
+{
+	struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+	addr4->sin_family = AF_INET;
+	addr4->sin_port = 0;
+	addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	*len = sizeof(*addr4);
+}
+
+static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
+{
+	struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
+
+	addr6->sin6_family = AF_INET6;
+	addr6->sin6_port = 0;
+	addr6->sin6_addr = in6addr_loopback;
+	*len = sizeof(*addr6);
+}
+
+static void init_addr_loopback(int family, struct sockaddr_storage *ss,
+			       socklen_t *len)
+{
+	switch (family) {
+	case AF_INET:
+		init_addr_loopback4(ss, len);
+		return;
+	case AF_INET6:
+		init_addr_loopback6(ss, len);
+		return;
+	default:
+		FAIL("unsupported address family %d", family);
+	}
+}
+
+static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
+{
+	return (struct sockaddr *)ss;
+}
+
+static int enable_reuseport(int s, int progfd)
+{
+	int err, one = 1;
+
+	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+	if (err)
+		return -1;
+	err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
+			  sizeof(progfd));
+	if (err)
+		return -1;
+
+	return 0;
+}
+
+static int socket_loopback_reuseport(int family, int sotype, int progfd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len;
+	int err, s;
+
+	init_addr_loopback(family, &addr, &len);
+
+	s = xsocket(family, sotype, 0);
+	if (s == -1)
+		return -1;
+
+	if (progfd >= 0)
+		enable_reuseport(s, progfd);
+
+	err = xbind(s, sockaddr(&addr), len);
+	if (err)
+		goto close;
+
+	if (sotype & SOCK_DGRAM)
+		return s;
+
+	err = xlisten(s, SOMAXCONN);
+	if (err)
+		goto close;
+
+	return s;
+close:
+	xclose(s);
+	return -1;
+}
+
+static int socket_loopback(int family, int sotype)
+{
+	return socket_loopback_reuseport(family, sotype, -1);
+}
+
+static void test_insert_invalid(int family, int sotype, int mapfd)
+{
+	u32 key = 0;
+	u64 value;
+	int err;
+
+	value = -1;
+	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+	if (!err || errno != EINVAL)
+		FAIL_ERRNO("map_update: expected EINVAL");
+
+	value = INT_MAX;
+	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+	if (!err || errno != EBADF)
+		FAIL_ERRNO("map_update: expected EBADF");
+}
+
+static void test_insert_opened(int family, int sotype, int mapfd)
+{
+	u32 key = 0;
+	u64 value;
+	int err, s;
+
+	s = xsocket(family, sotype, 0);
+	if (s == -1)
+		return;
+
+	errno = 0;
+	value = s;
+	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+	if (!err || errno != EOPNOTSUPP)
+		FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+
+	xclose(s);
+}
+
+static void test_insert_bound(int family, int sotype, int mapfd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len;
+	u32 key = 0;
+	u64 value;
+	int err, s;
+
+	init_addr_loopback(family, &addr, &len);
+
+	s = xsocket(family, sotype, 0);
+	if (s == -1)
+		return;
+
+	err = xbind(s, sockaddr(&addr), len);
+	if (err)
+		goto close;
+
+	errno = 0;
+	value = s;
+	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+	if (!err || errno != EOPNOTSUPP)
+		FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+close:
+	xclose(s);
+}
+
+static void test_insert(int family, int sotype, int mapfd)
+{
+	u64 value;
+	u32 key;
+	int s;
+
+	s = socket_loopback(family, sotype);
+	if (s < 0)
+		return;
+
+	key = 0;
+	value = s;
+	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+	xclose(s);
+}
+
+static void test_delete_after_insert(int family, int sotype, int mapfd)
+{
+	u64 value;
+	u32 key;
+	int s;
+
+	s = socket_loopback(family, sotype);
+	if (s < 0)
+		return;
+
+	key = 0;
+	value = s;
+	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+	xbpf_map_delete_elem(mapfd, &key);
+	xclose(s);
+}
+
+static void test_delete_after_close(int family, int sotype, int mapfd)
+{
+	int err, s;
+	u64 value;
+	u32 key;
+
+	s = socket_loopback(family, sotype);
+	if (s < 0)
+		return;
+
+	key = 0;
+	value = s;
+	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+	xclose(s);
+
+	errno = 0;
+	err = bpf_map_delete_elem(mapfd, &key);
+	if (!err || (errno != EINVAL && errno != ENOENT))
+		/* SOCKMAP and SOCKHASH return different error codes */
+		FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
+}
+
+static void test_lookup_after_insert(int family, int sotype, int mapfd)
+{
+	u64 cookie, value;
+	socklen_t len;
+	u32 key;
+	int s;
+
+	s = socket_loopback(family, sotype);
+	if (s < 0)
+		return;
+
+	key = 0;
+	value = s;
+	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+	len = sizeof(cookie);
+	xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
+
+	xbpf_map_lookup_elem(mapfd, &key, &value);
+
+	if (value != cookie) {
+		FAIL("map_lookup: have %#llx, want %#llx",
+		     (unsigned long long)value, (unsigned long long)cookie);
+	}
+
+	xclose(s);
+}
+
+static void test_lookup_after_delete(int family, int sotype, int mapfd)
+{
+	int err, s;
+	u64 value;
+	u32 key;
+
+	s = socket_loopback(family, sotype);
+	if (s < 0)
+		return;
+
+	key = 0;
+	value = s;
+	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+	xbpf_map_delete_elem(mapfd, &key);
+
+	errno = 0;
+	err = bpf_map_lookup_elem(mapfd, &key, &value);
+	if (!err || errno != ENOENT)
+		FAIL_ERRNO("map_lookup: expected ENOENT");
+
+	xclose(s);
+}
+
+static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
+{
+	u32 key, value32;
+	int err, s;
+
+	s = socket_loopback(family, sotype);
+	if (s < 0)
+		return;
+
+	mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key),
+			       sizeof(value32), 1, 0);
+	if (mapfd < 0) {
+		FAIL_ERRNO("map_create");
+		goto close;
+	}
+
+	key = 0;
+	value32 = s;
+	xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
+
+	errno = 0;
+	err = bpf_map_lookup_elem(mapfd, &key, &value32);
+	if (!err || errno != ENOSPC)
+		FAIL_ERRNO("map_lookup: expected ENOSPC");
+
+	xclose(mapfd);
+close:
+	xclose(s);
+}
+
+static void test_update_existing(int family, int sotype, int mapfd)
+{
+	int s1, s2;
+	u64 value;
+	u32 key;
+
+	s1 = socket_loopback(family, sotype);
+	if (s1 < 0)
+		return;
+
+	s2 = socket_loopback(family, sotype);
+	if (s2 < 0)
+		goto close_s1;
+
+	key = 0;
+	value = s1;
+	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+	value = s2;
+	xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
+	xclose(s2);
+close_s1:
+	xclose(s1);
+}
+
+/* Exercise the code path where we destroy child sockets that never
+ * got accept()'ed, aka orphans, when parent socket gets closed.
+ */
+static void test_destroy_orphan_child(int family, int sotype, int mapfd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len;
+	int err, s, c;
+	u64 value;
+	u32 key;
+
+	s = socket_loopback(family, sotype);
+	if (s < 0)
+		return;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto close_srv;
+
+	key = 0;
+	value = s;
+	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+	c = xsocket(family, sotype, 0);
+	if (c == -1)
+		goto close_srv;
+
+	xconnect(c, sockaddr(&addr), len);
+	xclose(c);
+close_srv:
+	xclose(s);
+}
+
+/* Perform a passive open after removing listening socket from SOCKMAP
+ * to ensure that callbacks get restored properly.
+ */
+static void test_clone_after_delete(int family, int sotype, int mapfd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len;
+	int err, s, c;
+	u64 value;
+	u32 key;
+
+	s = socket_loopback(family, sotype);
+	if (s < 0)
+		return;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto close_srv;
+
+	key = 0;
+	value = s;
+	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+	xbpf_map_delete_elem(mapfd, &key);
+
+	c = xsocket(family, sotype, 0);
+	if (c < 0)
+		goto close_srv;
+
+	xconnect(c, sockaddr(&addr), len);
+	xclose(c);
+close_srv:
+	xclose(s);
+}
+
+/* Check that child socket that got created while parent was in a
+ * SOCKMAP, but got accept()'ed only after the parent has been removed
+ * from SOCKMAP, gets cloned without parent psock state or callbacks.
+ */
+static void test_accept_after_delete(int family, int sotype, int mapfd)
+{
+	struct sockaddr_storage addr;
+	const u32 zero = 0;
+	int err, s, c, p;
+	socklen_t len;
+	u64 value;
+
+	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+	if (s == -1)
+		return;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto close_srv;
+
+	value = s;
+	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+	if (err)
+		goto close_srv;
+
+	c = xsocket(family, sotype, 0);
+	if (c == -1)
+		goto close_srv;
+
+	/* Create child while parent is in sockmap */
+	err = xconnect(c, sockaddr(&addr), len);
+	if (err)
+		goto close_cli;
+
+	/* Remove parent from sockmap */
+	err = xbpf_map_delete_elem(mapfd, &zero);
+	if (err)
+		goto close_cli;
+
+	p = xaccept_nonblock(s, NULL, NULL);
+	if (p == -1)
+		goto close_cli;
+
+	/* Check that child sk_user_data is not set */
+	value = p;
+	xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+
+	xclose(p);
+close_cli:
+	xclose(c);
+close_srv:
+	xclose(s);
+}
+
+/* Check that child socket that got created and accepted while parent
+ * was in a SOCKMAP is cloned without parent psock state or callbacks.
+ */
+static void test_accept_before_delete(int family, int sotype, int mapfd)
+{
+	struct sockaddr_storage addr;
+	const u32 zero = 0, one = 1;
+	int err, s, c, p;
+	socklen_t len;
+	u64 value;
+
+	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+	if (s == -1)
+		return;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto close_srv;
+
+	value = s;
+	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+	if (err)
+		goto close_srv;
+
+	c = xsocket(family, sotype, 0);
+	if (c == -1)
+		goto close_srv;
+
+	/* Create & accept child while parent is in sockmap */
+	err = xconnect(c, sockaddr(&addr), len);
+	if (err)
+		goto close_cli;
+
+	p = xaccept_nonblock(s, NULL, NULL);
+	if (p == -1)
+		goto close_cli;
+
+	/* Check that child sk_user_data is not set */
+	value = p;
+	xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
+
+	xclose(p);
+close_cli:
+	xclose(c);
+close_srv:
+	xclose(s);
+}
+
+struct connect_accept_ctx {
+	int sockfd;
+	unsigned int done;
+	unsigned int nr_iter;
+};
+
+static bool is_thread_done(struct connect_accept_ctx *ctx)
+{
+	return READ_ONCE(ctx->done);
+}
+
+static void *connect_accept_thread(void *arg)
+{
+	struct connect_accept_ctx *ctx = arg;
+	struct sockaddr_storage addr;
+	int family, socktype;
+	socklen_t len;
+	int err, i, s;
+
+	s = ctx->sockfd;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto done;
+
+	len = sizeof(family);
+	err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
+	if (err)
+		goto done;
+
+	len = sizeof(socktype);
+	err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
+	if (err)
+		goto done;
+
+	for (i = 0; i < ctx->nr_iter; i++) {
+		int c, p;
+
+		c = xsocket(family, socktype, 0);
+		if (c < 0)
+			break;
+
+		err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
+		if (err) {
+			xclose(c);
+			break;
+		}
+
+		p = xaccept_nonblock(s, NULL, NULL);
+		if (p < 0) {
+			xclose(c);
+			break;
+		}
+
+		xclose(p);
+		xclose(c);
+	}
+done:
+	WRITE_ONCE(ctx->done, 1);
+	return NULL;
+}
+
+static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
+{
+	struct connect_accept_ctx ctx = { 0 };
+	struct sockaddr_storage addr;
+	socklen_t len;
+	u32 zero = 0;
+	pthread_t t;
+	int err, s;
+	u64 value;
+
+	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+	if (s < 0)
+		return;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto close;
+
+	ctx.sockfd = s;
+	ctx.nr_iter = 1000;
+
+	err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
+	if (err)
+		goto close;
+
+	value = s;
+	while (!is_thread_done(&ctx)) {
+		err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+		if (err)
+			break;
+
+		err = xbpf_map_delete_elem(mapfd, &zero);
+		if (err)
+			break;
+	}
+
+	xpthread_join(t, NULL);
+close:
+	xclose(s);
+}
+
+static void *listen_thread(void *arg)
+{
+	struct sockaddr unspec = { AF_UNSPEC };
+	struct connect_accept_ctx *ctx = arg;
+	int err, i, s;
+
+	s = ctx->sockfd;
+
+	for (i = 0; i < ctx->nr_iter; i++) {
+		err = xlisten(s, 1);
+		if (err)
+			break;
+		err = xconnect(s, &unspec, sizeof(unspec));
+		if (err)
+			break;
+	}
+
+	WRITE_ONCE(ctx->done, 1);
+	return NULL;
+}
+
+static void test_race_insert_listen(int family, int socktype, int mapfd)
+{
+	struct connect_accept_ctx ctx = { 0 };
+	const u32 zero = 0;
+	const int one = 1;
+	pthread_t t;
+	int err, s;
+	u64 value;
+
+	s = xsocket(family, socktype, 0);
+	if (s < 0)
+		return;
+
+	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+	if (err)
+		goto close;
+
+	ctx.sockfd = s;
+	ctx.nr_iter = 10000;
+
+	err = pthread_create(&t, NULL, listen_thread, &ctx);
+	if (err)
+		goto close;
+
+	value = s;
+	while (!is_thread_done(&ctx)) {
+		err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+		/* Expecting EOPNOTSUPP before listen() */
+		if (err && errno != EOPNOTSUPP) {
+			FAIL_ERRNO("map_update");
+			break;
+		}
+
+		err = bpf_map_delete_elem(mapfd, &zero);
+		/* Expecting no entry after unhash on connect(AF_UNSPEC) */
+		if (err && errno != EINVAL && errno != ENOENT) {
+			FAIL_ERRNO("map_delete");
+			break;
+		}
+	}
+
+	xpthread_join(t, NULL);
+close:
+	xclose(s);
+}
+
+static void zero_verdict_count(int mapfd)
+{
+	unsigned int zero = 0;
+	int key;
+
+	key = SK_DROP;
+	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
+	key = SK_PASS;
+	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
+}
+
+enum redir_mode {
+	REDIR_INGRESS,
+	REDIR_EGRESS,
+};
+
+static const char *redir_mode_str(enum redir_mode mode)
+{
+	switch (mode) {
+	case REDIR_INGRESS:
+		return "ingress";
+	case REDIR_EGRESS:
+		return "egress";
+	default:
+		return "unknown";
+	}
+}
+
+static void redir_to_connected(int family, int sotype, int sock_mapfd,
+			       int verd_mapfd, enum redir_mode mode)
+{
+	const char *log_prefix = redir_mode_str(mode);
+	struct sockaddr_storage addr;
+	int s, c0, c1, p0, p1;
+	unsigned int pass;
+	socklen_t len;
+	int err, n;
+	u64 value;
+	u32 key;
+	char b;
+
+	zero_verdict_count(verd_mapfd);
+
+	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+	if (s < 0)
+		return;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto close_srv;
+
+	c0 = xsocket(family, sotype, 0);
+	if (c0 < 0)
+		goto close_srv;
+	err = xconnect(c0, sockaddr(&addr), len);
+	if (err)
+		goto close_cli0;
+
+	p0 = xaccept_nonblock(s, NULL, NULL);
+	if (p0 < 0)
+		goto close_cli0;
+
+	c1 = xsocket(family, sotype, 0);
+	if (c1 < 0)
+		goto close_peer0;
+	err = xconnect(c1, sockaddr(&addr), len);
+	if (err)
+		goto close_cli1;
+
+	p1 = xaccept_nonblock(s, NULL, NULL);
+	if (p1 < 0)
+		goto close_cli1;
+
+	key = 0;
+	value = p0;
+	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+	if (err)
+		goto close_peer1;
+
+	key = 1;
+	value = p1;
+	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+	if (err)
+		goto close_peer1;
+
+	n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
+	if (n < 0)
+		FAIL_ERRNO("%s: write", log_prefix);
+	if (n == 0)
+		FAIL("%s: incomplete write", log_prefix);
+	if (n < 1)
+		goto close_peer1;
+
+	key = SK_PASS;
+	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+	if (err)
+		goto close_peer1;
+	if (pass != 1)
+		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+	n = read(c0, &b, 1);
+	if (n < 0)
+		FAIL_ERRNO("%s: read", log_prefix);
+	if (n == 0)
+		FAIL("%s: incomplete read", log_prefix);
+
+close_peer1:
+	xclose(p1);
+close_cli1:
+	xclose(c1);
+close_peer0:
+	xclose(p0);
+close_cli0:
+	xclose(c0);
+close_srv:
+	xclose(s);
+}
+
+static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
+					struct bpf_map *inner_map, int family,
+					int sotype)
+{
+	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+	int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+	int sock_map = bpf_map__fd(inner_map);
+	int err;
+
+	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+	if (err)
+		return;
+	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (err)
+		goto detach;
+
+	redir_to_connected(family, sotype, sock_map, verdict_map,
+			   REDIR_INGRESS);
+
+	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
+static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
+					struct bpf_map *inner_map, int family,
+					int sotype)
+{
+	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+	int sock_map = bpf_map__fd(inner_map);
+	int err;
+
+	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
+	if (err)
+		return;
+
+	redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
+}
+
+static void redir_to_listening(int family, int sotype, int sock_mapfd,
+			       int verd_mapfd, enum redir_mode mode)
+{
+	const char *log_prefix = redir_mode_str(mode);
+	struct sockaddr_storage addr;
+	int s, c, p, err, n;
+	unsigned int drop;
+	socklen_t len;
+	u64 value;
+	u32 key;
+
+	zero_verdict_count(verd_mapfd);
+
+	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+	if (s < 0)
+		return;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto close_srv;
+
+	c = xsocket(family, sotype, 0);
+	if (c < 0)
+		goto close_srv;
+	err = xconnect(c, sockaddr(&addr), len);
+	if (err)
+		goto close_cli;
+
+	p = xaccept_nonblock(s, NULL, NULL);
+	if (p < 0)
+		goto close_cli;
+
+	key = 0;
+	value = s;
+	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+	if (err)
+		goto close_peer;
+
+	key = 1;
+	value = p;
+	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+	if (err)
+		goto close_peer;
+
+	n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
+	if (n < 0 && errno != EACCES)
+		FAIL_ERRNO("%s: write", log_prefix);
+	if (n == 0)
+		FAIL("%s: incomplete write", log_prefix);
+	if (n < 1)
+		goto close_peer;
+
+	key = SK_DROP;
+	err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
+	if (err)
+		goto close_peer;
+	if (drop != 1)
+		FAIL("%s: want drop count 1, have %d", log_prefix, drop);
+
+close_peer:
+	xclose(p);
+close_cli:
+	xclose(c);
+close_srv:
+	xclose(s);
+}
+
+static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
+					struct bpf_map *inner_map, int family,
+					int sotype)
+{
+	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+	int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+	int sock_map = bpf_map__fd(inner_map);
+	int err;
+
+	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+	if (err)
+		return;
+	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (err)
+		goto detach;
+
+	redir_to_listening(family, sotype, sock_map, verdict_map,
+			   REDIR_INGRESS);
+
+	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
+static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
+					struct bpf_map *inner_map, int family,
+					int sotype)
+{
+	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+	int sock_map = bpf_map__fd(inner_map);
+	int err;
+
+	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
+	if (err)
+		return;
+
+	redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
+}
+
+static void test_reuseport_select_listening(int family, int sotype,
+					    int sock_map, int verd_map,
+					    int reuseport_prog)
+{
+	struct sockaddr_storage addr;
+	unsigned int pass;
+	int s, c, err;
+	socklen_t len;
+	u64 value;
+	u32 key;
+
+	zero_verdict_count(verd_map);
+
+	s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
+				      reuseport_prog);
+	if (s < 0)
+		return;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto close_srv;
+
+	key = 0;
+	value = s;
+	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+	if (err)
+		goto close_srv;
+
+	c = xsocket(family, sotype, 0);
+	if (c < 0)
+		goto close_srv;
+	err = xconnect(c, sockaddr(&addr), len);
+	if (err)
+		goto close_cli;
+
+	if (sotype == SOCK_STREAM) {
+		int p;
+
+		p = xaccept_nonblock(s, NULL, NULL);
+		if (p < 0)
+			goto close_cli;
+		xclose(p);
+	} else {
+		char b = 'a';
+		ssize_t n;
+
+		n = xsend(c, &b, sizeof(b), 0);
+		if (n == -1)
+			goto close_cli;
+
+		n = xrecv_nonblock(s, &b, sizeof(b), 0);
+		if (n == -1)
+			goto close_cli;
+	}
+
+	key = SK_PASS;
+	err = xbpf_map_lookup_elem(verd_map, &key, &pass);
+	if (err)
+		goto close_cli;
+	if (pass != 1)
+		FAIL("want pass count 1, have %d", pass);
+
+close_cli:
+	xclose(c);
+close_srv:
+	xclose(s);
+}
+
+static void test_reuseport_select_connected(int family, int sotype,
+					    int sock_map, int verd_map,
+					    int reuseport_prog)
+{
+	struct sockaddr_storage addr;
+	int s, c0, c1, p0, err;
+	unsigned int drop;
+	socklen_t len;
+	u64 value;
+	u32 key;
+
+	zero_verdict_count(verd_map);
+
+	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
+	if (s < 0)
+		return;
+
+	/* Populate sock_map[0] to avoid ENOENT on first connection */
+	key = 0;
+	value = s;
+	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+	if (err)
+		goto close_srv;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto close_srv;
+
+	c0 = xsocket(family, sotype, 0);
+	if (c0 < 0)
+		goto close_srv;
+
+	err = xconnect(c0, sockaddr(&addr), len);
+	if (err)
+		goto close_cli0;
+
+	if (sotype == SOCK_STREAM) {
+		p0 = xaccept_nonblock(s, NULL, NULL);
+		if (p0 < 0)
+			goto close_cli0;
+	} else {
+		p0 = xsocket(family, sotype, 0);
+		if (p0 < 0)
+			goto close_cli0;
+
+		len = sizeof(addr);
+		err = xgetsockname(c0, sockaddr(&addr), &len);
+		if (err)
+			goto close_cli0;
+
+		err = xconnect(p0, sockaddr(&addr), len);
+		if (err)
+			goto close_cli0;
+	}
+
+	/* Update sock_map[0] to redirect to a connected socket */
+	key = 0;
+	value = p0;
+	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
+	if (err)
+		goto close_peer0;
+
+	c1 = xsocket(family, sotype, 0);
+	if (c1 < 0)
+		goto close_peer0;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto close_srv;
+
+	errno = 0;
+	err = connect(c1, sockaddr(&addr), len);
+	if (sotype == SOCK_DGRAM) {
+		char b = 'a';
+		ssize_t n;
+
+		n = xsend(c1, &b, sizeof(b), 0);
+		if (n == -1)
+			goto close_cli1;
+
+		n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
+		err = n == -1;
+	}
+	if (!err || errno != ECONNREFUSED)
+		FAIL_ERRNO("connect: expected ECONNREFUSED");
+
+	key = SK_DROP;
+	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
+	if (err)
+		goto close_cli1;
+	if (drop != 1)
+		FAIL("want drop count 1, have %d", drop);
+
+close_cli1:
+	xclose(c1);
+close_peer0:
+	xclose(p0);
+close_cli0:
+	xclose(c0);
+close_srv:
+	xclose(s);
+}
+
+/* Check that redirecting across reuseport groups is not allowed. */
+static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
+					int verd_map, int reuseport_prog)
+{
+	struct sockaddr_storage addr;
+	int s1, s2, c, err;
+	unsigned int drop;
+	socklen_t len;
+	u64 value;
+	u32 key;
+
+	zero_verdict_count(verd_map);
+
+	/* Create two listeners, each in its own reuseport group */
+	s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
+	if (s1 < 0)
+		return;
+
+	s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
+	if (s2 < 0)
+		goto close_srv1;
+
+	key = 0;
+	value = s1;
+	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+	if (err)
+		goto close_srv2;
+
+	key = 1;
+	value = s2;
+	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+
+	/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
+	len = sizeof(addr);
+	err = xgetsockname(s2, sockaddr(&addr), &len);
+	if (err)
+		goto close_srv2;
+
+	c = xsocket(family, sotype, 0);
+	if (c < 0)
+		goto close_srv2;
+
+	err = connect(c, sockaddr(&addr), len);
+	if (sotype == SOCK_DGRAM) {
+		char b = 'a';
+		ssize_t n;
+
+		n = xsend(c, &b, sizeof(b), 0);
+		if (n == -1)
+			goto close_cli;
+
+		n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
+		err = n == -1;
+	}
+	if (!err || errno != ECONNREFUSED) {
+		FAIL_ERRNO("connect: expected ECONNREFUSED");
+		goto close_cli;
+	}
+
+	/* Expect drop, can't redirect outside of reuseport group */
+	key = SK_DROP;
+	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
+	if (err)
+		goto close_cli;
+	if (drop != 1)
+		FAIL("want drop count 1, have %d", drop);
+
+close_cli:
+	xclose(c);
+close_srv2:
+	xclose(s2);
+close_srv1:
+	xclose(s1);
+}
+
+#define TEST(fn, ...)                                                          \
+	{                                                                      \
+		fn, #fn, __VA_ARGS__                                           \
+	}
+
+static void test_ops_cleanup(const struct bpf_map *map)
+{
+	const struct bpf_map_def *def;
+	int err, mapfd;
+	u32 key;
+
+	def = bpf_map__def(map);
+	mapfd = bpf_map__fd(map);
+
+	for (key = 0; key < def->max_entries; key++) {
+		err = bpf_map_delete_elem(mapfd, &key);
+		if (err && errno != EINVAL && errno != ENOENT)
+			FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
+	}
+}
+
+static const char *family_str(sa_family_t family)
+{
+	switch (family) {
+	case AF_INET:
+		return "IPv4";
+	case AF_INET6:
+		return "IPv6";
+	default:
+		return "unknown";
+	}
+}
+
+static const char *map_type_str(const struct bpf_map *map)
+{
+	const struct bpf_map_def *def;
+
+	def = bpf_map__def(map);
+	if (IS_ERR(def))
+		return "invalid";
+
+	switch (def->type) {
+	case BPF_MAP_TYPE_SOCKMAP:
+		return "sockmap";
+	case BPF_MAP_TYPE_SOCKHASH:
+		return "sockhash";
+	default:
+		return "unknown";
+	}
+}
+
+static const char *sotype_str(int sotype)
+{
+	switch (sotype) {
+	case SOCK_DGRAM:
+		return "UDP";
+	case SOCK_STREAM:
+		return "TCP";
+	default:
+		return "unknown";
+	}
+}
+
+static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
+		     int family, int sotype)
+{
+	const struct op_test {
+		void (*fn)(int family, int sotype, int mapfd);
+		const char *name;
+		int sotype;
+	} tests[] = {
+		/* insert */
+		TEST(test_insert_invalid),
+		TEST(test_insert_opened),
+		TEST(test_insert_bound, SOCK_STREAM),
+		TEST(test_insert),
+		/* delete */
+		TEST(test_delete_after_insert),
+		TEST(test_delete_after_close),
+		/* lookup */
+		TEST(test_lookup_after_insert),
+		TEST(test_lookup_after_delete),
+		TEST(test_lookup_32_bit_value),
+		/* update */
+		TEST(test_update_existing),
+		/* races with insert/delete */
+		TEST(test_destroy_orphan_child, SOCK_STREAM),
+		TEST(test_syn_recv_insert_delete, SOCK_STREAM),
+		TEST(test_race_insert_listen, SOCK_STREAM),
+		/* child clone */
+		TEST(test_clone_after_delete, SOCK_STREAM),
+		TEST(test_accept_after_delete, SOCK_STREAM),
+		TEST(test_accept_before_delete, SOCK_STREAM),
+	};
+	const char *family_name, *map_name, *sotype_name;
+	const struct op_test *t;
+	char s[MAX_TEST_NAME];
+	int map_fd;
+
+	family_name = family_str(family);
+	map_name = map_type_str(map);
+	sotype_name = sotype_str(sotype);
+	map_fd = bpf_map__fd(map);
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
+			 sotype_name, t->name);
+
+		if (t->sotype != 0 && t->sotype != sotype)
+			continue;
+
+		if (!test__start_subtest(s))
+			continue;
+
+		t->fn(family, sotype, map_fd);
+		test_ops_cleanup(map);
+	}
+}
+
+static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+		       int family, int sotype)
+{
+	const struct redir_test {
+		void (*fn)(struct test_sockmap_listen *skel,
+			   struct bpf_map *map, int family, int sotype);
+		const char *name;
+	} tests[] = {
+		TEST(test_skb_redir_to_connected),
+		TEST(test_skb_redir_to_listening),
+		TEST(test_msg_redir_to_connected),
+		TEST(test_msg_redir_to_listening),
+	};
+	const char *family_name, *map_name;
+	const struct redir_test *t;
+	char s[MAX_TEST_NAME];
+
+	family_name = family_str(family);
+	map_name = map_type_str(map);
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
+			 t->name);
+
+		if (!test__start_subtest(s))
+			continue;
+
+		t->fn(skel, map, family, sotype);
+	}
+}
+
+static void test_reuseport(struct test_sockmap_listen *skel,
+			   struct bpf_map *map, int family, int sotype)
+{
+	const struct reuseport_test {
+		void (*fn)(int family, int sotype, int socket_map,
+			   int verdict_map, int reuseport_prog);
+		const char *name;
+		int sotype;
+	} tests[] = {
+		TEST(test_reuseport_select_listening),
+		TEST(test_reuseport_select_connected),
+		TEST(test_reuseport_mixed_groups),
+	};
+	int socket_map, verdict_map, reuseport_prog;
+	const char *family_name, *map_name, *sotype_name;
+	const struct reuseport_test *t;
+	char s[MAX_TEST_NAME];
+
+	family_name = family_str(family);
+	map_name = map_type_str(map);
+	sotype_name = sotype_str(sotype);
+
+	socket_map = bpf_map__fd(map);
+	verdict_map = bpf_map__fd(skel->maps.verdict_map);
+	reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
+			 sotype_name, t->name);
+
+		if (t->sotype != 0 && t->sotype != sotype)
+			continue;
+
+		if (!test__start_subtest(s))
+			continue;
+
+		t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
+	}
+}
+
+static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
+		      int family)
+{
+	test_ops(skel, map, family, SOCK_STREAM);
+	test_ops(skel, map, family, SOCK_DGRAM);
+	test_redir(skel, map, family, SOCK_STREAM);
+	test_reuseport(skel, map, family, SOCK_STREAM);
+	test_reuseport(skel, map, family, SOCK_DGRAM);
+}
+
+void test_sockmap_listen(void)
+{
+	struct test_sockmap_listen *skel;
+
+	skel = test_sockmap_listen__open_and_load();
+	if (!skel) {
+		FAIL("skeleton open/load failed");
+		return;
+	}
+
+	skel->bss->test_sockmap = true;
+	run_tests(skel, skel->maps.sock_map, AF_INET);
+	run_tests(skel, skel->maps.sock_map, AF_INET6);
+
+	skel->bss->test_sockmap = false;
+	run_tests(skel, skel->maps.sock_hash, AF_INET);
+	run_tests(skel, skel->maps.sock_hash, AF_INET6);
+
+	test_sockmap_listen__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index f4cd60d6fba2..e56b52ab41da 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -188,7 +188,7 @@ static int start_server(void)
 	};
 	int fd;
 
-	fd = socket(AF_INET, SOCK_STREAM, 0);
+	fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
 	if (fd < 0) {
 		log_err("Failed to create server socket");
 		return -1;
@@ -205,6 +205,7 @@ static int start_server(void)
 
 static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
 static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
+static volatile bool server_done = false;
 
 static void *server_thread(void *arg)
 {
@@ -222,23 +223,24 @@ static void *server_thread(void *arg)
 
 	if (CHECK_FAIL(err < 0)) {
 		perror("Failed to listed on socket");
-		return NULL;
+		return ERR_PTR(err);
 	}
 
-	client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+	while (true) {
+		client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+		if (client_fd == -1 && errno == EAGAIN) {
+			usleep(50);
+			continue;
+		}
+		break;
+	}
 	if (CHECK_FAIL(client_fd < 0)) {
 		perror("Failed to accept client");
-		return NULL;
+		return ERR_PTR(err);
 	}
 
-	/* Wait for the next connection (that never arrives)
-	 * to keep this thread alive to prevent calling
-	 * close() on client_fd.
-	 */
-	if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) {
-		perror("Unexpected success in second accept");
-		return NULL;
-	}
+	while (!server_done)
+		usleep(50);
 
 	close(client_fd);
 
@@ -249,6 +251,7 @@ void test_tcp_rtt(void)
 {
 	int server_fd, cgroup_fd;
 	pthread_t tid;
+	void *server_res;
 
 	cgroup_fd = test__join_cgroup("/tcp_rtt");
 	if (CHECK_FAIL(cgroup_fd < 0))
@@ -267,6 +270,11 @@ void test_tcp_rtt(void)
 	pthread_mutex_unlock(&server_started_mtx);
 
 	CHECK_FAIL(run_test(cgroup_fd, server_fd));
+
+	server_done = true;
+	CHECK_FAIL(pthread_join(tid, &server_res));
+	CHECK_FAIL(IS_ERR(server_res));
+
 close_server_fd:
 	close(server_fd);
 close_cgroup_fd:
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
new file mode 100644
index 000000000000..1e4c258de09d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <malloc.h>
+#include <stdlib.h>
+
+#include "lsm.skel.h"
+
+char *CMD_ARGS[] = {"true", NULL};
+
+int heap_mprotect(void)
+{
+	void *buf;
+	long sz;
+	int ret;
+
+	sz = sysconf(_SC_PAGESIZE);
+	if (sz < 0)
+		return sz;
+
+	buf = memalign(sz, 2 * sz);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	ret = mprotect(buf, sz, PROT_READ | PROT_WRITE | PROT_EXEC);
+	free(buf);
+	return ret;
+}
+
+int exec_cmd(int *monitored_pid)
+{
+	int child_pid, child_status;
+
+	child_pid = fork();
+	if (child_pid == 0) {
+		*monitored_pid = getpid();
+		execvp(CMD_ARGS[0], CMD_ARGS);
+		return -EINVAL;
+	} else if (child_pid > 0) {
+		waitpid(child_pid, &child_status, 0);
+		return child_status;
+	}
+
+	return -EINVAL;
+}
+
+void test_test_lsm(void)
+{
+	struct lsm *skel = NULL;
+	int err, duration = 0;
+
+	skel = lsm__open_and_load();
+	if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
+		goto close_prog;
+
+	err = lsm__attach(skel);
+	if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
+		goto close_prog;
+
+	err = exec_cmd(&skel->bss->monitored_pid);
+	if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	CHECK(skel->bss->bprm_count != 1, "bprm_count", "bprm_count = %d\n",
+	      skel->bss->bprm_count);
+
+	skel->bss->monitored_pid = getpid();
+
+	err = heap_mprotect();
+	if (CHECK(errno != EPERM, "heap_mprotect", "want errno=EPERM, got %d\n",
+		  errno))
+		goto close_prog;
+
+	CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
+	      "mprotect_count = %d\n", skel->bss->mprotect_count);
+
+close_prog:
+	lsm__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index 1f6ccdaed1ac..781c8d11604b 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -55,31 +55,40 @@ void test_trampoline_count(void)
 	/* attach 'allowed' 40 trampoline programs */
 	for (i = 0; i < MAX_TRAMP_PROGS; i++) {
 		obj = bpf_object__open_file(object, NULL);
-		if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+		if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+			obj = NULL;
 			goto cleanup;
+		}
 
 		err = bpf_object__load(obj);
 		if (CHECK(err, "obj_load", "err %d\n", err))
 			goto cleanup;
 		inst[i].obj = obj;
+		obj = NULL;
 
 		if (rand() % 2) {
-			link = load(obj, fentry_name);
-			if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+			link = load(inst[i].obj, fentry_name);
+			if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+				link = NULL;
 				goto cleanup;
+			}
 			inst[i].link_fentry = link;
 		} else {
-			link = load(obj, fexit_name);
-			if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+			link = load(inst[i].obj, fexit_name);
+			if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+				link = NULL;
 				goto cleanup;
+			}
 			inst[i].link_fexit = link;
 		}
 	}
 
 	/* and try 1 extra.. */
 	obj = bpf_object__open_file(object, NULL);
-	if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+	if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+		obj = NULL;
 		goto cleanup;
+	}
 
 	err = bpf_object__load(obj);
 	if (CHECK(err, "obj_load", "err %d\n", err))
@@ -104,7 +113,9 @@ void test_trampoline_count(void)
 cleanup_extra:
 	bpf_object__close(obj);
 cleanup:
-	while (--i) {
+	if (i >= MAX_TRAMP_PROGS)
+		i = MAX_TRAMP_PROGS - 1;
+	for (; i >= 0; i--) {
 		bpf_link__destroy(inst[i].link_fentry);
 		bpf_link__destroy(inst[i].link_fexit);
 		bpf_object__close(inst[i].obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
new file mode 100644
index 000000000000..72310cfc6474
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_vmlinux.skel.h"
+
+#define MY_TV_NSEC 1337
+
+static void nsleep()
+{
+	struct timespec ts = { .tv_nsec = MY_TV_NSEC };
+
+	(void)syscall(__NR_nanosleep, &ts, NULL);
+}
+
+void test_vmlinux(void)
+{
+	int duration = 0, err;
+	struct test_vmlinux* skel;
+	struct test_vmlinux__bss *bss;
+
+	skel = test_vmlinux__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+	bss = skel->bss;
+
+	err = test_vmlinux__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger everything */
+	nsleep();
+
+	CHECK(!bss->tp_called, "tp", "not called\n");
+	CHECK(!bss->raw_tp_called, "raw_tp", "not called\n");
+	CHECK(!bss->tp_btf_called, "tp_btf", "not called\n");
+	CHECK(!bss->kprobe_called, "kprobe", "not called\n");
+	CHECK(!bss->fentry_called, "fentry", "not called\n");
+
+cleanup:
+	test_vmlinux__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
new file mode 100644
index 000000000000..05b294d6b923
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#define IFINDEX_LO 1
+#define XDP_FLAGS_REPLACE		(1U << 4)
+
+void test_xdp_attach(void)
+{
+	struct bpf_object *obj1, *obj2, *obj3;
+	const char *file = "./test_xdp.o";
+	int err, fd1, fd2, fd3;
+	__u32 duration = 0;
+	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts,
+			    .old_fd = -1);
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1);
+	if (CHECK_FAIL(err))
+		return;
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2);
+	if (CHECK_FAIL(err))
+		goto out_1;
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3);
+	if (CHECK_FAIL(err))
+		goto out_2;
+
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE,
+				       &opts);
+	if (CHECK(err, "load_ok", "initial load failed"))
+		goto out_close;
+
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE,
+				       &opts);
+	if (CHECK(!err, "load_fail", "load with expected id didn't fail"))
+		goto out;
+
+	opts.old_fd = fd1;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts);
+	if (CHECK(err, "replace_ok", "replace valid old_fd failed"))
+		goto out;
+
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts);
+	if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail"))
+		goto out;
+
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+	if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail"))
+		goto out;
+
+	opts.old_fd = fd2;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+	if (CHECK(err, "remove_ok", "remove valid old_fd failed"))
+		goto out;
+
+out:
+	bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+out_close:
+	bpf_object__close(obj3);
+out_2:
+	bpf_object__close(obj2);
+out_1:
+	bpf_object__close(obj1);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 6b56bdc73ebc..a0f688c37023 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -4,17 +4,51 @@
 #include "test_xdp.skel.h"
 #include "test_xdp_bpf2bpf.skel.h"
 
+struct meta {
+	int ifindex;
+	int pkt_len;
+};
+
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+	int duration = 0;
+	struct meta *meta = (struct meta *)data;
+	struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+
+	if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
+		  "check_size", "size %u < %zu\n",
+		  size, sizeof(pkt_v4) + sizeof(*meta)))
+		return;
+
+	if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
+		  "meta->ifindex = %d\n", meta->ifindex))
+		return;
+
+	if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
+		  "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+		return;
+
+	if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
+		  "check_packet_content", "content not the same\n"))
+		return;
+
+	*(bool *)ctx = true;
+}
+
 void test_xdp_bpf2bpf(void)
 {
 	__u32 duration = 0, retval, size;
 	char buf[128];
 	int err, pkt_fd, map_fd;
+	bool passed = false;
 	struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
 	struct iptnl_info value4 = {.family = AF_INET};
 	struct test_xdp *pkt_skel = NULL;
 	struct test_xdp_bpf2bpf *ftrace_skel = NULL;
 	struct vip key4 = {.protocol = 6, .family = AF_INET};
-	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+	struct bpf_program *prog;
+	struct perf_buffer *pb = NULL;
+	struct perf_buffer_opts pb_opts = {};
 
 	/* Load XDP program to introspect */
 	pkt_skel = test_xdp__open_and_load();
@@ -27,11 +61,21 @@ void test_xdp_bpf2bpf(void)
 	bpf_map_update_elem(map_fd, &key4, &value4, 0);
 
 	/* Load trace program */
-	opts.attach_prog_fd = pkt_fd,
-	ftrace_skel = test_xdp_bpf2bpf__open_opts(&opts);
+	ftrace_skel = test_xdp_bpf2bpf__open();
 	if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
 		goto out;
 
+	/* Demonstrate the bpf_program__set_attach_target() API rather than
+	 * the load with options, i.e. opts.attach_prog_fd.
+	 */
+	prog = ftrace_skel->progs.trace_on_entry;
+	bpf_program__set_expected_attach_type(prog, BPF_TRACE_FENTRY);
+	bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
+
+	prog = ftrace_skel->progs.trace_on_exit;
+	bpf_program__set_expected_attach_type(prog, BPF_TRACE_FEXIT);
+	bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
+
 	err = test_xdp_bpf2bpf__load(ftrace_skel);
 	if (CHECK(err, "__load", "ftrace skeleton failed\n"))
 		goto out;
@@ -40,6 +84,14 @@ void test_xdp_bpf2bpf(void)
 	if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
 		goto out;
 
+	/* Set up perf buffer */
+	pb_opts.sample_cb = on_sample;
+	pb_opts.ctx = &passed;
+	pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
+			      1, &pb_opts);
+	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+		goto out;
+
 	/* Run test program */
 	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
@@ -50,6 +102,15 @@ void test_xdp_bpf2bpf(void)
 		  err, errno, retval, size))
 		goto out;
 
+	/* Make sure bpf_xdp_output() was triggered and it sent the expected
+	 * data to the perf ring buffer.
+	 */
+	err = perf_buffer__poll(pb, 100);
+	if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+		goto out;
+
+	CHECK_FAIL(!passed);
+
 	/* Verify test results */
 	if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
 		  "result", "fentry failed err %llu\n",
@@ -60,6 +121,8 @@ void test_xdp_bpf2bpf(void)
 	      "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
 
 out:
+	if (pb)
+		perf_buffer__free(pb);
 	test_xdp__destroy(pkt_skel);
 	test_xdp_bpf2bpf__destroy(ftrace_skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index b631fb5032d2..3fb4260570b1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -6,14 +6,24 @@
  * the kernel BPF logic.
  */
 
+#include <stddef.h>
 #include <linux/bpf.h>
 #include <linux/types.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 #include "bpf_tcp_helpers.h"
 
 char _license[] SEC("license") = "GPL";
 
+int stg_result = 0;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} sk_stg_map SEC(".maps");
+
 #define DCTCP_MAX_ALPHA	1024U
 
 struct dctcp {
@@ -43,12 +53,18 @@ void BPF_PROG(dctcp_init, struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct dctcp *ca = inet_csk_ca(sk);
+	int *stg;
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
 	ca->loss_cwnd = 0;
 	ca->ce_state = 0;
 
+	stg = bpf_sk_storage_get(&sk_stg_map, (void *)tp, NULL, 0);
+	if (stg) {
+		stg_result = *stg;
+		bpf_sk_storage_delete(&sk_stg_map, (void *)tp);
+	}
 	dctcp_reset(tp, ca);
 }
 
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index d4a02fe44a12..31975c96e2c9 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -13,7 +13,7 @@ enum e1 {
 
 enum e2 {
 	C = 100,
-	D = -100,
+	D = 4294967295,
 	E = 0,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
index 38d3a82144ca..9365b686f84b 100644
--- a/tools/testing/selftests/bpf/progs/fentry_test.c
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2019 Facebook */
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index c329fccf9842..98e1efe14549 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -5,7 +5,7 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 struct sk_buff {
 	unsigned int len;
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
index 92f3fa47cf40..85c0b516d6ee 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2019 Facebook */
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 struct sk_buff {
 	unsigned int len;
diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
index 348109b9ea07..bd1e17d8024c 100644
--- a/tools/testing/selftests/bpf/progs/fexit_test.c
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2019 Facebook */
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
index 8f48a909f079..a46a264ce24e 100644
--- a/tools/testing/selftests/bpf/progs/kfree_skb.c
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -4,7 +4,7 @@
 #include <stdbool.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 struct {
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
new file mode 100644
index 000000000000..a4e3c223028d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include  <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+int monitored_pid = 0;
+int mprotect_count = 0;
+int bprm_count = 0;
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
+	     unsigned long reqprot, unsigned long prot, int ret)
+{
+	if (ret != 0)
+		return ret;
+
+	__u32 pid = bpf_get_current_pid_tgid() >> 32;
+	int is_heap = 0;
+
+	is_heap = (vma->vm_start >= vma->vm_mm->start_brk &&
+		   vma->vm_end <= vma->vm_mm->brk);
+
+	if (is_heap && monitored_pid == pid) {
+		mprotect_count++;
+		ret = -EPERM;
+	}
+
+	return ret;
+}
+
+SEC("lsm/bprm_committed_creds")
+int BPF_PROG(test_void_hook, struct linux_binprm *bprm)
+{
+	__u32 pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (monitored_pid == pid)
+		bprm_count++;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c
new file mode 100644
index 000000000000..8b7466a15c6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/modify_return.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int sequence = 0;
+__s32 input_retval = 0;
+
+__u64 fentry_result = 0;
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(fentry_test, int a, __u64 b)
+{
+	sequence++;
+	fentry_result = (sequence == 1);
+	return 0;
+}
+
+__u64 fmod_ret_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
+{
+	sequence++;
+	/* This is the first fmod_ret program, the ret passed should be 0 */
+	fmod_ret_result = (sequence == 2 && ret == 0);
+	return input_retval;
+}
+
+__u64 fexit_result = 0;
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int a, __u64 b, int ret)
+{
+	sequence++;
+	/* If the input_reval is non-zero a successful modification should have
+	 * occurred.
+	 */
+	if (input_retval)
+		fexit_result = (sequence == 3 && ret == input_retval);
+	else
+		fexit_result = (sequence == 3 && ret == 4);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index a5c6d5903b22..ca283af80d4e 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -12,7 +12,6 @@ int bpf_prog1(struct __sk_buff *skb)
 	__u32 lport = skb->local_port;
 	__u32 rport = skb->remote_port;
 	__u8 *d = data;
-	__u32 len = (__u32) data_end - (__u32) data;
 	int err;
 
 	if (data + 10 > data_end) {
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index dd8fae6660ab..8056a4c6d918 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -4,6 +4,7 @@
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 int kprobe_res = 0;
 int kretprobe_res = 0;
@@ -18,7 +19,7 @@ int handle_kprobe(struct pt_regs *ctx)
 }
 
 SEC("kretprobe/sys_nanosleep")
-int handle_kretprobe(struct pt_regs *ctx)
+int BPF_KRETPROBE(handle_kretprobe)
 {
 	kretprobe_res = 2;
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup_link.c b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
new file mode 100644
index 000000000000..77e47b9e4446
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int calls = 0;
+int alt_calls = 0;
+
+SEC("cgroup_skb/egress1")
+int egress(struct __sk_buff *skb)
+{
+	__sync_fetch_and_add(&calls, 1);
+	return 1;
+}
+
+SEC("cgroup_skb/egress2")
+int egress_alt(struct __sk_buff *skb)
+{
+	__sync_fetch_and_add(&alt_calls, 1);
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
+
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c
new file mode 100644
index 000000000000..8941a41c2a55
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_STACK_RAWTP 10
+
+SEC("raw_tracepoint/sys_enter")
+int bpf_prog2(void *ctx)
+{
+	__u64 stack[MAX_STACK_RAWTP];
+	int error;
+
+	/* set all the flags which should return -EINVAL */
+	error = bpf_get_stack(ctx, stack, 0, -1);
+	if (error < 0)
+		goto loop;
+
+	return error;
+loop:
+	while (1) {
+		error++;
+	}
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index dd7a4d3dbc0d..1319be1c54ba 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -68,7 +68,7 @@ static struct foo struct3 = {
 		bpf_map_update_elem(&result_##map, &key, var, 0);	\
 	} while (0)
 
-SEC("static_data_load")
+SEC("classifier/static_data_load")
 int load_static_data(struct __sk_buff *skb)
 {
 	static const __u64 bar = ~0;
diff --git a/tools/testing/selftests/bpf/progs/test_link_pinning.c b/tools/testing/selftests/bpf/progs/test_link_pinning.c
new file mode 100644
index 000000000000..bbf2a5264dc0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_link_pinning.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int in = 0;
+int out = 0;
+
+SEC("raw_tp/sys_enter")
+int raw_tp_prog(const void *ctx)
+{
+	out = in;
+	return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int tp_btf_prog(const void *ctx)
+{
+	out = in;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
new file mode 100644
index 000000000000..1dca70a6de2f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Carlos Neira cneirabustos@gmail.com */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+static volatile struct {
+	__u64 dev;
+	__u64 ino;
+	__u64 pid_tgid;
+	__u64 user_pid_tgid;
+} res;
+
+SEC("raw_tracepoint/sys_enter")
+int trace(void *ctx)
+{
+	__u64  ns_pid_tgid, expected_pid;
+	struct bpf_pidns_info nsdata;
+	__u32 key = 0;
+
+	if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata,
+		   sizeof(struct bpf_pidns_info)))
+		return 0;
+
+	ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid;
+	expected_pid = res.user_pid_tgid;
+
+	if (expected_pid != ns_pid_tgid)
+		return 0;
+
+	res.pid_tgid = ns_pid_tgid;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index bfe9fbcb9684..56a50b25cd33 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -6,7 +6,6 @@
 #include <linux/ptrace.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "bpf_trace_helpers.h"
 
 struct task_struct;
 
@@ -17,11 +16,9 @@ int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec)
 }
 
 SEC("kretprobe/__set_task_comm")
-int BPF_KRETPROBE(prog2,
-		  struct task_struct *tsk, const char *buf, bool exec,
-		  int ret)
+int BPF_KRETPROBE(prog2, int ret)
 {
-	return !PT_REGS_PARM1(ctx) && ret;
+	return ret;
 }
 
 SEC("raw_tp/task_rename")
diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c
new file mode 100644
index 000000000000..a1ccc831c882
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stddef.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int valid = 0;
+int required_size_out = 0;
+int written_stack_out = 0;
+int written_global_out = 0;
+
+struct {
+	__u64 _a;
+	__u64 _b;
+	__u64 _c;
+} fpbe[30] = {0};
+
+SEC("perf_event")
+int perf_branches(void *ctx)
+{
+	__u64 entries[4 * 3] = {0};
+	int required_size, written_stack, written_global;
+
+	/* write to stack */
+	written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0);
+	/* ignore spurious events */
+	if (!written_stack)
+		return 1;
+
+	/* get required size */
+	required_size = bpf_read_branch_records(ctx, NULL, 0,
+						BPF_F_GET_BRANCH_RECORDS_SIZE);
+
+	written_global = bpf_read_branch_records(ctx, fpbe, sizeof(fpbe), 0);
+	/* ignore spurious events */
+	if (!written_global)
+		return 1;
+
+	required_size_out = required_size;
+	written_stack_out = written_stack;
+	written_global_out = written_global;
+	valid = 1;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
index ebfcc9f50c35..ad59c4c9aba8 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -4,7 +4,7 @@
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 struct {
 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
index d556b1572cc6..89b3532ccc75 100644
--- a/tools/testing/selftests/bpf/progs/test_probe_user.c
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -7,7 +7,6 @@
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "bpf_trace_helpers.h"
 
 static struct sockaddr_in old;
 
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
index 1acc91e87bfc..b4233d3efac2 100644
--- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -31,6 +31,12 @@ int send_signal_tp(void *ctx)
 	return bpf_send_signal_test(ctx);
 }
 
+SEC("tracepoint/sched/sched_switch")
+int send_signal_tp_sched(void *ctx)
+{
+	return bpf_send_signal_test(ctx);
+}
+
 SEC("perf_event")
 int send_signal_perf(void *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
new file mode 100644
index 000000000000..8f530843b4da
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Cloudflare Ltd.
+// Copyright (c) 2020 Isovalent, Inc.
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
+static inline struct bpf_sock_tuple *
+get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct bpf_sock_tuple *result;
+	struct ethhdr *eth;
+	__u64 tuple_len;
+	__u8 proto = 0;
+	__u64 ihl_len;
+
+	eth = (struct ethhdr *)(data);
+	if (eth + 1 > data_end)
+		return NULL;
+
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));
+
+		if (iph + 1 > data_end)
+			return NULL;
+		if (iph->ihl != 5)
+			/* Options are not supported */
+			return NULL;
+		ihl_len = iph->ihl * 4;
+		proto = iph->protocol;
+		*ipv4 = true;
+		result = (struct bpf_sock_tuple *)&iph->saddr;
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));
+
+		if (ip6h + 1 > data_end)
+			return NULL;
+		ihl_len = sizeof(*ip6h);
+		proto = ip6h->nexthdr;
+		*ipv4 = false;
+		result = (struct bpf_sock_tuple *)&ip6h->saddr;
+	} else {
+		return (struct bpf_sock_tuple *)data;
+	}
+
+	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
+		return NULL;
+
+	*tcp = (proto == IPPROTO_TCP);
+	return result;
+}
+
+static inline int
+handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
+{
+	struct bpf_sock_tuple ln = {0};
+	struct bpf_sock *sk;
+	size_t tuple_len;
+	int ret;
+
+	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
+	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
+		return TC_ACT_SHOT;
+
+	sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+	if (sk)
+		goto assign;
+
+	if (ipv4) {
+		if (tuple->ipv4.dport != bpf_htons(4321))
+			return TC_ACT_OK;
+
+		ln.ipv4.daddr = bpf_htonl(0x7f000001);
+		ln.ipv4.dport = bpf_htons(1234);
+
+		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv4),
+					BPF_F_CURRENT_NETNS, 0);
+	} else {
+		if (tuple->ipv6.dport != bpf_htons(4321))
+			return TC_ACT_OK;
+
+		/* Upper parts of daddr are already zero. */
+		ln.ipv6.daddr[3] = bpf_htonl(0x1);
+		ln.ipv6.dport = bpf_htons(1234);
+
+		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv6),
+					BPF_F_CURRENT_NETNS, 0);
+	}
+
+	/* workaround: We can't do a single socket lookup here, because then
+	 * the compiler will likely spill tuple_len to the stack. This makes it
+	 * lose all bounds information in the verifier, which then rejects the
+	 * call as unsafe.
+	 */
+	if (!sk)
+		return TC_ACT_SHOT;
+
+assign:
+	ret = bpf_sk_assign(skb, sk, 0);
+	bpf_sk_release(sk);
+	return ret;
+}
+
+static inline int
+handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
+{
+	struct bpf_sock_tuple ln = {0};
+	struct bpf_sock *sk;
+	size_t tuple_len;
+	int ret;
+
+	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
+	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
+		return TC_ACT_SHOT;
+
+	sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+	if (sk) {
+		if (sk->state != BPF_TCP_LISTEN)
+			goto assign;
+		bpf_sk_release(sk);
+	}
+
+	if (ipv4) {
+		if (tuple->ipv4.dport != bpf_htons(4321))
+			return TC_ACT_OK;
+
+		ln.ipv4.daddr = bpf_htonl(0x7f000001);
+		ln.ipv4.dport = bpf_htons(1234);
+
+		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv4),
+					BPF_F_CURRENT_NETNS, 0);
+	} else {
+		if (tuple->ipv6.dport != bpf_htons(4321))
+			return TC_ACT_OK;
+
+		/* Upper parts of daddr are already zero. */
+		ln.ipv6.daddr[3] = bpf_htonl(0x1);
+		ln.ipv6.dport = bpf_htons(1234);
+
+		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv6),
+					BPF_F_CURRENT_NETNS, 0);
+	}
+
+	/* workaround: We can't do a single socket lookup here, because then
+	 * the compiler will likely spill tuple_len to the stack. This makes it
+	 * lose all bounds information in the verifier, which then rejects the
+	 * call as unsafe.
+	 */
+	if (!sk)
+		return TC_ACT_SHOT;
+
+	if (sk->state != BPF_TCP_LISTEN) {
+		bpf_sk_release(sk);
+		return TC_ACT_SHOT;
+	}
+
+assign:
+	ret = bpf_sk_assign(skb, sk, 0);
+	bpf_sk_release(sk);
+	return ret;
+}
+
+SEC("classifier/sk_assign_test")
+int bpf_sk_assign_test(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple *tuple, ln = {0};
+	bool ipv4 = false;
+	bool tcp = false;
+	int tuple_len;
+	int ret = 0;
+
+	tuple = get_tuple(skb, &ipv4, &tcp);
+	if (!tuple)
+		return TC_ACT_SHOT;
+
+	/* Note that the verifier socket return type for bpf_skc_lookup_tcp()
+	 * differs from bpf_sk_lookup_udp(), so even though the C-level type is
+	 * the same here, if we try to share the implementations they will
+	 * fail to verify because we're crossing pointer types.
+	 */
+	if (tcp)
+		ret = handle_tcp(skb, tuple, ipv4);
+	else
+		ret = handle_udp(skb, tuple, ipv4);
+
+	return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index 202de3938494..b02ea589ce7e 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -23,6 +23,8 @@ int process(struct __sk_buff *skb)
 		return 1;
 	if (skb->gso_segs != 8)
 		return 1;
+	if (skb->gso_size != 10)
+		return 1;
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
new file mode 100644
index 000000000000..a3a366c57ce1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+
+#include <errno.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, __u64);
+} sock_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKHASH);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, __u64);
+} sock_hash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 2);
+	__type(key, int);
+	__type(value, unsigned int);
+} verdict_map SEC(".maps");
+
+static volatile bool test_sockmap; /* toggled by user-space */
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+	return skb->len;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+	unsigned int *count;
+	__u32 zero = 0;
+	int verdict;
+
+	if (test_sockmap)
+		verdict = bpf_sk_redirect_map(skb, &sock_map, zero, 0);
+	else
+		verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero, 0);
+
+	count = bpf_map_lookup_elem(&verdict_map, &verdict);
+	if (count)
+		(*count)++;
+
+	return verdict;
+}
+
+SEC("sk_msg")
+int prog_msg_verdict(struct sk_msg_md *msg)
+{
+	unsigned int *count;
+	__u32 zero = 0;
+	int verdict;
+
+	if (test_sockmap)
+		verdict = bpf_msg_redirect_map(msg, &sock_map, zero, 0);
+	else
+		verdict = bpf_msg_redirect_hash(msg, &sock_hash, &zero, 0);
+
+	count = bpf_map_lookup_elem(&verdict_map, &verdict);
+	if (count)
+		(*count)++;
+
+	return verdict;
+}
+
+SEC("sk_reuseport")
+int prog_reuseport(struct sk_reuseport_md *reuse)
+{
+	unsigned int *count;
+	int err, verdict;
+	__u32 zero = 0;
+
+	if (test_sockmap)
+		err = bpf_sk_select_reuseport(reuse, &sock_map, &zero, 0);
+	else
+		err = bpf_sk_select_reuseport(reuse, &sock_hash, &zero, 0);
+	verdict = err ? SK_DROP : SK_PASS;
+
+	count = bpf_map_lookup_elem(&verdict_map, &verdict);
+	if (count)
+		(*count)++;
+
+	return verdict;
+}
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
index e51e6e3a81c2..f030e469d05b 100644
--- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c
+++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
@@ -2,7 +2,8 @@
 #include <stdbool.h>
 #include <stddef.h>
 #include <linux/bpf.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 struct task_struct;
 
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
new file mode 100644
index 000000000000..5611b564d3b1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <asm/unistd.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MY_TV_NSEC 1337
+
+bool tp_called = false;
+bool raw_tp_called = false;
+bool tp_btf_called = false;
+bool kprobe_called = false;
+bool fentry_called = false;
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle__tp(struct trace_event_raw_sys_enter *args)
+{
+	struct __kernel_timespec *ts;
+
+	if (args->id != __NR_nanosleep)
+		return 0;
+
+	ts = (void *)args->args[0];
+	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+		return 0;
+
+	tp_called = true;
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
+{
+	struct __kernel_timespec *ts;
+
+	if (id != __NR_nanosleep)
+		return 0;
+
+	ts = (void *)PT_REGS_PARM1_CORE(regs);
+	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+		return 0;
+
+	raw_tp_called = true;
+	return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
+{
+	struct __kernel_timespec *ts;
+
+	if (id != __NR_nanosleep)
+		return 0;
+
+	ts = (void *)PT_REGS_PARM1_CORE(regs);
+	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+		return 0;
+
+	tp_btf_called = true;
+	return 0;
+}
+
+SEC("kprobe/hrtimer_nanosleep")
+int BPF_KPROBE(handle__kprobe,
+	       ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+{
+	if (rqtp == MY_TV_NSEC)
+		kprobe_called = true;
+	return 0;
+}
+
+SEC("fentry/hrtimer_nanosleep")
+int BPF_PROG(handle__fentry,
+	     ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+{
+	if (rqtp == MY_TV_NSEC)
+		fentry_called = true;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
index cb8a04ab7a78..a038e827f850 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf.h>
+#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+
+char _license[] SEC("license") = "GPL";
 
 struct net_device {
 	/* Structure does not need to contain all entries,
@@ -27,16 +29,38 @@ struct xdp_buff {
 	struct xdp_rxq_info *rxq;
 } __attribute__((preserve_access_index));
 
+struct meta {
+	int ifindex;
+	int pkt_len;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
 __u64 test_result_fentry = 0;
-SEC("fentry/_xdp_tx_iptunnel")
+SEC("fentry/FUNC")
 int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
 {
+	struct meta meta;
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+
+	meta.ifindex = xdp->rxq->dev->ifindex;
+	meta.pkt_len = data_end - data;
+	bpf_xdp_output(xdp, &perf_buf_map,
+		       ((__u64) meta.pkt_len << 32) |
+		       BPF_F_CURRENT_CPU,
+		       &meta, sizeof(meta));
+
 	test_result_fentry = xdp->rxq->dev->ifindex;
 	return 0;
 }
 
 __u64 test_result_fexit = 0;
-SEC("fexit/_xdp_tx_iptunnel")
+SEC("fexit/FUNC")
 int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret)
 {
 	test_result_fexit = ret;
diff --git a/tools/testing/selftests/bpf/test_bpftool.py b/tools/testing/selftests/bpf/test_bpftool.py
new file mode 100644
index 000000000000..4fed2dc25c0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool.py
@@ -0,0 +1,178 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2020 SUSE LLC.
+
+import collections
+import functools
+import json
+import os
+import socket
+import subprocess
+import unittest
+
+
+# Add the source tree of bpftool and /usr/local/sbin to PATH
+cur_dir = os.path.dirname(os.path.realpath(__file__))
+bpftool_dir = os.path.abspath(os.path.join(cur_dir, "..", "..", "..", "..",
+                                           "tools", "bpf", "bpftool"))
+os.environ["PATH"] = bpftool_dir + ":/usr/local/sbin:" + os.environ["PATH"]
+
+
+class IfaceNotFoundError(Exception):
+    pass
+
+
+class UnprivilegedUserError(Exception):
+    pass
+
+
+def _bpftool(args, json=True):
+    _args = ["bpftool"]
+    if json:
+        _args.append("-j")
+    _args.extend(args)
+
+    return subprocess.check_output(_args)
+
+
+def bpftool(args):
+    return _bpftool(args, json=False).decode("utf-8")
+
+
+def bpftool_json(args):
+    res = _bpftool(args)
+    return json.loads(res)
+
+
+def get_default_iface():
+    for iface in socket.if_nameindex():
+        if iface[1] != "lo":
+            return iface[1]
+    raise IfaceNotFoundError("Could not find any network interface to probe")
+
+
+def default_iface(f):
+    @functools.wraps(f)
+    def wrapper(*args, **kwargs):
+        iface = get_default_iface()
+        return f(*args, iface, **kwargs)
+    return wrapper
+
+
+class TestBpftool(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        if os.getuid() != 0:
+            raise UnprivilegedUserError(
+                "This test suite needs root privileges")
+
+    @default_iface
+    def test_feature_dev_json(self, iface):
+        unexpected_helpers = [
+            "bpf_probe_write_user",
+            "bpf_trace_printk",
+        ]
+        expected_keys = [
+            "syscall_config",
+            "program_types",
+            "map_types",
+            "helpers",
+            "misc",
+        ]
+
+        res = bpftool_json(["feature", "probe", "dev", iface])
+        # Check if the result has all expected keys.
+        self.assertCountEqual(res.keys(), expected_keys)
+        # Check if unexpected helpers are not included in helpers probes
+        # result.
+        for helpers in res["helpers"].values():
+            for unexpected_helper in unexpected_helpers:
+                self.assertNotIn(unexpected_helper, helpers)
+
+    def test_feature_kernel(self):
+        test_cases = [
+            bpftool_json(["feature", "probe", "kernel"]),
+            bpftool_json(["feature", "probe"]),
+            bpftool_json(["feature"]),
+        ]
+        unexpected_helpers = [
+            "bpf_probe_write_user",
+            "bpf_trace_printk",
+        ]
+        expected_keys = [
+            "syscall_config",
+            "system_config",
+            "program_types",
+            "map_types",
+            "helpers",
+            "misc",
+        ]
+
+        for tc in test_cases:
+            # Check if the result has all expected keys.
+            self.assertCountEqual(tc.keys(), expected_keys)
+            # Check if unexpected helpers are not included in helpers probes
+            # result.
+            for helpers in tc["helpers"].values():
+                for unexpected_helper in unexpected_helpers:
+                    self.assertNotIn(unexpected_helper, helpers)
+
+    def test_feature_kernel_full(self):
+        test_cases = [
+            bpftool_json(["feature", "probe", "kernel", "full"]),
+            bpftool_json(["feature", "probe", "full"]),
+        ]
+        expected_helpers = [
+            "bpf_probe_write_user",
+            "bpf_trace_printk",
+        ]
+
+        for tc in test_cases:
+            # Check if expected helpers are included at least once in any
+            # helpers list for any program type. Unfortunately we cannot assume
+            # that they will be included in all program types or a specific
+            # subset of programs. It depends on the kernel version and
+            # configuration.
+            found_helpers = False
+
+            for helpers in tc["helpers"].values():
+                if all(expected_helper in helpers
+                       for expected_helper in expected_helpers):
+                    found_helpers = True
+                    break
+
+            self.assertTrue(found_helpers)
+
+    def test_feature_kernel_full_vs_not_full(self):
+        full_res = bpftool_json(["feature", "probe", "full"])
+        not_full_res = bpftool_json(["feature", "probe"])
+        not_full_set = set()
+        full_set = set()
+
+        for helpers in full_res["helpers"].values():
+            for helper in helpers:
+                full_set.add(helper)
+
+        for helpers in not_full_res["helpers"].values():
+            for helper in helpers:
+                not_full_set.add(helper)
+
+        self.assertCountEqual(full_set - not_full_set,
+                                {"bpf_probe_write_user", "bpf_trace_printk"})
+        self.assertCountEqual(not_full_set - full_set, set())
+
+    def test_feature_macros(self):
+        expected_patterns = [
+            r"/\*\*\* System call availability \*\*\*/",
+            r"#define HAVE_BPF_SYSCALL",
+            r"/\*\*\* eBPF program types \*\*\*/",
+            r"#define HAVE.*PROG_TYPE",
+            r"/\*\*\* eBPF map types \*\*\*/",
+            r"#define HAVE.*MAP_TYPE",
+            r"/\*\*\* eBPF helper functions \*\*\*/",
+            r"#define HAVE.*HELPER",
+            r"/\*\*\* eBPF misc features \*\*\*/",
+        ]
+
+        res = bpftool(["feature", "probe", "macros"])
+        for pattern in expected_patterns:
+            self.assertRegex(res, pattern)
diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh
new file mode 100755
index 000000000000..66690778e36d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2020 SUSE LLC.
+
+python3 -m unittest -v test_bpftool.TestBpftool
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 93040ca83e60..8da77cda5f4a 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -1062,6 +1062,48 @@ static struct btf_raw_test raw_tests[] = {
 	.err_str = "Member exceeds struct_size",
 },
 
+/* Test member unexceeds the size of struct
+ *
+ * enum E {
+ *     E0,
+ *     E1,
+ * };
+ *
+ * struct A {
+ *     char m;
+ *     enum E __attribute__((packed)) n;
+ * };
+ */
+{
+	.descr = "size check test #5",
+	.raw_types = {
+		/* int */			/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+		/* char */			/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),
+		/* enum E { */			/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 1),
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_ENUM_ENC(NAME_TBD, 1),
+		/* } */
+		/* struct A { */		/* [4] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 2),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* char m; */
+		BTF_MEMBER_ENC(NAME_TBD, 3, 8),/* enum E __attribute__((packed)) n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0E\0E0\0E1\0A\0m\0n",
+	.str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check5_map",
+	.key_size = sizeof(int),
+	.value_size = 2,
+	.key_type_id = 1,
+	.value_type_id = 4,
+	.max_entries = 4,
+},
+
 /* typedef const void * const_void_ptr;
  * struct A {
  *	const_void_ptr m;
diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
new file mode 100644
index 000000000000..ed253f252cd0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
+#define _GNU_SOURCE
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sched.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include "test_progs.h"
+
+#define CHECK_NEWNS(condition, tag, format...) ({		\
+	int __ret = !!(condition);			\
+	if (__ret) {					\
+		printf("%s:FAIL:%s ", __func__, tag);	\
+		printf(format);				\
+	} else {					\
+		printf("%s:PASS:%s\n", __func__, tag);	\
+	}						\
+	__ret;						\
+})
+
+struct bss {
+	__u64 dev;
+	__u64 ino;
+	__u64 pid_tgid;
+	__u64 user_pid_tgid;
+};
+
+int main(int argc, char **argv)
+{
+	pid_t pid;
+	int exit_code = 1;
+	struct stat st;
+
+	printf("Testing bpf_get_ns_current_pid_tgid helper in new ns\n");
+
+	if (stat("/proc/self/ns/pid", &st)) {
+		perror("stat failed on /proc/self/ns/pid ns\n");
+		printf("%s:FAILED\n", argv[0]);
+		return exit_code;
+	}
+
+	if (CHECK_NEWNS(unshare(CLONE_NEWPID | CLONE_NEWNS),
+			"unshare CLONE_NEWPID | CLONE_NEWNS", "error errno=%d\n", errno))
+		return exit_code;
+
+	pid = fork();
+	if (pid == -1) {
+		perror("Fork() failed\n");
+		printf("%s:FAILED\n", argv[0]);
+		return exit_code;
+	}
+
+	if (pid > 0) {
+		int status;
+
+		usleep(5);
+		waitpid(pid, &status, 0);
+		return 0;
+	} else {
+
+		pid = fork();
+		if (pid == -1) {
+			perror("Fork() failed\n");
+			printf("%s:FAILED\n", argv[0]);
+			return exit_code;
+		}
+
+		if (pid > 0) {
+			int status;
+			waitpid(pid, &status, 0);
+			return 0;
+		} else {
+			if (CHECK_NEWNS(mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL),
+				"Unmounting proc", "Cannot umount proc! errno=%d\n", errno))
+				return exit_code;
+
+			if (CHECK_NEWNS(mount("proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL),
+				"Mounting proc", "Cannot mount proc! errno=%d\n", errno))
+				return exit_code;
+
+			const char *probe_name = "raw_tracepoint/sys_enter";
+			const char *file = "test_ns_current_pid_tgid.o";
+			struct bpf_link *link = NULL;
+			struct bpf_program *prog;
+			struct bpf_map *bss_map;
+			struct bpf_object *obj;
+			int exit_code = 1;
+			int err, key = 0;
+			struct bss bss;
+			struct stat st;
+			__u64 id;
+
+			obj = bpf_object__open_file(file, NULL);
+			if (CHECK_NEWNS(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+				return exit_code;
+
+			err = bpf_object__load(obj);
+			if (CHECK_NEWNS(err, "obj_load", "err %d errno %d\n", err, errno))
+				goto cleanup;
+
+			bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
+			if (CHECK_NEWNS(!bss_map, "find_bss_map", "failed\n"))
+				goto cleanup;
+
+			prog = bpf_object__find_program_by_title(obj, probe_name);
+			if (CHECK_NEWNS(!prog, "find_prog", "prog '%s' not found\n",
+						probe_name))
+				goto cleanup;
+
+			memset(&bss, 0, sizeof(bss));
+			pid_t tid = syscall(SYS_gettid);
+			pid_t pid = getpid();
+
+			id = (__u64) tid << 32 | pid;
+			bss.user_pid_tgid = id;
+
+			if (CHECK_NEWNS(stat("/proc/self/ns/pid", &st),
+				"stat new ns", "Failed to stat /proc/self/ns/pid errno=%d\n", errno))
+				goto cleanup;
+
+			bss.dev = st.st_dev;
+			bss.ino = st.st_ino;
+
+			err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
+			if (CHECK_NEWNS(err, "setting_bss", "failed to set bss : %d\n", err))
+				goto cleanup;
+
+			link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+			if (CHECK_NEWNS(IS_ERR(link), "attach_raw_tp", "err %ld\n",
+						PTR_ERR(link))) {
+				link = NULL;
+				goto cleanup;
+			}
+
+			/* trigger some syscalls */
+			usleep(1);
+
+			err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
+			if (CHECK_NEWNS(err, "set_bss", "failed to get bss : %d\n", err))
+				goto cleanup;
+
+			if (CHECK_NEWNS(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
+						"User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
+				goto cleanup;
+
+			exit_code = 0;
+			printf("%s:PASS\n", argv[0]);
+cleanup:
+			if (!link) {
+				bpf_link__destroy(link);
+				link = NULL;
+			}
+			bpf_object__close(obj);
+		}
+	}
+}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 02eae1e864c2..c6766b2cff85 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -756,11 +756,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 	/* Test update without programs */
 	for (i = 0; i < 6; i++) {
 		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
-		if (i < 2 && !err) {
-			printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
-			       i, sfd[i]);
-			goto out_sockmap;
-		} else if (i >= 2 && err) {
+		if (err) {
 			printf("Failed noprog update sockmap '%i:%i'\n",
 			       i, sfd[i]);
 			goto out_sockmap;
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index bab1e6f1d8f1..b521e0a512b6 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1,11 +1,16 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2017 Facebook
  */
+#define _GNU_SOURCE
 #include "test_progs.h"
 #include "cgroup_helpers.h"
 #include "bpf_rlimit.h"
 #include <argp.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
 #include <string.h>
+#include <execinfo.h> /* backtrace */
 
 /* defined in test_progs.h */
 struct test_env env = {};
@@ -27,6 +32,20 @@ struct prog_test_def {
 	int old_error_cnt;
 };
 
+/* Override C runtime library's usleep() implementation to ensure nanosleep()
+ * is always called. Usleep is frequently used in selftests as a way to
+ * trigger kprobe and tracepoints.
+ */
+int usleep(useconds_t usec)
+{
+	struct timespec ts = {
+		.tv_sec = usec / 1000000,
+		.tv_nsec = (usec % 1000000) * 1000,
+	};
+
+	return syscall(__NR_nanosleep, &ts, NULL);
+}
+
 static bool should_run(struct test_selector *sel, int num, const char *name)
 {
 	int i;
@@ -74,6 +93,34 @@ static void skip_account(void)
 	}
 }
 
+static void stdio_restore(void);
+
+/* A bunch of tests set custom affinity per-thread and/or per-process. Reset
+ * it after each test/sub-test.
+ */
+static void reset_affinity() {
+
+	cpu_set_t cpuset;
+	int i, err;
+
+	CPU_ZERO(&cpuset);
+	for (i = 0; i < env.nr_cpus; i++)
+		CPU_SET(i, &cpuset);
+
+	err = sched_setaffinity(0, sizeof(cpuset), &cpuset);
+	if (err < 0) {
+		stdio_restore();
+		fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
+		exit(-1);
+	}
+	err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+	if (err < 0) {
+		stdio_restore();
+		fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
+		exit(-1);
+	}
+}
+
 void test__end_subtest()
 {
 	struct prog_test_def *test = env.test;
@@ -91,6 +138,8 @@ void test__end_subtest()
 	       test->test_num, test->subtest_num,
 	       test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
 
+	reset_affinity();
+
 	free(test->subtest_name);
 	test->subtest_name = NULL;
 }
@@ -196,7 +245,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
 
 	map = bpf_object__find_map_by_name(obj, name);
 	if (!map) {
-		printf("%s:FAIL:map '%s' not found\n", test, name);
+		fprintf(stdout, "%s:FAIL:map '%s' not found\n", test, name);
 		test__fail();
 		return -1;
 	}
@@ -367,7 +416,7 @@ static int libbpf_print_fn(enum libbpf_print_level level,
 {
 	if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG)
 		return 0;
-	vprintf(format, args);
+	vfprintf(stdout, format, args);
 	return 0;
 }
 
@@ -408,7 +457,7 @@ err:
 
 int parse_num_list(const char *s, struct test_selector *sel)
 {
-	int i, set_len = 0, num, start = 0, end = -1;
+	int i, set_len = 0, new_len, num, start = 0, end = -1;
 	bool *set = NULL, *tmp, parsing_end = false;
 	char *next;
 
@@ -443,18 +492,19 @@ int parse_num_list(const char *s, struct test_selector *sel)
 			return -EINVAL;
 
 		if (end + 1 > set_len) {
-			set_len = end + 1;
-			tmp = realloc(set, set_len);
+			new_len = end + 1;
+			tmp = realloc(set, new_len);
 			if (!tmp) {
 				free(set);
 				return -ENOMEM;
 			}
+			for (i = set_len; i < start; i++)
+				tmp[i] = false;
 			set = tmp;
+			set_len = new_len;
 		}
-		for (i = start; i <= end; i++) {
+		for (i = start; i <= end; i++)
 			set[i] = true;
-		}
-
 	}
 
 	if (!set)
@@ -613,10 +663,27 @@ int cd_flavor_subdir(const char *exec_name)
 	if (!flavor)
 		return 0;
 	flavor++;
-	printf("Switching to flavor '%s' subdirectory...\n", flavor);
+	fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
 	return chdir(flavor);
 }
 
+#define MAX_BACKTRACE_SZ 128
+void crash_handler(int signum)
+{
+	void *bt[MAX_BACKTRACE_SZ];
+	size_t sz;
+
+	sz = backtrace(bt, ARRAY_SIZE(bt));
+
+	if (env.test)
+		dump_test_log(env.test, true);
+	if (env.stdout)
+		stdio_restore();
+
+	fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
+	backtrace_symbols_fd(bt, sz, STDERR_FILENO);
+}
+
 int main(int argc, char **argv)
 {
 	static const struct argp argp = {
@@ -624,8 +691,14 @@ int main(int argc, char **argv)
 		.parser = parse_arg,
 		.doc = argp_program_doc,
 	};
+	struct sigaction sigact = {
+		.sa_handler = crash_handler,
+		.sa_flags = SA_RESETHAND,
+	};
 	int err, i;
 
+	sigaction(SIGSEGV, &sigact, NULL);
+
 	err = argp_parse(&argp, argc, argv, 0, NULL, &env);
 	if (err)
 		return err;
@@ -639,6 +712,12 @@ int main(int argc, char **argv)
 	srand(time(NULL));
 
 	env.jit_enabled = is_jit_enabled();
+	env.nr_cpus = libbpf_num_possible_cpus();
+	if (env.nr_cpus < 0) {
+		fprintf(stderr, "Failed to get number of CPUs: %d!\n",
+			env.nr_cpus);
+		return -1;
+	}
 
 	stdio_hijack();
 	for (i = 0; i < prog_test_cnt; i++) {
@@ -669,12 +748,13 @@ int main(int argc, char **argv)
 			test->test_num, test->test_name,
 			test->error_cnt ? "FAIL" : "OK");
 
+		reset_affinity();
 		if (test->need_cgroup_cleanup)
 			cleanup_cgroup_environment();
 	}
 	stdio_restore();
-	printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
-	       env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
 
 	free(env.test_selector.blacklist.strs);
 	free(env.test_selector.whitelist.strs);
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index bcfa9ef23fda..f4aff6b8284b 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -71,6 +71,7 @@ struct test_env {
 	FILE *stderr;
 	char *log_buf;
 	size_t log_cnt;
+	int nr_cpus;
 
 	int succ_cnt; /* successful tests */
 	int sub_succ_cnt; /* successful sub-tests */
@@ -109,10 +110,10 @@ extern struct ipv6_packet pkt_v6;
 	int __save_errno = errno;					\
 	if (__ret) {							\
 		test__fail();						\
-		printf("%s:FAIL:%s ", __func__, tag);			\
-		printf(format);						\
+		fprintf(stdout, "%s:FAIL:%s ", __func__, tag);		\
+		fprintf(stdout, ##format);				\
 	} else {							\
-		printf("%s:PASS:%s %d nsec\n",				\
+		fprintf(stdout, "%s:PASS:%s %d nsec\n",			\
 		       __func__, tag, duration);			\
 	}								\
 	errno = __save_errno;						\
@@ -124,7 +125,7 @@ extern struct ipv6_packet pkt_v6;
 	int __save_errno = errno;					\
 	if (__ret) {							\
 		test__fail();						\
-		printf("%s:FAIL:%d\n", __func__, __LINE__);		\
+		fprintf(stdout, "%s:FAIL:%d\n", __func__, __LINE__);	\
 	}								\
 	errno = __save_errno;						\
 	__ret;								\
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 7f989b3e4e22..4d0e913bbb22 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -4,12 +4,15 @@
 #include <string.h>
 #include <assert.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <poll.h>
 #include <unistd.h>
 #include <linux/perf_event.h>
 #include <sys/mman.h>
 #include "trace_helpers.h"
 
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
 #define MAX_SYMS 300000
 static struct ksym syms[MAX_SYMS];
 static int sym_cnt;
@@ -86,3 +89,23 @@ long ksym_get_addr(const char *name)
 
 	return 0;
 }
+
+void read_trace_pipe(void)
+{
+	int trace_fd;
+
+	trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+	if (trace_fd < 0)
+		return;
+
+	while (1) {
+		static char buf[4096];
+		ssize_t sz;
+
+		sz = read(trace_fd, buf, sizeof(buf) - 1);
+		if (sz > 0) {
+			buf[sz] = 0;
+			puts(buf);
+		}
+	}
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 0383c9b8adc1..25ef597dd03f 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -12,5 +12,6 @@ struct ksym {
 int load_kallsyms(void);
 struct ksym *ksym_search(long key);
 long ksym_get_addr(const char *name);
+void read_trace_pipe(void);
 
 #endif
diff --git a/tools/testing/selftests/bpf/verifier/.gitignore b/tools/testing/selftests/bpf/verifier/.gitignore
index 45984a364647..89c4a3d37544 100644
--- a/tools/testing/selftests/bpf/verifier/.gitignore
+++ b/tools/testing/selftests/bpf/verifier/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 tests.h
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index d55f476f2237..4d0d09574bf4 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -257,17 +257,15 @@
 	 *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
 	 */
 	BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
-	/* no-op or OOB pointer computation */
+	/* error on OOB pointer computation */
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
-	/* potentially OOB access */
-	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 	/* exit */
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
 	/* not actually fully unbounded, but the bound is very high */
-	.errstr = "R0 unbounded memory access",
+	.errstr = "value 72057594021150720 makes map_value pointer be out of bounds",
 	.result = REJECT
 },
 {
@@ -299,17 +297,15 @@
 	 *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
 	 */
 	BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
-	/* no-op or OOB pointer computation */
+	/* error on OOB pointer computation */
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
-	/* potentially OOB access */
-	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 	/* exit */
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
 	/* not actually fully unbounded, but the bound is very high */
-	.errstr = "R0 unbounded memory access",
+	.errstr = "value 72057594021150720 makes map_value pointer be out of bounds",
 	.result = REJECT
 },
 {
@@ -411,16 +407,14 @@
 	BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31),
 	/* r1 = 0xffff'fffe (NOT 0!) */
 	BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2),
-	/* computes OOB pointer */
+	/* error on computing OOB pointer */
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
-	/* OOB access */
-	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 	/* exit */
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
-	.errstr = "R0 invalid mem access",
+	.errstr = "math between map_value pointer and 4294967294 is not allowed",
 	.result = REJECT,
 },
 {
@@ -506,3 +500,42 @@
 	.errstr = "map_value pointer and 1000000000000",
 	.result = REJECT
 },
+{
+	"bounds check mixed 32bit and 64bit arithmatic. test1",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_1, -1),
+	BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+	/* r1 = 0xffffFFFF00000001 */
+	BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 1, 3),
+	/* check ALU64 op keeps 32bit bounds */
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+	BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 2, 1),
+	BPF_JMP_A(1),
+	/* invalid ldx if bounds are lost above */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT
+},
+{
+	"bounds check mixed 32bit and 64bit arithmatic. test2",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_1, -1),
+	BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+	/* r1 = 0xffffFFFF00000001 */
+	BPF_MOV64_IMM(BPF_REG_2, 3),
+	/* r1 = 0x2 */
+	BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
+	/* check ALU32 op zero extends 64bit bounds */
+	BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 1),
+	BPF_JMP_A(1),
+	/* invalid ldx if bounds are lost above */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT
+},
diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
index f24d50f09dbe..69b048cf46d9 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
@@ -9,17 +9,17 @@
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
 	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
-	BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)),
+	BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)/2),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
-	BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)),
+	BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)/2),
 	BPF_MOV64_IMM(BPF_REG_4, 256),
 	BPF_EMIT_CALL(BPF_FUNC_get_stack),
 	BPF_MOV64_IMM(BPF_REG_1, 0),
 	BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
 	BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
 	BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
-	BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16),
+	BPF_JMP_REG(BPF_JSGT, BPF_REG_1, BPF_REG_8, 16),
 	BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
@@ -29,7 +29,7 @@
 	BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-	BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)),
+	BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)/2),
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c
index 92762c08f5e3..93d6b1641481 100644
--- a/tools/testing/selftests/bpf/verifier/ctx.c
+++ b/tools/testing/selftests/bpf/verifier/ctx.c
@@ -91,3 +91,108 @@
 	.result = REJECT,
 	.errstr = "variable ctx access var_off=(0x0; 0x4)",
 },
+{
+	"pass ctx or null check, 1: ctx",
+	.insns = {
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_netns_cookie),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+	.result = ACCEPT,
+},
+{
+	"pass ctx or null check, 2: null",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_1, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_netns_cookie),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+	.result = ACCEPT,
+},
+{
+	"pass ctx or null check, 3: 1",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_1, 1),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_netns_cookie),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+	.result = REJECT,
+	.errstr = "R1 type=inv expected=ctx",
+},
+{
+	"pass ctx or null check, 4: ctx - const",
+	.insns = {
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_netns_cookie),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+	.result = REJECT,
+	.errstr = "dereference of modified ctx ptr",
+},
+{
+	"pass ctx or null check, 5: null (connect)",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_1, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_netns_cookie),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	.expected_attach_type = BPF_CGROUP_INET4_CONNECT,
+	.result = ACCEPT,
+},
+{
+	"pass ctx or null check, 6: null (bind)",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_1, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_netns_cookie),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+	.result = ACCEPT,
+},
+{
+	"pass ctx or null check, 7: ctx (bind)",
+	.insns = {
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_socket_cookie),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+	.result = ACCEPT,
+},
+{
+	"pass ctx or null check, 8: null (bind)",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_1, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_socket_cookie),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+	.result = REJECT,
+	.errstr = "R1 type=inv expected=ctx",
+},
diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index d438193804b2..2e16b8e268f2 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -1011,6 +1011,53 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 },
 {
+	"read gso_size from CGROUP_SKB",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, gso_size)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"read gso_size from CGROUP_SKB",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+		    offsetof(struct __sk_buff, gso_size)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"write gso_size from CGROUP_SKB",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+		    offsetof(struct __sk_buff, gso_size)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.result_unpriv = REJECT,
+	.errstr = "invalid bpf_context access off=176 size=4",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"read gso_size from CLS",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, gso_size)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
 	"check wire_len is not readable by sockets",
 	.insns = {
 		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index bf0322eb5346..bd5cae4a7f73 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -62,6 +62,21 @@
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
+	"jset32: ignores upper bits",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_LD_IMM64(BPF_REG_7, 0x8000000000000000),
+	BPF_LD_IMM64(BPF_REG_8, 0x8000000000000000),
+	BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1),
+	BPF_EXIT_INSN(),
+	BPF_JMP32_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 2,
+},
+{
 	"jset32: min/max deduction",
 	.insns = {
 	BPF_RAND_UEXT_R7,
diff --git a/tools/testing/selftests/breakpoints/.gitignore b/tools/testing/selftests/breakpoints/.gitignore
index a23bb4a6f06c..def2e97dab9a 100644
--- a/tools/testing/selftests/breakpoints/.gitignore
+++ b/tools/testing/selftests/breakpoints/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 breakpoint_test
 step_after_suspend_test
diff --git a/tools/testing/selftests/capabilities/.gitignore b/tools/testing/selftests/capabilities/.gitignore
index b732dd0d4738..426d9adca67c 100644
--- a/tools/testing/selftests/capabilities/.gitignore
+++ b/tools/testing/selftests/capabilities/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 test_execve
 validate_cap
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index 7f9835624793..aa6de65b0838 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 test_memcontrol
 test_core
 test_freezer
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 66aafe1f5746..967f268fde74 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -11,6 +11,6 @@ TEST_GEN_PROGS += test_freezer
 
 include ../lib.mk
 
-$(OUTPUT)/test_memcontrol: cgroup_util.c
-$(OUTPUT)/test_core: cgroup_util.c
-$(OUTPUT)/test_freezer: cgroup_util.c
+$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 8f7131dcf1ff..8a637ca7d73a 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -15,6 +15,7 @@
 #include <unistd.h>
 
 #include "cgroup_util.h"
+#include "../clone3/clone3_selftests.h"
 
 static ssize_t read_text(const char *path, char *buf, size_t max_len)
 {
@@ -331,12 +332,112 @@ int cg_run(const char *cgroup,
 	}
 }
 
+pid_t clone_into_cgroup(int cgroup_fd)
+{
+#ifdef CLONE_ARGS_SIZE_VER2
+	pid_t pid;
+
+	struct clone_args args = {
+		.flags = CLONE_INTO_CGROUP,
+		.exit_signal = SIGCHLD,
+		.cgroup = cgroup_fd,
+	};
+
+	pid = sys_clone3(&args, sizeof(struct clone_args));
+	/*
+	 * Verify that this is a genuine test failure:
+	 * ENOSYS -> clone3() not available
+	 * E2BIG  -> CLONE_INTO_CGROUP not available
+	 */
+	if (pid < 0 && (errno == ENOSYS || errno == E2BIG))
+		goto pretend_enosys;
+
+	return pid;
+
+pretend_enosys:
+#endif
+	errno = ENOSYS;
+	return -ENOSYS;
+}
+
+int clone_reap(pid_t pid, int options)
+{
+	int ret;
+	siginfo_t info = {
+		.si_signo = 0,
+	};
+
+again:
+	ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
+	if (ret < 0) {
+		if (errno == EINTR)
+			goto again;
+		return -1;
+	}
+
+	if (options & WEXITED) {
+		if (WIFEXITED(info.si_status))
+			return WEXITSTATUS(info.si_status);
+	}
+
+	if (options & WSTOPPED) {
+		if (WIFSTOPPED(info.si_status))
+			return WSTOPSIG(info.si_status);
+	}
+
+	if (options & WCONTINUED) {
+		if (WIFCONTINUED(info.si_status))
+			return 0;
+	}
+
+	return -1;
+}
+
+int dirfd_open_opath(const char *dir)
+{
+	return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH);
+}
+
+#define close_prot_errno(fd)                                                   \
+	if (fd >= 0) {                                                         \
+		int _e_ = errno;                                               \
+		close(fd);                                                     \
+		errno = _e_;                                                   \
+	}
+
+static int clone_into_cgroup_run_nowait(const char *cgroup,
+					int (*fn)(const char *cgroup, void *arg),
+					void *arg)
+{
+	int cgroup_fd;
+	pid_t pid;
+
+	cgroup_fd =  dirfd_open_opath(cgroup);
+	if (cgroup_fd < 0)
+		return -1;
+
+	pid = clone_into_cgroup(cgroup_fd);
+	close_prot_errno(cgroup_fd);
+	if (pid == 0)
+		exit(fn(cgroup, arg));
+
+	return pid;
+}
+
 int cg_run_nowait(const char *cgroup,
 		  int (*fn)(const char *cgroup, void *arg),
 		  void *arg)
 {
 	int pid;
 
+	pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);
+	if (pid > 0)
+		return pid;
+
+	/* Genuine test failure. */
+	if (pid < 0 && errno != ENOSYS)
+		return -1;
+
 	pid = fork();
 	if (pid == 0) {
 		char buf[64];
@@ -450,3 +551,28 @@ int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
 
 	return strstr(buf, needle) ? 0 : -1;
 }
+
+int clone_into_cgroup_run_wait(const char *cgroup)
+{
+	int cgroup_fd;
+	pid_t pid;
+
+	cgroup_fd =  dirfd_open_opath(cgroup);
+	if (cgroup_fd < 0)
+		return -1;
+
+	pid = clone_into_cgroup(cgroup_fd);
+	close_prot_errno(cgroup_fd);
+	if (pid < 0)
+		return -1;
+
+	if (pid == 0)
+		exit(EXIT_SUCCESS);
+
+	/*
+	 * We don't care whether this fails. We only care whether the initial
+	 * clone succeeded.
+	 */
+	(void)clone_reap(pid, WEXITED);
+	return 0;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 49c54fbdb229..5a1305dd1f0b 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -50,3 +50,7 @@ extern int cg_wait_for_proc_count(const char *cgroup, int count);
 extern int cg_killall(const char *cgroup);
 extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size);
 extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle);
+extern pid_t clone_into_cgroup(int cgroup_fd);
+extern int clone_reap(pid_t pid, int options);
+extern int clone_into_cgroup_run_wait(const char *cgroup);
+extern int dirfd_open_opath(const char *dir);
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index e19ce940cd6a..3df648c37876 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -2,7 +2,10 @@
 
 #include <linux/limits.h>
 #include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
 #include <unistd.h>
+#include <fcntl.h>
 #include <stdio.h>
 #include <errno.h>
 #include <signal.h>
@@ -12,6 +15,115 @@
 #include "../kselftest.h"
 #include "cgroup_util.h"
 
+static int touch_anon(char *buf, size_t size)
+{
+	int fd;
+	char *pos = buf;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	while (size > 0) {
+		ssize_t ret = read(fd, pos, size);
+
+		if (ret < 0) {
+			if (errno != EINTR) {
+				close(fd);
+				return -1;
+			}
+		} else {
+			pos += ret;
+			size -= ret;
+		}
+	}
+	close(fd);
+
+	return 0;
+}
+
+static int alloc_and_touch_anon_noexit(const char *cgroup, void *arg)
+{
+	int ppid = getppid();
+	size_t size = (size_t)arg;
+	void *buf;
+
+	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+		   0, 0);
+	if (buf == MAP_FAILED)
+		return -1;
+
+	if (touch_anon((char *)buf, size)) {
+		munmap(buf, size);
+		return -1;
+	}
+
+	while (getppid() == ppid)
+		sleep(1);
+
+	munmap(buf, size);
+	return 0;
+}
+
+/*
+ * Create a child process that allocates and touches 100MB, then waits to be
+ * killed. Wait until the child is attached to the cgroup, kill all processes
+ * in that cgroup and wait until "cgroup.procs" is empty. At this point try to
+ * destroy the empty cgroup. The test helps detect race conditions between
+ * dying processes leaving the cgroup and cgroup destruction path.
+ */
+static int test_cgcore_destroy(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg_test = NULL;
+	int child_pid;
+	char buf[PAGE_SIZE];
+
+	cg_test = cg_name(root, "cg_test");
+
+	if (!cg_test)
+		goto cleanup;
+
+	for (int i = 0; i < 10; i++) {
+		if (cg_create(cg_test))
+			goto cleanup;
+
+		child_pid = cg_run_nowait(cg_test, alloc_and_touch_anon_noexit,
+					  (void *) MB(100));
+
+		if (child_pid < 0)
+			goto cleanup;
+
+		/* wait for the child to enter cgroup */
+		if (cg_wait_for_proc_count(cg_test, 1))
+			goto cleanup;
+
+		if (cg_killall(cg_test))
+			goto cleanup;
+
+		/* wait for cgroup to be empty */
+		while (1) {
+			if (cg_read(cg_test, "cgroup.procs", buf, sizeof(buf)))
+				goto cleanup;
+			if (buf[0] == '\0')
+				break;
+			usleep(1000);
+		}
+
+		if (rmdir(cg_test))
+			goto cleanup;
+
+		if (waitpid(child_pid, NULL, 0) < 0)
+			goto cleanup;
+	}
+	ret = KSFT_PASS;
+cleanup:
+	if (cg_test)
+		cg_destroy(cg_test);
+	free(cg_test);
+	return ret;
+}
+
 /*
  * A(0) - B(0) - C(1)
  *        \ D(0)
@@ -25,8 +137,11 @@
 static int test_cgcore_populated(const char *root)
 {
 	int ret = KSFT_FAIL;
+	int err;
 	char *cg_test_a = NULL, *cg_test_b = NULL;
 	char *cg_test_c = NULL, *cg_test_d = NULL;
+	int cgroup_fd = -EBADF;
+	pid_t pid;
 
 	cg_test_a = cg_name(root, "cg_test_a");
 	cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
@@ -78,6 +193,52 @@ static int test_cgcore_populated(const char *root)
 	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
 		goto cleanup;
 
+	/* Test that we can directly clone into a new cgroup. */
+	cgroup_fd = dirfd_open_opath(cg_test_d);
+	if (cgroup_fd < 0)
+		goto cleanup;
+
+	pid = clone_into_cgroup(cgroup_fd);
+	if (pid < 0) {
+		if (errno == ENOSYS)
+			goto cleanup_pass;
+		goto cleanup;
+	}
+
+	if (pid == 0) {
+		if (raise(SIGSTOP))
+			exit(EXIT_FAILURE);
+		exit(EXIT_SUCCESS);
+	}
+
+	err = cg_read_strcmp(cg_test_d, "cgroup.events", "populated 1\n");
+
+	(void)clone_reap(pid, WSTOPPED);
+	(void)kill(pid, SIGCONT);
+	(void)clone_reap(pid, WEXITED);
+
+	if (err)
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
+		goto cleanup;
+
+	/* Remove cgroup. */
+	if (cg_test_d) {
+		cg_destroy(cg_test_d);
+		free(cg_test_d);
+		cg_test_d = NULL;
+	}
+
+	pid = clone_into_cgroup(cgroup_fd);
+	if (pid < 0)
+		goto cleanup_pass;
+	if (pid == 0)
+		exit(EXIT_SUCCESS);
+	(void)clone_reap(pid, WEXITED);
+	goto cleanup;
+
+cleanup_pass:
 	ret = KSFT_PASS;
 
 cleanup:
@@ -93,6 +254,8 @@ cleanup:
 	free(cg_test_c);
 	free(cg_test_b);
 	free(cg_test_a);
+	if (cgroup_fd >= 0)
+		close(cgroup_fd);
 	return ret;
 }
 
@@ -136,6 +299,16 @@ static int test_cgcore_invalid_domain(const char *root)
 	if (errno != EOPNOTSUPP)
 		goto cleanup;
 
+	if (!clone_into_cgroup_run_wait(child))
+		goto cleanup;
+
+	if (errno == ENOSYS)
+		goto cleanup_pass;
+
+	if (errno != EOPNOTSUPP)
+		goto cleanup;
+
+cleanup_pass:
 	ret = KSFT_PASS;
 
 cleanup:
@@ -345,6 +518,9 @@ static int test_cgcore_internal_process_constraint(const char *root)
 	if (!cg_enter_current(parent))
 		goto cleanup;
 
+	if (!clone_into_cgroup_run_wait(parent))
+		goto cleanup;
+
 	ret = KSFT_PASS;
 
 cleanup:
@@ -512,6 +688,7 @@ struct corecg_test {
 	T(test_cgcore_populated),
 	T(test_cgcore_proc_migration),
 	T(test_cgcore_thread_migration),
+	T(test_cgcore_destroy),
 };
 #undef T
 
diff --git a/tools/testing/selftests/clone3/.gitignore b/tools/testing/selftests/clone3/.gitignore
index 0dc4f32c6cb8..a81085742d40 100644
--- a/tools/testing/selftests/clone3/.gitignore
+++ b/tools/testing/selftests/clone3/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 clone3
 clone3_clear_sighand
 clone3_set_tid
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index a3f2c8ad8bcc..91c1a78ddb39 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -5,12 +5,24 @@
 
 #define _GNU_SOURCE
 #include <sched.h>
+#include <linux/sched.h>
+#include <linux/types.h>
 #include <stdint.h>
 #include <syscall.h>
-#include <linux/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
 
 #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
 
+#ifndef CLONE_INTO_CGROUP
+#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
+#endif
+
+#ifndef CLONE_ARGS_SIZE_VER0
+#define CLONE_ARGS_SIZE_VER0 64
+#endif
+
 #ifndef __NR_clone3
 #define __NR_clone3 -1
 struct clone_args {
@@ -22,10 +34,13 @@ struct clone_args {
 	__aligned_u64 stack;
 	__aligned_u64 stack_size;
 	__aligned_u64 tls;
+#define CLONE_ARGS_SIZE_VER1 80
 	__aligned_u64 set_tid;
 	__aligned_u64 set_tid_size;
+#define CLONE_ARGS_SIZE_VER2 88
+	__aligned_u64 cgroup;
 };
-#endif
+#endif /* __NR_clone3 */
 
 static pid_t sys_clone3(struct clone_args *args, size_t size)
 {
diff --git a/tools/testing/selftests/drivers/.gitignore b/tools/testing/selftests/drivers/.gitignore
index f6aebcc27b76..ca74f2e1c719 100644
--- a/tools/testing/selftests/drivers/.gitignore
+++ b/tools/testing/selftests/drivers/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 /dma-buf/udmabuf
diff --git a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
index 5ba5bef44d5b..bdffe698e1d1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
@@ -45,6 +45,7 @@ ALL_TESTS="
 	blackhole_ipv6
 "
 NUM_NETIFS=4
+: ${TIMEOUT:=20000} # ms
 source $lib_dir/tc_common.sh
 source $lib_dir/lib.sh
 
@@ -123,7 +124,7 @@ blackhole_ipv4()
 		skip_hw dst_ip 198.51.100.1 src_ip 192.0.2.1 ip_proto icmp \
 		action pass
 
-	ip -4 route show 198.51.100.0/30 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload ip -4 route show 198.51.100.0/30
 	check_err $? "route not marked as offloaded when should"
 
 	ping_do $h1 198.51.100.1
@@ -147,7 +148,7 @@ blackhole_ipv6()
 		skip_hw dst_ip 2001:db8:2::1 src_ip 2001:db8:1::1 \
 		ip_proto icmpv6 action pass
 
-	ip -6 route show 2001:db8:2::/120 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload ip -6 route show 2001:db8:2::/120
 	check_err $? "route not marked as offloaded when should"
 
 	ping6_do $h1 2001:db8:2::1
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
new file mode 100755
index 000000000000..26044e397157
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap ACL drops functionality over mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ingress_flow_action_drop_test
+	egress_flow_action_drop_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ingress_flow_action_drop_test()
+{
+	local mz_pid
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower src_mac $h1mac action pass
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+		flower dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -d 1msec -q &
+	mz_pid=$!
+
+	RET=0
+
+	devlink_trap_drop_test ingress_flow_action_drop acl_drops $swp2 101
+
+	log_test "ingress_flow_action_drop"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+}
+
+egress_flow_action_drop_test()
+{
+	local mz_pid
+
+	tc filter add dev $swp2 egress protocol ip pref 2 handle 102 \
+		flower src_mac $h1mac action pass
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -d 1msec -q &
+	mz_pid=$!
+
+	RET=0
+
+	devlink_trap_drop_test egress_flow_action_drop acl_drops $swp2 102
+
+	log_test "egress_flow_action_drop"
+
+	tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 2 102
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
index 58cdbfb608e9..e7aecb065409 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -107,11 +107,11 @@ source_mac_is_multicast_test()
 
 	RET=0
 
-	devlink_trap_drop_test $trap_name $group_name $swp2
+	devlink_trap_drop_test $trap_name $group_name $swp2 101
 
 	log_test "Source MAC is multicast"
 
-	devlink_trap_drop_cleanup $mz_pid $swp2 ip
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
 }
 
 __vlan_tag_mismatch_test()
@@ -132,7 +132,7 @@ __vlan_tag_mismatch_test()
 	$MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2
+	devlink_trap_drop_test $trap_name $group_name $swp2 101
 
 	# Add PVID and make sure packets are no longer dropped.
 	bridge vlan add vid 1 dev $swp1 pvid untagged master
@@ -148,7 +148,7 @@ __vlan_tag_mismatch_test()
 
 	devlink_trap_action_set $trap_name "drop"
 
-	devlink_trap_drop_cleanup $mz_pid $swp2 ip
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
 }
 
 vlan_tag_mismatch_untagged_test()
@@ -193,7 +193,7 @@ ingress_vlan_filter_test()
 	$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2
+	devlink_trap_drop_test $trap_name $group_name $swp2 101
 
 	# Add the VLAN on the bridge port and make sure packets are no longer
 	# dropped.
@@ -212,7 +212,7 @@ ingress_vlan_filter_test()
 
 	log_test "Ingress VLAN filter"
 
-	devlink_trap_drop_cleanup $mz_pid $swp2 ip
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
 
 	bridge vlan del vid $vid dev $swp1 master
 	bridge vlan del vid $vid dev $swp2 master
@@ -237,7 +237,7 @@ __ingress_stp_filter_test()
 	$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2
+	devlink_trap_drop_test $trap_name $group_name $swp2 101
 
 	# Change STP state to forwarding and make sure packets are no longer
 	# dropped.
@@ -254,7 +254,7 @@ __ingress_stp_filter_test()
 
 	devlink_trap_action_set $trap_name "drop"
 
-	devlink_trap_drop_cleanup $mz_pid $swp2 ip
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
 
 	bridge vlan del vid $vid dev $swp1 master
 	bridge vlan del vid $vid dev $swp2 master
@@ -308,7 +308,7 @@ port_list_is_empty_uc_test()
 	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2
+	devlink_trap_drop_test $trap_name $group_name $swp2 101
 
 	# Allow packets to be flooded to one port.
 	ip link set dev $swp2 type bridge_slave flood on
@@ -326,7 +326,7 @@ port_list_is_empty_uc_test()
 
 	log_test "Port list is empty - unicast"
 
-	devlink_trap_drop_cleanup $mz_pid $swp2 ip
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
 
 	ip link set dev $swp1 type bridge_slave flood on
 }
@@ -354,7 +354,7 @@ port_list_is_empty_mc_test()
 	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2
+	devlink_trap_drop_test $trap_name $group_name $swp2 101
 
 	# Allow packets to be flooded to one port.
 	ip link set dev $swp2 type bridge_slave mcast_flood on
@@ -372,7 +372,7 @@ port_list_is_empty_mc_test()
 
 	log_test "Port list is empty - multicast"
 
-	devlink_trap_drop_cleanup $mz_pid $swp2 ip
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
 
 	ip link set dev $swp1 type bridge_slave mcast_flood on
 }
@@ -401,7 +401,7 @@ port_loopback_filter_uc_test()
 	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2
+	devlink_trap_drop_test $trap_name $group_name $swp2 101
 
 	# Allow packets to be flooded.
 	ip link set dev $swp2 type bridge_slave flood on
@@ -419,7 +419,7 @@ port_loopback_filter_uc_test()
 
 	log_test "Port loopback filter - unicast"
 
-	devlink_trap_drop_cleanup $mz_pid $swp2 ip
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
 }
 
 port_loopback_filter_test()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index d88d8e47d11b..616f47d86a61 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -176,11 +176,11 @@ non_ip_test()
 		00:00 de:ad:be:ef" &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2
+	devlink_trap_drop_test $trap_name $group_name $rp2 101
 
 	log_test "Non IP"
 
-	devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
 }
 
 __uc_dip_over_mc_dmac_test()
@@ -206,11 +206,11 @@ __uc_dip_over_mc_dmac_test()
 		-B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2
+	devlink_trap_drop_test $trap_name $group_name $rp2 101
 
 	log_test "Unicast destination IP over multicast destination MAC: $desc"
 
-	devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
 }
 
 uc_dip_over_mc_dmac_test()
@@ -242,11 +242,11 @@ __sip_is_loopback_test()
 		-b $rp1mac -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2
+	devlink_trap_drop_test $trap_name $group_name $rp2 101
 
 	log_test "Source IP is loopback address: $desc"
 
-	devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
 }
 
 sip_is_loopback_test()
@@ -277,11 +277,11 @@ __dip_is_loopback_test()
 		-B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2
+	devlink_trap_drop_test $trap_name $group_name $rp2 101
 
 	log_test "Destination IP is loopback address: $desc"
 
-	devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
 }
 
 dip_is_loopback_test()
@@ -313,11 +313,11 @@ __sip_is_mc_test()
 		-b $rp1mac -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2
+	devlink_trap_drop_test $trap_name $group_name $rp2 101
 
 	log_test "Source IP is multicast: $desc"
 
-	devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
 }
 
 sip_is_mc_test()
@@ -345,11 +345,11 @@ ipv4_sip_is_limited_bc_test()
 		-B $h2_ipv4 -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2
+	devlink_trap_drop_test $trap_name $group_name $rp2 101
 
 	log_test "IPv4 source IP is limited broadcast"
 
-	devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
 }
 
 ipv4_payload_get()
@@ -399,11 +399,11 @@ __ipv4_header_corrupted_test()
 	$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2
+	devlink_trap_drop_test $trap_name $group_name $rp2 101
 
 	log_test "IP header corrupted: $desc: IPv4"
 
-	devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
 }
 
 ipv6_payload_get()
@@ -446,11 +446,11 @@ __ipv6_header_corrupted_test()
 	$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2
+	devlink_trap_drop_test $trap_name $group_name $rp2 101
 
 	log_test "IP header corrupted: $desc: IPv6"
 
-	devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
 }
 
 ip_header_corrupted_test()
@@ -485,11 +485,11 @@ ipv6_mc_dip_reserved_scope_test()
 		"33:33:00:00:00:00" -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2
+	devlink_trap_drop_test $trap_name $group_name $rp2 101
 
 	log_test "IPv6 multicast destination IP reserved scope"
 
-	devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6"
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101
 }
 
 ipv6_mc_dip_interface_local_scope_test()
@@ -511,11 +511,11 @@ ipv6_mc_dip_interface_local_scope_test()
 		"33:33:00:00:00:00" -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2
+	devlink_trap_drop_test $trap_name $group_name $rp2 101
 
 	log_test "IPv6 multicast destination IP interface-local scope"
 
-	devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6"
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101
 }
 
 __blackhole_route_test()
@@ -542,10 +542,10 @@ __blackhole_route_test()
 		-B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2
+	devlink_trap_drop_test $trap_name $group_name $rp2 101
 	log_test "Blackhole route: IPv$flags"
 
-	devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
 	ip -$flags route del blackhole $subnet
 }
 
@@ -641,13 +641,9 @@ erif_disabled_test()
 	mz_pid=$!
 
 	sleep 5
-	# In order to see this trap we need a route that points to disabled RIF.
-	# When ipv6 address is flushed, there is a delay and the routes are
-	# deleted before the RIF and we cannot get state that we have route
-	# to disabled RIF.
-	# Delete IPv6 address first and then check this trap with flushing IPv4.
-	ip -6 add flush dev br0
-	ip -4 add flush dev br0
+	# Unlinking the port from the bridge will disable the RIF associated
+	# with br0 as it is no longer an upper of any mlxsw port.
+	ip link set dev $rp1 nomaster
 
 	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
 	t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
@@ -659,7 +655,6 @@ erif_disabled_test()
 	log_test "Egress RIF disabled"
 
 	kill $mz_pid && wait $mz_pid &> /dev/null
-	ip link set dev $rp1 nomaster
 	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
 	ip link del dev br0 type bridge
 	devlink_trap_action_set $trap_name "drop"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
new file mode 100755
index 000000000000..47edf099a17e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -0,0 +1,384 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap policer functionality over mlxsw.
+
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |                                                                           |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |                            |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	rate_limits_test
+	burst_limits_test
+	rate_test
+	burst_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+	mtu_set $h1 10000
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	mtu_restore $h1
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24
+	mtu_set $h2 10000
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+}
+
+h2_destroy()
+{
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	mtu_restore $h2
+	simple_if_fini $h2 198.51.100.1/24
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	__addr_add_del $rp1 add 192.0.2.2/24
+	__addr_add_del $rp2 add 198.51.100.2/24
+	mtu_set $rp1 10000
+	mtu_set $rp2 10000
+
+	ip -4 route add blackhole 198.51.100.100
+
+	devlink trap set $DEVLINK_DEV trap blackhole_route action trap
+}
+
+router_destroy()
+{
+	devlink trap set $DEVLINK_DEV trap blackhole_route action drop
+
+	ip -4 route del blackhole 198.51.100.100
+
+	mtu_restore $rp2
+	mtu_restore $rp1
+	__addr_add_del $rp2 del 198.51.100.2/24
+	__addr_add_del $rp1 del 192.0.2.2/24
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1_mac=$(mac_get $rp1)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+
+	# Reload to ensure devlink-trap settings are back to default.
+	devlink_reload
+}
+
+rate_limits_test()
+{
+	RET=0
+
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null
+	check_fail $? "Policer rate was changed to rate lower than limit"
+	devlink trap policer set $DEVLINK_DEV policer 1 \
+		rate 2000000001 &> /dev/null
+	check_fail $? "Policer rate was changed to rate higher than limit"
+
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 1
+	check_err $? "Failed to set policer rate to minimum"
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 2000000000
+	check_err $? "Failed to set policer rate to maximum"
+
+	log_test "Trap policer rate limits"
+}
+
+burst_limits_test()
+{
+	RET=0
+
+	devlink trap policer set $DEVLINK_DEV policer 1 burst 0 &> /dev/null
+	check_fail $? "Policer burst size was changed to 0"
+	devlink trap policer set $DEVLINK_DEV policer 1 burst 17 &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size that is not power of 2"
+	devlink trap policer set $DEVLINK_DEV policer 1 burst 8 &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size lower than limit"
+	devlink trap policer set $DEVLINK_DEV policer 1 \
+		burst $((2**25)) &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size higher than limit"
+
+	devlink trap policer set $DEVLINK_DEV policer 1 burst 16
+	check_err $? "Failed to set policer burst size to minimum"
+	devlink trap policer set $DEVLINK_DEV policer 1 burst $((2**24))
+	check_err $? "Failed to set policer burst size to maximum"
+
+	log_test "Trap policer burst size limits"
+}
+
+trap_rate_get()
+{
+	local t0 t1
+
+	t0=$(devlink_trap_rx_packets_get blackhole_route)
+	sleep 10
+	t1=$(devlink_trap_rx_packets_get blackhole_route)
+
+	echo $(((t1 - t0) / 10))
+}
+
+policer_drop_rate_get()
+{
+	local id=$1; shift
+	local t0 t1
+
+	t0=$(devlink_trap_policer_rx_dropped_get $id)
+	sleep 10
+	t1=$(devlink_trap_policer_rx_dropped_get $id)
+
+	echo $(((t1 - t0) / 10))
+}
+
+__rate_test()
+{
+	local rate pct drop_rate
+	local id=$1; shift
+
+	RET=0
+
+	devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16
+	devlink trap group set $DEVLINK_DEV group l3_drops policer $id
+
+	# Send packets at highest possible rate and make sure they are dropped
+	# by the policer. Make sure measured received rate is about 1000 pps
+	log_info "=== Tx rate: Highest, Policer rate: 1000 pps ==="
+
+	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac
+
+	sleep 5 # Take measurements when rate is stable
+
+	rate=$(trap_rate_get)
+	pct=$((100 * (rate - 1000) / 1000))
+	((-5 <= pct && pct <= 5))
+	check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%"
+	log_info "Expected rate 1000 pps, measured rate $rate pps"
+
+	drop_rate=$(policer_drop_rate_get $id)
+	(( drop_rate > 0 ))
+	check_err $? "Expected non-zero policer drop rate, got 0"
+	log_info "Measured policer drop rate of $drop_rate pps"
+
+	stop_traffic
+
+	# Send packets at a rate of 1000 pps and make sure they are not dropped
+	# by the policer
+	log_info "=== Tx rate: 1000 pps, Policer rate: 1000 pps ==="
+
+	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -d 1msec
+
+	sleep 5 # Take measurements when rate is stable
+
+	drop_rate=$(policer_drop_rate_get $id)
+	(( drop_rate == 0 ))
+	check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
+	log_info "Measured policer drop rate of $drop_rate pps"
+
+	stop_traffic
+
+	# Unbind the policer and send packets at highest possible rate. Make
+	# sure they are not dropped by the policer and that the measured
+	# received rate is higher than 1000 pps
+	log_info "=== Tx rate: Highest, Policer rate: No policer ==="
+
+	devlink trap group set $DEVLINK_DEV group l3_drops nopolicer
+
+	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac
+
+	rate=$(trap_rate_get)
+	(( rate > 1000 ))
+	check_err $? "Expected rate higher than 1000 pps, got $rate pps"
+	log_info "Measured rate $rate pps"
+
+	drop_rate=$(policer_drop_rate_get $id)
+	(( drop_rate == 0 ))
+	check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
+	log_info "Measured policer drop rate of $drop_rate pps"
+
+	stop_traffic
+
+	log_test "Trap policer rate"
+}
+
+rate_test()
+{
+	local id
+
+	for id in $(devlink_trap_policer_ids_get); do
+		echo
+		log_info "Running rate test for policer $id"
+		__rate_test $id
+	done
+}
+
+__burst_test()
+{
+	local t0_rx t0_drop t1_rx t1_drop rx drop
+	local id=$1; shift
+
+	RET=0
+
+	devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32
+	devlink trap group set $DEVLINK_DEV group l3_drops policer $id
+
+	# Send a burst of 64 packets and make sure that about 32 are received
+	# and the rest are dropped by the policer
+	log_info "=== Tx burst size: 64, Policer burst size: 32 pps ==="
+
+	t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
+
+	t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	rx=$((t1_rx - t0_rx))
+	pct=$((100 * (rx - 32) / 32))
+	((-20 <= pct && pct <= 20))
+	check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%"
+	log_info "Expected burst size of 32 packets, measured burst size of $rx packets"
+
+	drop=$((t1_drop - t0_drop))
+	(( drop > 0 ))
+	check_err $? "Expected non-zero policer drops, got 0"
+	log_info "Measured policer drops of $drop packets"
+
+	# Send a burst of 16 packets and make sure that 16 are received
+	# and that none are dropped by the policer
+	log_info "=== Tx burst size: 16, Policer burst size: 32 pps ==="
+
+	t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 16
+
+	t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	rx=$((t1_rx - t0_rx))
+	(( rx == 16 ))
+	check_err $? "Expected burst size of 16 packets, got $rx packets"
+	log_info "Expected burst size of 16 packets, measured burst size of $rx packets"
+
+	drop=$((t1_drop - t0_drop))
+	(( drop == 0 ))
+	check_err $? "Expected zero policer drops, got $drop"
+	log_info "Measured policer drops of $drop packets"
+
+	# Unbind the policer and send a burst of 64 packets. Make sure that
+	# 64 packets are received and that none are dropped by the policer
+	log_info "=== Tx burst size: 64, Policer burst size: No policer ==="
+
+	devlink trap group set $DEVLINK_DEV group l3_drops nopolicer
+
+	t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
+
+	t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	rx=$((t1_rx - t0_rx))
+	(( rx == 64 ))
+	check_err $? "Expected burst size of 64 packets, got $rx packets"
+	log_info "Expected burst size of 64 packets, measured burst size of $rx packets"
+
+	drop=$((t1_drop - t0_drop))
+	(( drop == 0 ))
+	check_err $? "Expected zero policer drops, got $drop"
+	log_info "Measured policer drops of $drop packets"
+
+	log_test "Trap policer burst size"
+}
+
+burst_test()
+{
+	local id
+
+	for id in $(devlink_trap_policer_ids_get); do
+		echo
+		log_info "Running burst size test for policer $id"
+		__burst_test $id
+	done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
index fd19161dd4ec..e11a416323cf 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -314,11 +314,11 @@ overlay_smac_is_mc_test()
 		-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp1
+	devlink_trap_drop_test $trap_name $group_name $swp1 101
 
 	log_test "Overlay source MAC is multicast"
 
-	devlink_trap_drop_cleanup $mz_pid $swp1 "ip"
+	devlink_trap_drop_cleanup $mz_pid $swp1 "ip" 1 101
 }
 
 trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
index d72d8488a3b2..7a0a99c1d22f 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
@@ -8,7 +8,8 @@ lib_dir=$(dirname $0)/../../../net/forwarding
 ALL_TESTS="
 	netdev_pre_up_test
 	vxlan_vlan_add_test
-	port_vlan_add_test
+	vxlan_bridge_create_test
+	bridge_create_test
 "
 NUM_NETIFS=2
 source $lib_dir/lib.sh
@@ -106,32 +107,56 @@ vxlan_vlan_add_test()
 	ip link del dev br1
 }
 
-port_vlan_add_test()
+vxlan_bridge_create_test()
 {
 	RET=0
 
-	ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
-
 	# Unsupported configuration: mlxsw demands VXLAN with "noudpcsum".
 	ip link add name vx1 up type vxlan id 1000 \
 		local 192.0.2.17 remote 192.0.2.18 \
 		dstport 4789 tos inherit ttl 100
 
-	ip link set dev $swp1 master br1
-	check_err $?
-
-	bridge vlan del dev $swp1 vid 1
+	# Test with VLAN-aware bridge.
+	ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
 
 	ip link set dev vx1 master br1
+
+	ip link set dev $swp1 master br1 2>&1 > /dev/null \
+		| grep -q mlxsw_spectrum
 	check_err $?
 
-	bridge vlan add dev $swp1 vid 1 pvid untagged 2>&1 >/dev/null \
+	# Test with VLAN-unaware bridge.
+	ip link set dev br1 type bridge vlan_filtering 0
+
+	ip link set dev $swp1 master br1 2>&1 > /dev/null \
 		| grep -q mlxsw_spectrum
 	check_err $?
 
-	log_test "extack - map VLAN at port"
+	log_test "extack - bridge creation with VXLAN"
 
+	ip link del dev br1
 	ip link del dev vx1
+}
+
+bridge_create_test()
+{
+	RET=0
+
+	ip link add name br1 up type bridge vlan_filtering 1
+	ip link add name br2 up type bridge vlan_filtering 1
+
+	ip link set dev $swp1 master br1
+	check_err $?
+
+	# Only one VLAN-aware bridge is supported, so this should fail with
+	# an extack.
+	ip link set dev $swp2 master br2 2>&1 > /dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $?
+
+	log_test "extack - multiple VLAN-aware bridges creation"
+
+	ip link del dev br2
 	ip link del dev br1
 }
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
new file mode 100644
index 000000000000..cbe50f260a40
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+if [[ ! -v MLXSW_CHIP ]]; then
+	MLXSW_CHIP=$(devlink -j dev info $DEVLINK_DEV | jq -r '.[][]["driver"]')
+	if [ -z "$MLXSW_CHIP" ]; then
+		echo "SKIP: Device $DEVLINK_DEV doesn't support devlink info command"
+		exit 1
+	fi
+fi
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
index eff6393ce974..71066bc4b886 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
@@ -114,23 +114,12 @@ ping_ipv4()
 	ping_test $h1 192.0.2.2
 }
 
-wait_for_packets()
-{
-	local t0=$1; shift
-	local prio_observe=$1; shift
-
-	local t1=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
-	local delta=$((t1 - t0))
-	echo $delta
-	((delta >= 10))
-}
-
 __test_defprio()
 {
 	local prio_install=$1; shift
 	local prio_observe=$1; shift
-	local delta
 	local key
+	local t1
 	local i
 
 	RET=0
@@ -139,9 +128,10 @@ __test_defprio()
 
 	local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
 	mausezahn -q $h1 -d 100m -c 10 -t arp reply
-	delta=$(busywait "$HIT_TIMEOUT" wait_for_packets $t0 $prio_observe)
+	t1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((t0 + 10))" \
+		ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
 
-	check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $delta."
+	check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))."
 	log_test "Default priority $prio_install/$prio_observe"
 
 	defprio_uninstall $swp1 $prio_install
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
index c745ce3befee..4cb2aa65278a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -31,6 +31,7 @@ ALL_TESTS="
 	ping_ipv4
 	test_update
 	test_no_update
+	test_pedit_norewrite
 	test_dscp_leftover
 "
 
@@ -56,6 +57,11 @@ zero()
     echo 0
 }
 
+three()
+{
+    echo 3
+}
+
 h1_create()
 {
 	simple_if_init $h1 192.0.2.1/28
@@ -103,6 +109,9 @@ switch_create()
 	simple_if_init $swp1 192.0.2.2/28
 	__simple_if_init $swp2 v$swp1 192.0.2.17/28
 
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+
 	lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
 	lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
 	lldpad_app_wait_set $swp1
@@ -115,6 +124,9 @@ switch_destroy()
 	lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
 	lldpad_app_wait_del
 
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
 	__simple_if_fini $swp2 192.0.2.17/28
 	simple_if_fini $swp1 192.0.2.2/28
 }
@@ -223,18 +235,36 @@ __test_update()
 
 test_update()
 {
+	echo "Test net.ipv4.ip_forward_update_priority=1"
 	__test_update 1 reprioritize
 }
 
 test_no_update()
 {
+	echo "Test net.ipv4.ip_forward_update_priority=0"
 	__test_update 0 echo
 }
 
+# Test that when DSCP is updated in pedit, the DSCP rewrite is turned off.
+test_pedit_norewrite()
+{
+	echo "Test no DSCP rewrite after DSCP is updated by pedit"
+
+	tc filter add dev $swp1 ingress handle 101 pref 1 prot ip flower \
+	    action pedit ex munge ip dsfield set $((3 << 2)) retain 0xfc \
+	    action skbedit priority 3
+
+	__test_update 0 three
+
+	tc filter del dev $swp1 ingress pref 1
+}
+
 # Test that when the last APP rule is removed, the prio->DSCP map is properly
 # set to zeroes, and that the last APP rule does not stay active in the ASIC.
 test_dscp_leftover()
 {
+	echo "Test that last removed DSCP rule is deconfigured correctly"
+
 	lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
 	lldpad_app_wait_del
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
index d231649b4f01..e93878d42596 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -2,16 +2,15 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ROUTER_NUM_NETIFS=4
+: ${TIMEOUT:=20000} # ms
 
 router_h1_create()
 {
 	simple_if_init $h1 192.0.1.1/24
-	ip route add 193.0.0.0/8 via 192.0.1.2 dev $h1
 }
 
 router_h1_destroy()
 {
-	ip route del 193.0.0.0/8 via 192.0.1.2 dev $h1
 	simple_if_fini $h1 192.0.1.1/24
 }
 
@@ -64,13 +63,15 @@ router_setup_prepare()
 	router_create
 }
 
-router_offload_validate()
+wait_for_routes()
 {
-	local route_count=$1
-	local offloaded_count
+	local t0=$1; shift
+	local route_count=$1; shift
 
-	offloaded_count=$(ip route | grep -o 'offload' | wc -l)
-	[[ $offloaded_count -ge $route_count ]]
+	local t1=$(ip route | grep -o 'offload' | wc -l)
+	local delta=$((t1 - t0))
+	echo $delta
+	[[ $delta -ge $route_count ]]
 }
 
 router_routes_create()
@@ -90,8 +91,8 @@ router_routes_create()
 					break 3
 				fi
 
-				echo route add 193.${i}.${j}.${k}/32 via \
-				       192.0.2.1 dev $rp2  >> $ROUTE_FILE
+				echo route add 193.${i}.${j}.${k}/32 dev $rp2 \
+					>> $ROUTE_FILE
 				((count++))
 			done
 		done
@@ -111,45 +112,19 @@ router_test()
 {
 	local route_count=$1
 	local should_fail=$2
-	local count=0
+	local delta
 
 	RET=0
 
+	local t0=$(ip route | grep -o 'offload' | wc -l)
 	router_routes_create $route_count
+	delta=$(busywait "$TIMEOUT" wait_for_routes $t0 $route_count)
 
-	router_offload_validate $route_count
-	check_err_fail $should_fail $? "Offload of $route_count routes"
+	check_err_fail $should_fail $? "Offload routes: Expected $route_count, got $delta."
 	if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then
 		return
 	fi
 
-	tc filter add dev $h2 ingress protocol ip pref 1 flower \
-		skip_sw dst_ip 193.0.0.0/8 action drop
-
-	for i in {0..255}
-	do
-		for j in {0..255}
-		do
-			for k in {0..255}
-			do
-				if [[ $count -eq $route_count ]]; then
-					break 3
-				fi
-
-				$MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac \
-					-A 192.0.1.1 -B 193.${i}.${j}.${k} \
-					-t ip -q
-				((count++))
-			done
-		done
-	done
-
-	tc_check_packets "dev $h2 ingress" 1 $route_count
-	check_err $? "Offload mismatch"
-
-	tc filter del dev $h2 ingress protocol ip pref 1 flower \
-		skip_sw dst_ip 193.0.0.0/8 action drop
-
 	router_routes_destroy
 }
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index 5c39e5f6a480..f4031002d5e9 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -32,6 +32,7 @@ ALL_TESTS="
 	devlink_reload_test
 "
 NUM_NETIFS=2
+: ${TIMEOUT:=20000} # ms
 source $lib_dir/lib.sh
 source $lib_dir/devlink_lib.sh
 
@@ -360,20 +361,24 @@ vlan_rif_refcount_test()
 	ip link add link br0 name br0.10 up type vlan id 10
 	ip -6 address add 2001:db8:1::1/64 dev br0.10
 
-	ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
 	check_err $? "vlan rif was not created before adding port to vlan"
 
 	bridge vlan add vid 10 dev $swp1
-	ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
 	check_err $? "vlan rif was destroyed after adding port to vlan"
 
 	bridge vlan del vid 10 dev $swp1
-	ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
 	check_err $? "vlan rif was destroyed after removing port from vlan"
 
 	ip link set dev $swp1 nomaster
-	ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
-	check_fail $? "vlan rif was not destroyed after unlinking port from bridge"
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
+	check_err $? "vlan rif was not destroyed after unlinking port from bridge"
 
 	log_test "vlan rif refcount"
 
@@ -401,22 +406,28 @@ subport_rif_refcount_test()
 	ip -6 address add 2001:db8:1::1/64 dev bond1
 	ip -6 address add 2001:db8:2::1/64 dev bond1.10
 
-	ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
 	check_err $? "subport rif was not created on lag device"
-	ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
 	check_err $? "subport rif was not created on vlan device"
 
 	ip link set dev $swp1 nomaster
-	ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
 	check_err $? "subport rif of lag device was destroyed when should not"
-	ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
 	check_err $? "subport rif of vlan device was destroyed when should not"
 
 	ip link set dev $swp2 nomaster
-	ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload
-	check_fail $? "subport rif of lag device was not destroyed when should"
-	ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload
-	check_fail $? "subport rif of vlan device was not destroyed when should"
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was not destroyed when should"
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
+	check_err $? "subport rif of vlan device was not destroyed when should"
 
 	log_test "subport rif refcount"
 
@@ -575,7 +586,8 @@ bridge_extern_learn_test()
 
 	bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn
 
-	bridge fdb show brport $swp1 | grep de:ad:be:ef:13:37 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		bridge fdb show brport $swp1 de:ad:be:ef:13:37
 	check_err $? "fdb entry not marked as offloaded when should"
 
 	log_test "externally learned fdb entry"
@@ -595,9 +607,11 @@ neigh_offload_test()
 	ip -6 neigh add 2001:db8:1::2 lladdr de:ad:be:ef:13:37 nud perm \
 		dev $swp1
 
-	ip -4 neigh show dev $swp1 | grep 192.0.2.2 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -4 neigh show dev $swp1 192.0.2.2
 	check_err $? "ipv4 neigh entry not marked as offloaded when should"
-	ip -6 neigh show dev $swp1 | grep 2001:db8:1::2 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 neigh show dev $swp1 2001:db8:1::2
 	check_err $? "ipv6 neigh entry not marked as offloaded when should"
 
 	log_test "neighbour offload indication"
@@ -623,25 +637,31 @@ nexthop_offload_test()
 	ip -6 route add 2001:db8:2::/64 vrf v$swp1 \
 		nexthop via 2001:db8:1::2 dev $swp1
 
-	ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -4 route show 198.51.100.0/24 vrf v$swp1
 	check_err $? "ipv4 nexthop not marked as offloaded when should"
-	ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route show 2001:db8:2::/64 vrf v$swp1
 	check_err $? "ipv6 nexthop not marked as offloaded when should"
 
 	ip link set dev $swp2 down
 	sleep 1
 
-	ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload
-	check_fail $? "ipv4 nexthop marked as offloaded when should not"
-	ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload
-	check_fail $? "ipv6 nexthop marked as offloaded when should not"
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -4 route show 198.51.100.0/24 vrf v$swp1
+	check_err $? "ipv4 nexthop marked as offloaded when should not"
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route show 2001:db8:2::/64 vrf v$swp1
+	check_err $? "ipv6 nexthop marked as offloaded when should not"
 
 	ip link set dev $swp2 up
 	setup_wait
 
-	ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -4 route show 198.51.100.0/24 vrf v$swp1
 	check_err $? "ipv4 nexthop not marked as offloaded after neigh add"
-	ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route show 2001:db8:2::/64 vrf v$swp1
 	check_err $? "ipv6 nexthop not marked as offloaded after neigh add"
 
 	log_test "nexthop offload indication"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
index c9fc4d4885c1..94c37124a840 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -56,11 +56,19 @@ switch_destroy()
 }
 
 # Callback from sch_ets_tests.sh
-get_stats()
+collect_stats()
 {
-	local band=$1; shift
+	local -a streams=("$@")
+	local stream
 
-	ethtool_stats_get "$h2" rx_octets_prio_$band
+	# Wait for qdisc counter update so that we don't get it mid-way through.
+	busywait_for_counter 1000 +1 \
+		qdisc_parent_stats_get $swp2 10:$((${streams[0]} + 1)) .bytes \
+		> /dev/null
+
+	for stream in ${streams[@]}; do
+		qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes
+	done
 }
 
 bail_on_lldpad
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
new file mode 100644
index 000000000000..0d347d48c112
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -0,0 +1,533 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends a >1Gbps stream of traffic from H1, to the switch, which
+# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the
+# switch and forwarded to the port under test $swp3, which is also 1Gbps.
+#
+# This way, $swp3 should be 100% filled with traffic without any of it spilling
+# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That
+# is what H2 is used for--it sends the extra traffic to create backlog.
+#
+# A RED Qdisc is installed on $swp3. The configuration is such that the minimum
+# and maximum size are 1 byte apart, so there is a very clear border under which
+# no marking or dropping takes place, and above which everything is marked or
+# dropped.
+#
+# The test uses the buffer build-up behavior to test the installed RED.
+#
+# In order to test WRED, $swp3 actually contains RED under PRIO, with two
+# different configurations. Traffic is prioritized using 802.1p and relies on
+# the implicit mlxsw configuration, where packet priority is taken 1:1 from the
+# 802.1p marking.
+#
+# +--------------------------+                     +--------------------------+
+# | H1                       |                     | H2                       |
+# |     + $h1.10             |                     |     + $h2.10             |
+# |     | 192.0.2.1/28       |                     |     | 192.0.2.2/28       |
+# |     |                    |                     |     |                    |
+# |     |         $h1.11 +   |                     |     |         $h2.11 +   |
+# |     |  192.0.2.17/28 |   |                     |     |  192.0.2.18/28 |   |
+# |     |                |   |                     |     |                |   |
+# |     \______    ______/   |                     |     \______    ______/   |
+# |            \ /           |                     |            \ /           |
+# |             + $h1        |                     |             + $h2        |
+# +-------------|------------+                     +-------------|------------+
+#               | >1Gbps                                         |
+# +-------------|------------------------------------------------|------------+
+# | SW          + $swp1                                          + $swp2      |
+# |     _______/ \___________                        ___________/ \_______    |
+# |    /                     \                      /                     \   |
+# |  +-|-----------------+   |                    +-|-----------------+   |   |
+# |  | + $swp1.10        |   |                    | + $swp2.10        |   |   |
+# |  |                   |   |        .-------------+ $swp5.10        |   |   |
+# |  |     BR1_10        |   |        |           |                   |   |   |
+# |  |                   |   |        |           |     BR2_10        |   |   |
+# |  | + $swp2.10        |   |        |           |                   |   |   |
+# |  +-|-----------------+   |        |           | + $swp3.10        |   |   |
+# |    |                     |        |           +-|-----------------+   |   |
+# |    |   +-----------------|-+      |             |   +-----------------|-+ |
+# |    |   |        $swp1.11 + |      |             |   |        $swp2.11 + | |
+# |    |   |                   |      | .-----------------+ $swp5.11        | |
+# |    |   |      BR1_11       |      | |           |   |                   | |
+# |    |   |                   |      | |           |   |      BR2_11       | |
+# |    |   |        $swp2.11 + |      | |           |   |                   | |
+# |    |   +-----------------|-+      | |           |   |        $swp3.11 + | |
+# |    |                     |        | |           |   +-----------------|-+ |
+# |    \_______   ___________/        | |           \___________   _______/   |
+# |            \ /                    \ /                       \ /           |
+# |             + $swp4                + $swp5                   + $swp3      |
+# +-------------|----------------------|-------------------------|------------+
+#               |                      |                         | 1Gbps
+#               \________1Gbps_________/                         |
+#                                   +----------------------------|------------+
+#                                   | H3                         + $h3        |
+#                                   |      _____________________/ \_______    |
+#                                   |     /                               \   |
+#                                   |     |                               |   |
+#                                   |     + $h3.10                 $h3.11 +   |
+#                                   |       192.0.2.3/28    192.0.2.19/28     |
+#                                   +-----------------------------------------+
+
+NUM_NETIFS=8
+CHECK_TC="yes"
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+ipaddr()
+{
+	local host=$1; shift
+	local vlan=$1; shift
+
+	echo 192.0.2.$((16 * (vlan - 10) + host))
+}
+
+host_create()
+{
+	local dev=$1; shift
+	local host=$1; shift
+
+	simple_if_init $dev
+	mtu_set $dev 10000
+
+	vlan_create $dev 10 v$dev $(ipaddr $host 10)/28
+	ip link set dev $dev.10 type vlan egress 0:0
+
+	vlan_create $dev 11 v$dev $(ipaddr $host 11)/28
+	ip link set dev $dev.11 type vlan egress 0:1
+}
+
+host_destroy()
+{
+	local dev=$1; shift
+
+	vlan_destroy $dev 11
+	vlan_destroy $dev 10
+	mtu_restore $dev
+	simple_if_fini $dev
+}
+
+h1_create()
+{
+	host_create $h1 1
+}
+
+h1_destroy()
+{
+	host_destroy $h1
+}
+
+h2_create()
+{
+	host_create $h2 2
+
+	# Some of the tests in this suite use multicast traffic. As this traffic
+	# enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
+	# e.g. traffic ingressing through $swp2 is flooded to $swp3 (the
+	# intended destination) and $swp5 (which is intended as ingress for
+	# another stream of traffic).
+	#
+	# This is generally not a problem, but if the $swp5 throughput is lower
+	# than $swp2 throughput, there will be a build-up at $swp5. That may
+	# cause packets to fail to queue up at $swp3 due to shared buffer
+	# quotas, and the test to spuriously fail.
+	#
+	# Prevent this by setting the speed of $h2 to 1Gbps.
+
+	ethtool -s $h2 speed 1000 autoneg off
+}
+
+h2_destroy()
+{
+	ethtool -s $h2 autoneg on
+	host_destroy $h2
+}
+
+h3_create()
+{
+	host_create $h3 3
+	ethtool -s $h3 speed 1000 autoneg off
+}
+
+h3_destroy()
+{
+	ethtool -s $h3 autoneg on
+	host_destroy $h3
+}
+
+switch_create()
+{
+	local intf
+	local vlan
+
+	ip link add dev br1_10 type bridge
+	ip link add dev br1_11 type bridge
+
+	ip link add dev br2_10 type bridge
+	ip link add dev br2_11 type bridge
+
+	for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do
+		ip link set dev $intf up
+		mtu_set $intf 10000
+	done
+
+	for intf in $swp1 $swp4; do
+		for vlan in 10 11; do
+			vlan_create $intf $vlan
+			ip link set dev $intf.$vlan master br1_$vlan
+			ip link set dev $intf.$vlan up
+		done
+	done
+
+	for intf in $swp2 $swp3 $swp5; do
+		for vlan in 10 11; do
+			vlan_create $intf $vlan
+			ip link set dev $intf.$vlan master br2_$vlan
+			ip link set dev $intf.$vlan up
+		done
+	done
+
+	ip link set dev $swp4.10 type vlan egress 0:0
+	ip link set dev $swp4.11 type vlan egress 0:1
+	for intf in $swp1 $swp2 $swp5; do
+		for vlan in 10 11; do
+			ip link set dev $intf.$vlan type vlan ingress 0:0 1:1
+		done
+	done
+
+	for intf in $swp2 $swp3 $swp4 $swp5; do
+		ethtool -s $intf speed 1000 autoneg off
+	done
+
+	ip link set dev br1_10 up
+	ip link set dev br1_11 up
+	ip link set dev br2_10 up
+	ip link set dev br2_11 up
+
+	local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
+	devlink_port_pool_th_set $swp3 8 $size
+}
+
+switch_destroy()
+{
+	local intf
+	local vlan
+
+	devlink_port_pool_th_restore $swp3 8
+
+	tc qdisc del dev $swp3 root 2>/dev/null
+
+	ip link set dev br2_11 down
+	ip link set dev br2_10 down
+	ip link set dev br1_11 down
+	ip link set dev br1_10 down
+
+	for intf in $swp5 $swp4 $swp3 $swp2; do
+		ethtool -s $intf autoneg on
+	done
+
+	for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do
+		for vlan in 11 10; do
+			ip link set dev $intf.$vlan down
+			ip link set dev $intf.$vlan nomaster
+			vlan_destroy $intf $vlan
+		done
+
+		mtu_restore $intf
+		ip link set dev $intf down
+	done
+
+	ip link del dev br2_11
+	ip link del dev br2_10
+	ip link del dev br1_11
+	ip link del dev br1_10
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	swp4=${NETIFS[p7]}
+	swp5=${NETIFS[p8]}
+
+	h3_mac=$(mac_get $h3)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10"
+	ping_test $h1.11 $(ipaddr 3 11) " from host 1, vlan 11"
+	ping_test $h2.10 $(ipaddr 3 10) " from host 2, vlan 10"
+	ping_test $h2.11 $(ipaddr 3 11) " from host 2, vlan 11"
+}
+
+get_tc()
+{
+	local vlan=$1; shift
+
+	echo $((vlan - 10))
+}
+
+get_qdisc_handle()
+{
+	local vlan=$1; shift
+
+	local tc=$(get_tc $vlan)
+	local band=$((8 - tc))
+
+	# Handle is 107: for TC1, 108: for TC0.
+	echo "10$band:"
+}
+
+get_qdisc_backlog()
+{
+	local vlan=$1; shift
+
+	qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .backlog
+}
+
+get_mc_transmit_queue()
+{
+	local vlan=$1; shift
+
+	local tc=$(($(get_tc $vlan) + 8))
+	ethtool_stats_get $swp3 tc_transmit_queue_tc_$tc
+}
+
+get_nmarked()
+{
+	local vlan=$1; shift
+
+	ethtool_stats_get $swp3 ecn_marked
+}
+
+get_qdisc_npackets()
+{
+	local vlan=$1; shift
+
+	busywait_for_counter 1100 +1 \
+		qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets
+}
+
+# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
+# success. After 10 failed attempts it bails out and returns 1. It dumps the
+# backlog size to stdout.
+build_backlog()
+{
+	local vlan=$1; shift
+	local size=$1; shift
+	local proto=$1; shift
+
+	local tc=$((vlan - 10))
+	local band=$((8 - tc))
+	local cur=-1
+	local i=0
+
+	while :; do
+		local cur=$(busywait 1100 until_counter_is "> $cur" \
+					    get_qdisc_backlog $vlan)
+		local diff=$((size - cur))
+		local pkts=$(((diff + 7999) / 8000))
+
+		if ((cur >= size)); then
+			echo $cur
+			return 0
+		elif ((i++ > 10)); then
+			echo $cur
+			return 1
+		fi
+
+		$MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
+		    -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
+		    -t $proto -q -c $pkts "$@"
+	done
+}
+
+check_marking()
+{
+	local vlan=$1; shift
+	local cond=$1; shift
+
+	local npackets_0=$(get_qdisc_npackets $vlan)
+	local nmarked_0=$(get_nmarked $vlan)
+	sleep 5
+	local npackets_1=$(get_qdisc_npackets $vlan)
+	local nmarked_1=$(get_nmarked $vlan)
+
+	local nmarked_d=$((nmarked_1 - nmarked_0))
+	local npackets_d=$((npackets_1 - npackets_0))
+	local pct=$((100 * nmarked_d / npackets_d))
+
+	echo $pct
+	((pct $cond))
+}
+
+ecn_test_common()
+{
+	local name=$1; shift
+	local vlan=$1; shift
+	local limit=$1; shift
+	local backlog
+	local pct
+
+	# Build the below-the-limit backlog using UDP. We could use TCP just
+	# fine, but this way we get a proof that UDP is accepted when queue
+	# length is below the limit. The main stream is using TCP, and if the
+	# limit is misconfigured, we would see this traffic being ECN marked.
+	RET=0
+	backlog=$(build_backlog $vlan $((2 * limit / 3)) udp)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking $vlan "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	log_test "TC $((vlan - 10)): $name backlog < limit"
+
+	# Now push TCP, because non-TCP traffic would be early-dropped after the
+	# backlog crosses the limit, and we want to make sure that the backlog
+	# is above the limit.
+	RET=0
+	backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking $vlan ">= 95")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
+	log_test "TC $((vlan - 10)): $name backlog > limit"
+}
+
+do_ecn_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local name=ECN
+
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+			  $h3_mac tos=0x01
+	sleep 1
+
+	ecn_test_common "$name" $vlan $limit
+
+	# Up there we saw that UDP gets accepted when backlog is below the
+	# limit. Now that it is above, it should all get dropped, and backlog
+	# building should fail.
+	RET=0
+	build_backlog $vlan $((2 * limit)) udp >/dev/null
+	check_fail $? "UDP traffic went into backlog instead of being early-dropped"
+	log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped"
+
+	stop_traffic
+	sleep 1
+}
+
+do_ecn_nodrop_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local name="ECN nodrop"
+
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+			  $h3_mac tos=0x01
+	sleep 1
+
+	ecn_test_common "$name" $vlan $limit
+
+	# Up there we saw that UDP gets accepted when backlog is below the
+	# limit. Now that it is above, in nodrop mode, make sure it goes to
+	# backlog as well.
+	RET=0
+	build_backlog $vlan $((2 * limit)) udp >/dev/null
+	check_err $? "UDP traffic was early-dropped instead of getting into backlog"
+	log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped"
+
+	stop_traffic
+	sleep 1
+}
+
+do_red_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local backlog
+	local pct
+
+	# Use ECN-capable TCP to verify there's no marking even though the queue
+	# is above limit.
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+			  $h3_mac tos=0x01
+
+	# Pushing below the queue limit should work.
+	RET=0
+	backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking $vlan "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	log_test "TC $((vlan - 10)): RED backlog < limit"
+
+	# Pushing above should not.
+	RET=0
+	backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+	check_fail $? "Traffic went into backlog instead of being early-dropped"
+	pct=$(check_marking $vlan "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	local diff=$((limit - backlog))
+	pct=$((100 * diff / limit))
+	((0 <= pct && pct <= 5))
+	check_err $? "backlog $backlog / $limit expected <= 5% distance"
+	log_test "TC $((vlan - 10)): RED backlog > limit"
+
+	stop_traffic
+	sleep 1
+}
+
+do_mc_backlog_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local backlog
+	local pct
+
+	RET=0
+
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc
+	start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc
+
+	qbl=$(busywait 5000 until_counter_is ">= 500000" \
+		       get_qdisc_backlog $vlan)
+	check_err $? "Could not build MC backlog"
+
+	# Verify that we actually see the backlog on BUM TC. Do a busywait as
+	# well, performance blips might cause false fail.
+	local ebl
+	ebl=$(busywait 5000 until_counter_is ">= 500000" \
+		       get_mc_transmit_queue $vlan)
+	check_err $? "MC backlog reported by qdisc not visible in ethtool"
+
+	stop_traffic
+	stop_traffic
+
+	log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
new file mode 100755
index 000000000000..1c36c576613b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ecn_test
+	ecn_nodrop_test
+	red_test
+	mc_backlog_test
+"
+: ${QDISC:=ets}
+source sch_red_core.sh
+
+# do_ecn_test first build 2/3 of the requested backlog and expects no marking,
+# and then builds 3/2 of it and does expect marking. The values of $BACKLOG1 and
+# $BACKLOG2 are far enough not to overlap, so that we can assume that if we do
+# see (do not see) marking, it is actually due to the configuration of that one
+# TC, and not due to configuration of the other TC leaking over.
+BACKLOG1=200000
+BACKLOG2=500000
+
+install_qdisc()
+{
+	local -a args=("$@")
+
+	tc qdisc add dev $swp3 root handle 10: $QDISC \
+	   bands 8 priomap 7 6 5 4 3 2 1 0
+	tc qdisc add dev $swp3 parent 10:8 handle 108: red \
+	   limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \
+	   probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+	tc qdisc add dev $swp3 parent 10:7 handle 107: red \
+	   limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \
+	   probability 1.0 avpkt 8000 burst 63 "${args[@]}"
+	sleep 1
+}
+
+uninstall_qdisc()
+{
+	tc qdisc del dev $swp3 parent 10:7
+	tc qdisc del dev $swp3 parent 10:8
+	tc qdisc del dev $swp3 root
+}
+
+ecn_test()
+{
+	install_qdisc ecn
+
+	do_ecn_test 10 $BACKLOG1
+	do_ecn_test 11 $BACKLOG2
+
+	uninstall_qdisc
+}
+
+ecn_nodrop_test()
+{
+	install_qdisc ecn nodrop
+
+	do_ecn_nodrop_test 10 $BACKLOG1
+	do_ecn_nodrop_test 11 $BACKLOG2
+
+	uninstall_qdisc
+}
+
+red_test()
+{
+	install_qdisc
+
+	do_red_test 10 $BACKLOG1
+	do_red_test 11 $BACKLOG2
+
+	uninstall_qdisc
+}
+
+mc_backlog_test()
+{
+	install_qdisc
+
+	# Note that the backlog numbers here do not correspond to RED
+	# configuration, but are arbitrary.
+	do_mc_backlog_test 10 $BACKLOG1
+	do_mc_backlog_test 11 $BACKLOG2
+
+	uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+bail_on_lldpad
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh
new file mode 100755
index 000000000000..76820a0e9a1b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+QDISC=prio
+source sch_red_ets.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
new file mode 100755
index 000000000000..558667ea11ec
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ecn_test
+	ecn_nodrop_test
+	red_test
+	mc_backlog_test
+"
+source sch_red_core.sh
+
+BACKLOG=300000
+
+install_qdisc()
+{
+	local -a args=("$@")
+
+	tc qdisc add dev $swp3 root handle 108: red \
+	   limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \
+	   probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+	sleep 1
+}
+
+uninstall_qdisc()
+{
+	tc qdisc del dev $swp3 root
+}
+
+ecn_test()
+{
+	install_qdisc ecn
+	do_ecn_test 10 $BACKLOG
+	uninstall_qdisc
+}
+
+ecn_nodrop_test()
+{
+	install_qdisc ecn nodrop
+	do_ecn_nodrop_test 10 $BACKLOG
+	uninstall_qdisc
+}
+
+red_test()
+{
+	install_qdisc
+	do_red_test 10 $BACKLOG
+	uninstall_qdisc
+}
+
+mc_backlog_test()
+{
+	install_qdisc
+	# Note that the backlog value here does not correspond to RED
+	# configuration, but is arbitrary.
+	do_mc_backlog_test 10 $BACKLOG
+	uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+bail_on_lldpad
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
new file mode 100755
index 000000000000..58f3a05f08af
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -0,0 +1,222 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	port_pool_test
+	port_tc_ip_test
+	port_tc_arp_test
+"
+
+NUM_NETIFS=2
+source ../../../net/forwarding/lib.sh
+source ../../../net/forwarding/devlink_lib.sh
+source mlxsw_lib.sh
+
+SB_POOL_ING=0
+SB_POOL_EGR_CPU=10
+
+SB_ITC_CPU_IP=3
+SB_ITC_CPU_ARP=2
+SB_ITC=0
+
+h1_create()
+{
+	simple_if_init $h1 192.0.1.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.1.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.1.2/24
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.1.2/24
+}
+
+sb_occ_pool_check()
+{
+	local dl_port=$1; shift
+	local pool=$1; shift
+	local exp_max_occ=$1
+	local max_occ
+	local err=0
+
+	max_occ=$(devlink sb -j occupancy show $dl_port \
+		  | jq -e ".[][][\"pool\"][\"$pool\"][\"max\"]")
+
+	if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+		err=1
+	fi
+
+	echo $max_occ
+	return $err
+}
+
+sb_occ_itc_check()
+{
+	local dl_port=$1; shift
+	local itc=$1; shift
+	local exp_max_occ=$1
+	local max_occ
+	local err=0
+
+	max_occ=$(devlink sb -j occupancy show $dl_port \
+		  | jq -e ".[][][\"itc\"][\"$itc\"][\"max\"]")
+
+	if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+		err=1
+	fi
+
+	echo $max_occ
+	return $err
+}
+
+sb_occ_etc_check()
+{
+	local dl_port=$1; shift
+	local etc=$1; shift
+	local exp_max_occ=$1; shift
+	local max_occ
+	local err=0
+
+	max_occ=$(devlink sb -j occupancy show $dl_port \
+		  | jq -e ".[][][\"etc\"][\"$etc\"][\"max\"]")
+
+	if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+		err=1
+	fi
+
+	echo $max_occ
+	return $err
+}
+
+port_pool_test()
+{
+	local exp_max_occ=288
+	local max_occ
+
+	devlink sb occupancy clearmax $DEVLINK_DEV
+
+	$MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+		-t ip -q
+
+	devlink sb occupancy snapshot $DEVLINK_DEV
+
+	RET=0
+	max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ)
+	check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h1) ingress pool"
+
+	RET=0
+	max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ)
+	check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h2) ingress pool"
+
+	RET=0
+	max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ)
+	check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "CPU port's egress pool"
+}
+
+port_tc_ip_test()
+{
+	local exp_max_occ=288
+	local max_occ
+
+	devlink sb occupancy clearmax $DEVLINK_DEV
+
+	$MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+		-t ip -q
+
+	devlink sb occupancy snapshot $DEVLINK_DEV
+
+	RET=0
+	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h1) ingress TC - IP packet"
+
+	RET=0
+	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h2) ingress TC - IP packet"
+
+	RET=0
+	max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ)
+	check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "CPU port's egress TC - IP packet"
+}
+
+port_tc_arp_test()
+{
+	local exp_max_occ=96
+	local max_occ
+
+	if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then
+		exp_max_occ=144
+	fi
+
+	devlink sb occupancy clearmax $DEVLINK_DEV
+
+	$MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q
+
+	devlink sb occupancy snapshot $DEVLINK_DEV
+
+	RET=0
+	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h1) ingress TC - ARP packet"
+
+	RET=0
+	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h2) ingress TC - ARP packet"
+
+	RET=0
+	max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ)
+	check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "CPU port's egress TC - ARP packet"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	dl_port1=$(devlink_port_by_netdev $h1)
+	dl_port2=$(devlink_port_by_netdev $h2)
+
+	cpu_dl_port=$(devlink_cpu_port_get)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
new file mode 100755
index 000000000000..0d4b9327c9b3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
@@ -0,0 +1,416 @@
+#!/usr/bin/python
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import json as j
+import random
+
+
+class SkipTest(Exception):
+    pass
+
+
+class RandomValuePicker:
+    """
+    Class for storing shared buffer configuration. Can handle 3 different
+    objects, pool, tcbind and portpool. Provide an interface to get random
+    values for a specific object type as the follow:
+      1. Pool:
+         - random size
+
+      2. TcBind:
+         - random pool number
+         - random threshold
+
+      3. PortPool:
+         - random threshold
+    """
+    def __init__(self, pools):
+        self._pools = []
+        for pool in pools:
+            self._pools.append(pool)
+
+    def _cell_size(self):
+        return self._pools[0]["cell_size"]
+
+    def _get_static_size(self, th):
+        # For threshold of 16, this works out to be about 12MB on Spectrum-1,
+        # and about 17MB on Spectrum-2.
+        return th * 8000 * self._cell_size()
+
+    def _get_size(self):
+        return self._get_static_size(16)
+
+    def _get_thtype(self):
+        return "static"
+
+    def _get_th(self, pool):
+        # Threshold value could be any integer between 3 to 16
+        th = random.randint(3, 16)
+        if pool["thtype"] == "dynamic":
+            return th
+        else:
+            return self._get_static_size(th)
+
+    def _get_pool(self, direction):
+        ing_pools = []
+        egr_pools = []
+        for pool in self._pools:
+            if pool["type"] == "ingress":
+                ing_pools.append(pool)
+            else:
+                egr_pools.append(pool)
+        if direction == "ingress":
+            arr = ing_pools
+        else:
+            arr = egr_pools
+        return arr[random.randint(0, len(arr) - 1)]
+
+    def get_value(self, objid):
+        if isinstance(objid, Pool):
+            if objid["pool"] in [4, 8, 9, 10]:
+                # The threshold type of pools 4, 8, 9 and 10 cannot be changed
+                raise SkipTest()
+            else:
+                return (self._get_size(), self._get_thtype())
+        if isinstance(objid, TcBind):
+            if objid["tc"] >= 8:
+                # Multicast TCs cannot be changed
+                raise SkipTest()
+            else:
+                pool = self._get_pool(objid["type"])
+                th = self._get_th(pool)
+                pool_n = pool["pool"]
+                return (pool_n, th)
+        if isinstance(objid, PortPool):
+            pool_n = objid["pool"]
+            pool = self._pools[pool_n]
+            assert pool["pool"] == pool_n
+            th = self._get_th(pool)
+            return (th,)
+
+
+class RecordValuePickerException(Exception):
+    pass
+
+
+class RecordValuePicker:
+    """
+    Class for storing shared buffer configuration. Can handle 2 different
+    objects, pool and tcbind. Provide an interface to get the stored values per
+    object type.
+    """
+    def __init__(self, objlist):
+        self._recs = []
+        for item in objlist:
+            self._recs.append({"objid": item, "value": item.var_tuple()})
+
+    def get_value(self, objid):
+        if isinstance(objid, Pool) and objid["pool"] in [4, 8, 9, 10]:
+            # The threshold type of pools 4, 8, 9 and 10 cannot be changed
+            raise SkipTest()
+        if isinstance(objid, TcBind) and objid["tc"] >= 8:
+            # Multicast TCs cannot be changed
+            raise SkipTest()
+        for rec in self._recs:
+            if rec["objid"].weak_eq(objid):
+                return rec["value"]
+        raise RecordValuePickerException()
+
+
+def run_cmd(cmd, json=False):
+    out = subprocess.check_output(cmd, shell=True)
+    if json:
+        return j.loads(out)
+    return out
+
+
+def run_json_cmd(cmd):
+    return run_cmd(cmd, json=True)
+
+
+def log_test(test_name, err_msg=None):
+    if err_msg:
+        print("\t%s" % err_msg)
+        print("TEST: %-80s  [FAIL]" % test_name)
+    else:
+        print("TEST: %-80s  [ OK ]" % test_name)
+
+
+class CommonItem(dict):
+    varitems = []
+
+    def var_tuple(self):
+        ret = []
+        self.varitems.sort()
+        for key in self.varitems:
+            ret.append(self[key])
+        return tuple(ret)
+
+    def weak_eq(self, other):
+        for key in self:
+            if key in self.varitems:
+                continue
+            if self[key] != other[key]:
+                return False
+        return True
+
+
+class CommonList(list):
+    def get_by(self, by_obj):
+        for item in self:
+            if item.weak_eq(by_obj):
+                return item
+        return None
+
+    def del_by(self, by_obj):
+        for item in self:
+            if item.weak_eq(by_obj):
+                self.remove(item)
+
+
+class Pool(CommonItem):
+    varitems = ["size", "thtype"]
+
+    def dl_set(self, dlname, size, thtype):
+        run_cmd("devlink sb pool set {} sb {} pool {} size {} thtype {}".format(dlname, self["sb"],
+                                                                                self["pool"],
+                                                                                size, thtype))
+
+
+class PoolList(CommonList):
+    pass
+
+
+def get_pools(dlname, direction=None):
+    d = run_json_cmd("devlink sb pool show -j")
+    pools = PoolList()
+    for pooldict in d["pool"][dlname]:
+        if not direction or direction == pooldict["type"]:
+            pools.append(Pool(pooldict))
+    return pools
+
+
+def do_check_pools(dlname, pools, vp):
+    for pool in pools:
+        pre_pools = get_pools(dlname)
+        try:
+            (size, thtype) = vp.get_value(pool)
+        except SkipTest:
+            continue
+        pool.dl_set(dlname, size, thtype)
+        post_pools = get_pools(dlname)
+        pool = post_pools.get_by(pool)
+
+        err_msg = None
+        if pool["size"] != size:
+            err_msg = "Incorrect pool size (got {}, expected {})".format(pool["size"], size)
+        if pool["thtype"] != thtype:
+            err_msg = "Incorrect pool threshold type (got {}, expected {})".format(pool["thtype"], thtype)
+
+        pre_pools.del_by(pool)
+        post_pools.del_by(pool)
+        if pre_pools != post_pools:
+            err_msg = "Other pool setup changed as well"
+        log_test("pool {} of sb {} set verification".format(pool["pool"],
+                                                            pool["sb"]), err_msg)
+
+
+def check_pools(dlname, pools):
+    # Save defaults
+    record_vp = RecordValuePicker(pools)
+
+    # For each pool, set random size and static threshold type
+    do_check_pools(dlname, pools, RandomValuePicker(pools))
+
+    # Restore defaults
+    do_check_pools(dlname, pools, record_vp)
+
+
+class TcBind(CommonItem):
+    varitems = ["pool", "threshold"]
+
+    def __init__(self, port, d):
+        super(TcBind, self).__init__(d)
+        self["dlportname"] = port.name
+
+    def dl_set(self, pool, th):
+        run_cmd("devlink sb tc bind set {} sb {} tc {} type {} pool {} th {}".format(self["dlportname"],
+                                                                                     self["sb"],
+                                                                                     self["tc"],
+                                                                                     self["type"],
+                                                                                     pool, th))
+
+
+class TcBindList(CommonList):
+    pass
+
+
+def get_tcbinds(ports, verify_existence=False):
+    d = run_json_cmd("devlink sb tc bind show -j -n")
+    tcbinds = TcBindList()
+    for port in ports:
+        err_msg = None
+        if port.name not in d["tc_bind"] or len(d["tc_bind"][port.name]) == 0:
+            err_msg = "No tc bind for port"
+        else:
+            for tcbinddict in d["tc_bind"][port.name]:
+                tcbinds.append(TcBind(port, tcbinddict))
+        if verify_existence:
+            log_test("tc bind existence for port {} verification".format(port.name), err_msg)
+    return tcbinds
+
+
+def do_check_tcbind(ports, tcbinds, vp):
+    for tcbind in tcbinds:
+        pre_tcbinds = get_tcbinds(ports)
+        try:
+            (pool, th) = vp.get_value(tcbind)
+        except SkipTest:
+            continue
+        tcbind.dl_set(pool, th)
+        post_tcbinds = get_tcbinds(ports)
+        tcbind = post_tcbinds.get_by(tcbind)
+
+        err_msg = None
+        if tcbind["pool"] != pool:
+            err_msg = "Incorrect pool (got {}, expected {})".format(tcbind["pool"], pool)
+        if tcbind["threshold"] != th:
+            err_msg = "Incorrect threshold (got {}, expected {})".format(tcbind["threshold"], th)
+
+        pre_tcbinds.del_by(tcbind)
+        post_tcbinds.del_by(tcbind)
+        if pre_tcbinds != post_tcbinds:
+            err_msg = "Other tc bind setup changed as well"
+        log_test("tc bind {}-{} of sb {} set verification".format(tcbind["dlportname"],
+                                                                  tcbind["tc"],
+                                                                  tcbind["sb"]), err_msg)
+
+
+def check_tcbind(dlname, ports, pools):
+    tcbinds = get_tcbinds(ports, verify_existence=True)
+
+    # Save defaults
+    record_vp = RecordValuePicker(tcbinds)
+
+    # Bind each port and unicast TC (TCs < 8) to a random pool and a random
+    # threshold
+    do_check_tcbind(ports, tcbinds, RandomValuePicker(pools))
+
+    # Restore defaults
+    do_check_tcbind(ports, tcbinds, record_vp)
+
+
+class PortPool(CommonItem):
+    varitems = ["threshold"]
+
+    def __init__(self, port, d):
+        super(PortPool, self).__init__(d)
+        self["dlportname"] = port.name
+
+    def dl_set(self, th):
+        run_cmd("devlink sb port pool set {} sb {} pool {} th {}".format(self["dlportname"],
+                                                                         self["sb"],
+                                                                         self["pool"], th))
+
+
+class PortPoolList(CommonList):
+    pass
+
+
+def get_portpools(ports, verify_existence=False):
+    d = run_json_cmd("devlink sb port pool -j -n")
+    portpools = PortPoolList()
+    for port in ports:
+        err_msg = None
+        if port.name not in d["port_pool"] or len(d["port_pool"][port.name]) == 0:
+            err_msg = "No port pool for port"
+        else:
+            for portpooldict in d["port_pool"][port.name]:
+                portpools.append(PortPool(port, portpooldict))
+        if verify_existence:
+            log_test("port pool existence for port {} verification".format(port.name), err_msg)
+    return portpools
+
+
+def do_check_portpool(ports, portpools, vp):
+    for portpool in portpools:
+        pre_portpools = get_portpools(ports)
+        (th,) = vp.get_value(portpool)
+        portpool.dl_set(th)
+        post_portpools = get_portpools(ports)
+        portpool = post_portpools.get_by(portpool)
+
+        err_msg = None
+        if portpool["threshold"] != th:
+            err_msg = "Incorrect threshold (got {}, expected {})".format(portpool["threshold"], th)
+
+        pre_portpools.del_by(portpool)
+        post_portpools.del_by(portpool)
+        if pre_portpools != post_portpools:
+            err_msg = "Other port pool setup changed as well"
+        log_test("port pool {}-{} of sb {} set verification".format(portpool["dlportname"],
+                                                                    portpool["pool"],
+                                                                    portpool["sb"]), err_msg)
+
+
+def check_portpool(dlname, ports, pools):
+    portpools = get_portpools(ports, verify_existence=True)
+
+    # Save defaults
+    record_vp = RecordValuePicker(portpools)
+
+    # For each port pool, set a random threshold
+    do_check_portpool(ports, portpools, RandomValuePicker(pools))
+
+    # Restore defaults
+    do_check_portpool(ports, portpools, record_vp)
+
+
+class Port:
+    def __init__(self, name):
+        self.name = name
+
+
+class PortList(list):
+    pass
+
+
+def get_ports(dlname):
+    d = run_json_cmd("devlink port show -j")
+    ports = PortList()
+    for name in d["port"]:
+        if name.find(dlname) == 0 and d["port"][name]["flavour"] == "physical":
+            ports.append(Port(name))
+    return ports
+
+
+def get_device():
+    devices_info = run_json_cmd("devlink -j dev info")["info"]
+    for d in devices_info:
+        if "mlxsw_spectrum" in devices_info[d]["driver"]:
+            return d
+    return None
+
+
+class UnavailableDevlinkNameException(Exception):
+    pass
+
+
+def test_sb_configuration():
+    # Use static seed
+    random.seed(0)
+
+    dlname = get_device()
+    if not dlname:
+        raise UnavailableDevlinkNameException()
+
+    ports = get_ports(dlname)
+    pools = get_pools(dlname)
+
+    check_pools(dlname, pools)
+    check_tcbind(dlname, ports, pools)
+    check_portpool(dlname, ports, pools)
+
+
+test_sb_configuration()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 7b2acba82a49..fd583a171db7 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -8,8 +8,9 @@ source $lib_dir/lib.sh
 source $lib_dir/tc_common.sh
 source $lib_dir/devlink_lib.sh
 
-if [ "$DEVLINK_VIDDID" != "15b3:cf6c" ]; then
-	echo "SKIP: test is tailored for Mellanox Spectrum-2"
+if [[ "$DEVLINK_VIDDID" != "15b3:cf6c" && \
+	"$DEVLINK_VIDDID" != "15b3:cf70" ]]; then
+	echo "SKIP: test is tailored for Mellanox Spectrum-2 and Spectrum-3"
 	exit 1
 fi
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
index a0795227216e..efd798a85931 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
@@ -8,9 +8,9 @@ tc_flower_get_target()
 	# The driver associates a counter with each tc filter, which means the
 	# number of supported filters is bounded by the number of available
 	# counters.
-	# Currently, the driver supports 12K (12,288) flow counters and six of
+	# Currently, the driver supports 30K (30,720) flow counters and six of
 	# these are used for multicast routing.
-	local target=12282
+	local target=30714
 
 	if ((! should_fail)); then
 		echo $target
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
new file mode 100755
index 000000000000..20ed98fe5a60
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	default_hw_stats_test
+	immediate_hw_stats_test
+	delayed_hw_stats_test
+	disabled_hw_stats_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/24
+	tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1 192.0.2.2/24
+}
+
+hw_stats_test()
+{
+	RET=0
+
+	local name=$1
+	local action_hw_stats=$2
+	local occ_delta=$3
+	local expected_packet_count=$4
+
+	local orig_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop $action_hw_stats
+	check_err $? "Failed to add rule with $name hw_stats"
+
+	local new_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+	local expected_occ=$((orig_occ + occ_delta))
+	[ "$new_occ" == "$expected_occ" ]
+	check_err $? "Expected occupancy of $expected_occ, got $new_occ"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $swp1mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 101 $expected_packet_count
+	check_err $? "Did not match incoming packet"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "$name hw_stats"
+}
+
+default_hw_stats_test()
+{
+	hw_stats_test "default" "" 2 1
+}
+
+immediate_hw_stats_test()
+{
+	hw_stats_test "immediate" "hw_stats immediate" 2 1
+}
+
+delayed_hw_stats_test()
+{
+	RET=0
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop hw_stats delayed
+	check_fail $? "Unexpected success in adding rule with delayed hw_stats"
+
+	log_test "delayed hw_stats"
+}
+
+disabled_hw_stats_test()
+{
+	hw_stats_test "disabled" "hw_stats disabled" 0 0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	h1mac=$(mac_get $h1)
+	swp1mac=$(mac_get $swp1)
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+check_tc_action_hw_stats_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh
new file mode 100755
index 000000000000..68c80d0ec1ec
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	shared_block_drop_test
+	egress_redirect_test
+	multi_mirror_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.1/24
+	simple_if_init $swp2 192.0.2.2/24
+}
+
+switch_destroy()
+{
+	simple_if_fini $swp2 192.0.2.2/24
+	simple_if_fini $swp1 192.0.2.1/24
+}
+
+shared_block_drop_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have mixed-bound
+	# shared block with a drop rule.
+
+	tc qdisc add dev $swp1 ingress_block 22 clsact
+	check_err $? "Failed to create clsact with ingress block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add drop rule to ingress bound block"
+
+	tc qdisc add dev $swp2 ingress_block 22 clsact
+	check_err $? "Failed to create another clsact with ingress shared block"
+
+	tc qdisc del dev $swp2 clsact
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add drop rule to mixed bound block"
+
+	tc qdisc del dev $swp1 clsact
+
+	tc qdisc add dev $swp1 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add drop rule to egress bound shared block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	log_test "shared block drop"
+}
+
+egress_redirect_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have mirred redirect on
+	# egress-bound block.
+
+	tc qdisc add dev $swp1 ingress_block 22 clsact
+	check_err $? "Failed to create clsact with ingress block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_err $? "Failed to add redirect rule to ingress bound block"
+
+	tc qdisc add dev $swp2 ingress_block 22 clsact
+	check_err $? "Failed to create another clsact with ingress shared block"
+
+	tc qdisc del dev $swp2 clsact
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to mixed bound block"
+
+	tc qdisc del dev $swp1 clsact
+
+	tc qdisc add dev $swp1 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to egress bound shared block"
+
+	tc qdisc del dev $swp2 clsact
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to egress bound block"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "shared block drop"
+}
+
+multi_mirror_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have multiple mirror
+	# actions in a single rule.
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress mirror dev $swp2
+	check_err $? "Failed to add rule with single mirror action"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress mirror dev $swp2 \
+		action mirred egress mirror dev $swp1
+	check_fail $? "Incorrect success to add rule with two mirror actions"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "multi mirror"
+}
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	vrf_prepare
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	vrf_cleanup
+}
+
+check_tc_shblock_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
index a6d733d2a4b4..cc0f07e72cf2 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -2,9 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 
 # Test for resource limit of offloaded flower rules. The test adds a given
-# number of flower matches for different IPv6 addresses, then generates traffic,
-# and ensures each was hit exactly once. This file contains functions to set up
-# a testing topology and run the test, and is meant to be sourced from a test
+# number of flower matches for different IPv6 addresses, then check the offload
+# indication for all of the tc flower rules. This file contains functions to set
+# up a testing topology and run the test, and is meant to be sourced from a test
 # script that calls the testing routine with a given number of rules.
 
 TC_FLOWER_NUM_NETIFS=2
@@ -94,22 +94,15 @@ __tc_flower_test()
 
 	tc_flower_rules_create $count $should_fail
 
-	for ((i = 0; i < count; ++i)); do
-		$MZ $h1 -q -c 1 -t ip -p 20 -b bc -6 \
-			-A 2001:db8:2::1 \
-			-B $(tc_flower_addr $i)
-	done
-
-	MISMATCHES=$(
-		tc -j -s filter show dev $h2 ingress |
-		jq -r '[ .[] | select(.kind == "flower") | .options |
-		         values as $rule | .actions[].stats.packets |
-		         select(. != 1) | "\(.) on \($rule.keys.dst_ip)" ] |
-		       join(", ")'
-	)
-
-	test -z "$MISMATCHES"
-	check_err $? "Expected to capture 1 packet for each IP, but got $MISMATCHES"
+	offload_count=$(tc -j -s filter show dev $h2 ingress    |
+			jq -r '[ .[] | select(.kind == "flower") |
+			.options | .in_hw ]' | jq .[] | wc -l)
+	[[ $((offload_count - 1)) -eq $count ]]
+	if [[ $should_fail -eq 0 ]]; then
+		check_err $? "Offload mismatch"
+	else
+		check_err_fail $should_fail $? "Offload more than expacted"
+	fi
 }
 
 tc_flower_test()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
index 4632f51af7ab..729a86cc4ede 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -9,6 +9,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding
 ALL_TESTS="sanitization_test offload_indication_test \
 	sanitization_vlan_aware_test offload_indication_vlan_aware_test"
 NUM_NETIFS=2
+: ${TIMEOUT:=20000} # ms
 source $lib_dir/lib.sh
 
 setup_prepare()
@@ -470,8 +471,8 @@ offload_indication_fdb_flood_test()
 
 	bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2
 
-	bridge fdb show brport vxlan0 | grep 00:00:00:00:00:00 \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \
+		bridge fdb show brport vxlan0
 	check_err $?
 
 	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self
@@ -486,11 +487,11 @@ offload_indication_fdb_bridge_test()
 	bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \
 		dst 198.51.100.2
 
-	bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
 	check_err $?
-	bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
 	check_err $?
 
 	log_test "vxlan entry offload indication - initial state"
@@ -500,9 +501,9 @@ offload_indication_fdb_bridge_test()
 	RET=0
 
 	bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master
-	bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
-		| grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
+	check_err $?
 
 	log_test "vxlan entry offload indication - after removal from bridge"
 
@@ -511,11 +512,11 @@ offload_indication_fdb_bridge_test()
 	RET=0
 
 	bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static
-	bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
 	check_err $?
-	bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
 	check_err $?
 
 	log_test "vxlan entry offload indication - after re-add to bridge"
@@ -525,9 +526,9 @@ offload_indication_fdb_bridge_test()
 	RET=0
 
 	bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self
-	bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
-		| grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
+	check_err $?
 
 	log_test "vxlan entry offload indication - after removal from vxlan"
 
@@ -536,11 +537,11 @@ offload_indication_fdb_bridge_test()
 	RET=0
 
 	bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2
-	bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
 	check_err $?
-	bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
 	check_err $?
 
 	log_test "vxlan entry offload indication - after re-add to vxlan"
@@ -558,27 +559,32 @@ offload_indication_decap_route_test()
 {
 	RET=0
 
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	ip link set dev vxlan0 down
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	ip link set dev vxlan1 down
-	ip route show table local | grep 198.51.100.1 | grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local 198.51.100.1
+	check_err $?
 
 	log_test "vxlan decap route - vxlan device down"
 
 	RET=0
 
 	ip link set dev vxlan1 up
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	ip link set dev vxlan0 up
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	log_test "vxlan decap route - vxlan device up"
@@ -586,11 +592,13 @@ offload_indication_decap_route_test()
 	RET=0
 
 	ip address delete 198.51.100.1/32 dev lo
-	ip route show table local | grep 198.51.100.1 | grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local 198.51.100.1
+	check_err $?
 
 	ip address add 198.51.100.1/32 dev lo
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	log_test "vxlan decap route - add local route"
@@ -598,16 +606,19 @@ offload_indication_decap_route_test()
 	RET=0
 
 	ip link set dev $swp1 nomaster
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	ip link set dev $swp2 nomaster
-	ip route show table local | grep 198.51.100.1 | grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local 198.51.100.1
+	check_err $?
 
 	ip link set dev $swp1 master br0
 	ip link set dev $swp2 master br1
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	log_test "vxlan decap route - local ports enslavement"
@@ -615,12 +626,14 @@ offload_indication_decap_route_test()
 	RET=0
 
 	ip link del dev br0
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	ip link del dev br1
-	ip route show table local | grep 198.51.100.1 | grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local 198.51.100.1
+	check_err $?
 
 	log_test "vxlan decap route - bridge device deletion"
 
@@ -632,16 +645,19 @@ offload_indication_decap_route_test()
 	ip link set dev $swp2 master br1
 	ip link set dev vxlan0 master br0
 	ip link set dev vxlan1 master br1
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	ip link del dev vxlan0
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	ip link del dev vxlan1
-	ip route show table local | grep 198.51.100.1 | grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local 198.51.100.1
+	check_err $?
 
 	log_test "vxlan decap route - vxlan device deletion"
 
@@ -656,12 +672,15 @@ check_fdb_offloaded()
 	local mac=00:11:22:33:44:55
 	local zmac=00:00:00:00:00:00
 
-	bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac self \
+		bridge fdb show dev vxlan0
 	check_err $?
-	bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac master \
+		bridge fdb show dev vxlan0
 	check_err $?
 
-	bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show dev vxlan0
 	check_err $?
 }
 
@@ -672,13 +691,15 @@ check_vxlan_fdb_not_offloaded()
 
 	bridge fdb show dev vxlan0 | grep $mac | grep -q self
 	check_err $?
-	bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac self \
+		bridge fdb show dev vxlan0
+	check_err $?
 
 	bridge fdb show dev vxlan0 | grep $zmac | grep -q self
 	check_err $?
-	bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show dev vxlan0
+	check_err $?
 }
 
 check_bridge_fdb_not_offloaded()
@@ -688,8 +709,9 @@ check_bridge_fdb_not_offloaded()
 
 	bridge fdb show dev vxlan0 | grep $mac | grep -q master
 	check_err $?
-	bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac master \
+		bridge fdb show dev vxlan0
+	check_err $?
 }
 
 __offload_indication_join_vxlan_first()
@@ -771,12 +793,14 @@ __offload_indication_join_vxlan_last()
 
 	ip link set dev $swp1 master br0
 
-	bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show dev vxlan0
+	check_err $?
 
 	ip link set dev vxlan0 master br0
 
-	bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show dev vxlan0
 	check_err $?
 
 	log_test "offload indication - attach vxlan last"
@@ -854,20 +878,26 @@ sanitization_vlan_aware_test()
 	bridge vlan del vid 10 dev vxlan20
 	bridge vlan add vid 20 dev vxlan20 pvid untagged
 
-	# Test that offloading of an unsupported tunnel fails when it is
-	# triggered by addition of VLAN to a local port
-	RET=0
+	# Test that when two VXLAN tunnels with conflicting configurations
+	# (i.e., different TTL) are enslaved to the same VLAN-aware bridge,
+	# then the enslavement of a port to the bridge is denied.
 
-	# TOS must be set to inherit
-	ip link set dev vxlan10 type vxlan tos 42
+	# Use the offload indication of the local route to ensure the VXLAN
+	# configuration was correctly rollbacked.
+	ip address add 198.51.100.1/32 dev lo
 
-	ip link set dev $swp1 master br0
-	bridge vlan add vid 10 dev $swp1 &> /dev/null
+	ip link set dev vxlan10 type vxlan ttl 10
+	ip link set dev $swp1 master br0 &> /dev/null
 	check_fail $?
 
-	log_test "vlan-aware - failed vlan addition to a local port"
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local 198.51.100.1
+	check_err $?
+
+	log_test "vlan-aware - failed enslavement to bridge due to conflict"
 
-	ip link set dev vxlan10 type vxlan tos inherit
+	ip link set dev vxlan10 type vxlan ttl 20
+	ip address del 198.51.100.1/32 dev lo
 
 	ip link del dev vxlan20
 	ip link del dev vxlan10
@@ -924,11 +954,11 @@ offload_indication_vlan_aware_fdb_test()
 	bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \
 		dst 198.51.100.2 vlan 10
 
-	bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
 	check_err $?
-	bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
 	check_err $?
 
 	log_test "vxlan entry offload indication - initial state"
@@ -938,9 +968,9 @@ offload_indication_vlan_aware_fdb_test()
 	RET=0
 
 	bridge fdb del de:ad:be:ef:13:37 dev vxlan10 master vlan 10
-	bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
-		| grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
+	check_err $?
 
 	log_test "vxlan entry offload indication - after removal from bridge"
 
@@ -949,11 +979,11 @@ offload_indication_vlan_aware_fdb_test()
 	RET=0
 
 	bridge fdb add de:ad:be:ef:13:37 dev vxlan10 master static vlan 10
-	bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
 	check_err $?
-	bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
 	check_err $?
 
 	log_test "vxlan entry offload indication - after re-add to bridge"
@@ -963,9 +993,9 @@ offload_indication_vlan_aware_fdb_test()
 	RET=0
 
 	bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self
-	bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
-		| grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
+	check_err $?
 
 	log_test "vxlan entry offload indication - after removal from vxlan"
 
@@ -974,11 +1004,11 @@ offload_indication_vlan_aware_fdb_test()
 	RET=0
 
 	bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2
-	bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
 	check_err $?
-	bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
 	check_err $?
 
 	log_test "vxlan entry offload indication - after re-add to vxlan"
@@ -990,28 +1020,31 @@ offload_indication_vlan_aware_decap_route_test()
 {
 	RET=0
 
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	# Toggle PVID flag on one VxLAN device and make sure route is still
 	# marked as offloaded
 	bridge vlan add vid 10 dev vxlan10 untagged
 
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show table local 198.51.100.1
 	check_err $?
 
 	# Toggle PVID flag on second VxLAN device and make sure route is no
 	# longer marked as offloaded
 	bridge vlan add vid 20 dev vxlan20 untagged
 
-	ip route show table local | grep 198.51.100.1 | grep -q offload
-	check_fail $?
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local 198.51.100.1
+	check_err $?
 
 	# Toggle PVID flag back and make sure route is marked as offloaded
 	bridge vlan add vid 10 dev vxlan10 pvid untagged
 	bridge vlan add vid 20 dev vxlan20 pvid untagged
 
-	ip route show table local | grep 198.51.100.1 | grep -q offload
+	busywait "$TIMEOUT" wait_for_offload ip route show table local 198.51.100.1
 	check_err $?
 
 	log_test "vxlan decap route - vni map/unmap"
@@ -1064,35 +1097,33 @@ offload_indication_vlan_aware_l3vni_test()
 	ip link set dev vxlan0 master br0
 	bridge vlan add dev vxlan0 vid 10 pvid untagged
 
-	# No local port or router port is member in the VLAN, so tunnel should
-	# not be offloaded
-	bridge fdb show brport vxlan0 | grep $zmac | grep self \
-		| grep -q offload
-	check_fail $? "vxlan tunnel offloaded when should not"
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel not offloaded when should"
 
 	# Configure a VLAN interface and make sure tunnel is offloaded
 	ip link add link br0 name br10 up type vlan id 10
 	sysctl_set net.ipv6.conf.br10.disable_ipv6 0
 	ip -6 address add 2001:db8:1::1/64 dev br10
-	bridge fdb show brport vxlan0 | grep $zmac | grep self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
 	check_err $? "vxlan tunnel not offloaded when should"
 
 	# Unlink the VXLAN device, make sure tunnel is no longer offloaded,
 	# then add it back to the bridge and make sure it is offloaded
 	ip link set dev vxlan0 nomaster
-	bridge fdb show brport vxlan0 | grep $zmac | grep self \
-		| grep -q offload
-	check_fail $? "vxlan tunnel offloaded after unlinked from bridge"
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel offloaded after unlinked from bridge"
 
 	ip link set dev vxlan0 master br0
-	bridge fdb show brport vxlan0 | grep $zmac | grep self \
-		| grep -q offload
-	check_fail $? "vxlan tunnel offloaded despite no matching vid"
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel offloaded despite no matching vid"
 
 	bridge vlan add dev vxlan0 vid 10 pvid untagged
-	bridge fdb show brport vxlan0 | grep $zmac | grep self \
-		| grep -q offload
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
 	check_err $? "vxlan tunnel not offloaded after adding vid"
 
 	log_test "vxlan - l3 vni"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 025a84c2ab5a..9f9741444549 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -141,6 +141,16 @@ regions_test()
 
 	check_region_snapshot_count dummy post-first-delete 2
 
+	devlink region new $DL_HANDLE/dummy snapshot 25
+	check_err $? "Failed to create a new snapshot with id 25"
+
+	check_region_snapshot_count dummy post-first-request 3
+
+	devlink region del $DL_HANDLE/dummy snapshot 25
+	check_err $? "Failed to delete snapshot with id 25"
+
+	check_region_snapshot_count dummy post-second-delete 2
+
 	log_test "regions test"
 }
 
@@ -367,6 +377,11 @@ dummy_reporter_test()
 {
 	RET=0
 
+	check_reporter_info dummy healthy 0 0 0 true
+
+	devlink health set $DL_HANDLE reporter dummy auto_recover false
+	check_err $? "Failed to dummy reporter auto_recover option"
+
 	check_reporter_info dummy healthy 0 0 0 false
 
 	local BREAK_MSG="foo bar"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index f101ab9441e2..dbd1e014ba17 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -16,6 +16,8 @@ ALL_TESTS="
 	trap_group_action_test
 	bad_trap_group_test
 	trap_group_stats_test
+	trap_policer_test
+	trap_policer_bind_test
 	port_del_test
 	dev_del_test
 "
@@ -23,6 +25,7 @@ NETDEVSIM_PATH=/sys/bus/netdevsim/
 DEV_ADDR=1337
 DEV=netdevsim${DEV_ADDR}
 DEVLINK_DEV=netdevsim/${DEV}
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
 SLEEP_TIME=1
 NETDEV=""
 NUM_NETIFS=0
@@ -103,6 +106,11 @@ trap_metadata_test()
 	for trap_name in $(devlink_traps_get); do
 		devlink_trap_metadata_test $trap_name "input_port"
 		check_err $? "Input port not reported as metadata of trap $trap_name"
+		if [ $trap_name == "ingress_flow_action_drop" ] ||
+		   [ $trap_name == "egress_flow_action_drop" ]; then
+			devlink_trap_metadata_test $trap_name "flow_action_cookie"
+			check_err $? "Flow action cookie not reported as metadata of trap $trap_name"
+		fi
 	done
 
 	log_test "Trap metadata"
@@ -251,6 +259,119 @@ trap_group_stats_test()
 	log_test "Trap group statistics"
 }
 
+trap_policer_test()
+{
+	local packets_t0
+	local packets_t1
+
+	if [ $(devlink_trap_policers_num_get) -eq 0 ]; then
+		check_err 1 "Failed to dump policers"
+	fi
+
+	devlink trap policer set $DEVLINK_DEV policer 1337 &> /dev/null
+	check_fail $? "Did not get an error for setting a non-existing policer"
+	devlink trap policer show $DEVLINK_DEV policer 1337 &> /dev/null
+	check_fail $? "Did not get an error for getting a non-existing policer"
+
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 2000 burst 16
+	check_err $? "Failed to set valid parameters for a valid policer"
+	if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then
+		check_err 1 "Policer rate was not changed"
+	fi
+	if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then
+		check_err 1 "Policer burst size was not changed"
+	fi
+
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null
+	check_fail $? "Policer rate was changed to rate lower than limit"
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 9000 &> /dev/null
+	check_fail $? "Policer rate was changed to rate higher than limit"
+	devlink trap policer set $DEVLINK_DEV policer 1 burst 2 &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size lower than limit"
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 65537 &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size higher than limit"
+	echo "y" > $DEBUGFS_DIR/fail_trap_policer_set
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 3000 &> /dev/null
+	check_fail $? "Managed to set policer rate when should not"
+	echo "n" > $DEBUGFS_DIR/fail_trap_policer_set
+	if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then
+		check_err 1 "Policer rate was changed to an invalid value"
+	fi
+	if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then
+		check_err 1 "Policer burst size was changed to an invalid value"
+	fi
+
+	packets_t0=$(devlink_trap_policer_rx_dropped_get 1)
+	sleep .5
+	packets_t1=$(devlink_trap_policer_rx_dropped_get 1)
+	if [ ! $packets_t1 -gt $packets_t0 ]; then
+		check_err 1 "Policer drop counter was not incremented"
+	fi
+
+	echo "y"> $DEBUGFS_DIR/fail_trap_policer_counter_get
+	devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null
+	check_fail $? "Managed to read policer drop counter when should not"
+	echo "n"> $DEBUGFS_DIR/fail_trap_policer_counter_get
+	devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null
+	check_err $? "Did not manage to read policer drop counter when should"
+
+	log_test "Trap policer"
+}
+
+trap_group_check_policer()
+{
+	local group_name=$1; shift
+
+	devlink -j -p trap group show $DEVLINK_DEV group $group_name \
+		| jq -e '.[][][]["policer"]' &> /dev/null
+}
+
+trap_policer_bind_test()
+{
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+	check_err $? "Failed to bind a valid policer"
+	if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then
+		check_err 1 "Bound policer was not changed"
+	fi
+
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 1337 \
+		&> /dev/null
+	check_fail $? "Did not get an error for binding a non-existing policer"
+	if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then
+		check_err 1 "Bound policer was changed when should not"
+	fi
+
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 0
+	check_err $? "Failed to unbind a policer when using ID 0"
+	trap_group_check_policer "l2_drops"
+	check_fail $? "Trap group has a policer after unbinding with ID 0"
+
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+	check_err $? "Failed to bind a valid policer"
+
+	devlink trap group set $DEVLINK_DEV group l2_drops nopolicer
+	check_err $? "Failed to unbind a policer when using 'nopolicer' keyword"
+	trap_group_check_policer "l2_drops"
+	check_fail $? "Trap group has a policer after unbinding with 'nopolicer' keyword"
+
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+	check_err $? "Failed to bind a valid policer"
+
+	echo "y"> $DEBUGFS_DIR/fail_trap_group_set
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 2 \
+		&> /dev/null
+	check_fail $? "Managed to bind a policer when should not"
+	echo "n"> $DEBUGFS_DIR/fail_trap_group_set
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 2
+	check_err $? "Did not manage to bind a policer when should"
+
+	devlink trap group set $DEVLINK_DEV group l2_drops action drop \
+		policer 1337 &> /dev/null
+	check_fail $? "Did not get an error for partially modified trap group"
+
+	log_test "Trap policer binding"
+}
+
 port_del_test()
 {
 	local group_name
diff --git a/tools/testing/selftests/efivarfs/.gitignore b/tools/testing/selftests/efivarfs/.gitignore
index 33618493562b..807407f7f58b 100644
--- a/tools/testing/selftests/efivarfs/.gitignore
+++ b/tools/testing/selftests/efivarfs/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 create-read
 open-unlink
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index b02279da6fa1..c078ece12ff0 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 subdir*
 script*
 execveat
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
index 8449cf6716ce..f0c0ff20d6cf 100644
--- a/tools/testing/selftests/filesystems/.gitignore
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 dnotify_test
 devpts_pts
diff --git a/tools/testing/selftests/filesystems/binderfs/.gitignore b/tools/testing/selftests/filesystems/binderfs/.gitignore
index 8a5d9bf63dd4..8e5cf9084894 100644
--- a/tools/testing/selftests/filesystems/binderfs/.gitignore
+++ b/tools/testing/selftests/filesystems/binderfs/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 binderfs_test
diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile
index 58cb659b56b4..8af25ae96049 100644
--- a/tools/testing/selftests/filesystems/binderfs/Makefile
+++ b/tools/testing/selftests/filesystems/binderfs/Makefile
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
-CFLAGS += -I../../../../../usr/include/
+CFLAGS += -I../../../../../usr/include/ -pthread
 TEST_GEN_PROGS := binderfs_test
 
+binderfs_test: binderfs_test.c ../../kselftest.h ../../kselftest_harness.h
+
 include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 8c2ed962e1c7..8a6b507e34a8 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -3,114 +3,47 @@
 #define _GNU_SOURCE
 #include <errno.h>
 #include <fcntl.h>
+#include <pthread.h>
 #include <sched.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/fsuid.h>
 #include <sys/ioctl.h>
 #include <sys/mount.h>
+#include <sys/socket.h>
 #include <sys/stat.h>
+#include <sys/sysinfo.h>
 #include <sys/types.h>
+#include <sys/wait.h>
 #include <unistd.h>
 #include <linux/android/binder.h>
 #include <linux/android/binderfs.h>
-#include "../../kselftest.h"
-
-static ssize_t write_nointr(int fd, const void *buf, size_t count)
-{
-	ssize_t ret;
-again:
-	ret = write(fd, buf, count);
-	if (ret < 0 && errno == EINTR)
-		goto again;
-
-	return ret;
-}
 
-static void write_to_file(const char *filename, const void *buf, size_t count,
-			  int allowed_errno)
-{
-	int fd, saved_errno;
-	ssize_t ret;
+#include "../../kselftest.h"
+#include "../../kselftest_harness.h"
 
-	fd = open(filename, O_WRONLY | O_CLOEXEC);
-	if (fd < 0)
-		ksft_exit_fail_msg("%s - Failed to open file %s\n",
-				   strerror(errno), filename);
+#define DEFAULT_THREADS 4
 
-	ret = write_nointr(fd, buf, count);
-	if (ret < 0) {
-		if (allowed_errno && (errno == allowed_errno)) {
-			close(fd);
-			return;
-		}
+#define PTR_TO_INT(p) ((int)((intptr_t)(p)))
+#define INT_TO_PTR(u) ((void *)((intptr_t)(u)))
 
-		goto on_error;
+#define close_prot_errno_disarm(fd) \
+	if (fd >= 0) {              \
+		int _e_ = errno;    \
+		close(fd);          \
+		errno = _e_;        \
+		fd = -EBADF;        \
 	}
 
-	if ((size_t)ret != count)
-		goto on_error;
-
-	close(fd);
-	return;
-
-on_error:
-	saved_errno = errno;
-	close(fd);
-	errno = saved_errno;
-
-	if (ret < 0)
-		ksft_exit_fail_msg("%s - Failed to write to file %s\n",
-				   strerror(errno), filename);
-
-	ksft_exit_fail_msg("Failed to write to file %s\n", filename);
-}
-
-static void change_to_userns(void)
-{
-	int ret;
-	uid_t uid;
-	gid_t gid;
-	/* {g,u}id_map files only allow a max of 4096 bytes written to them */
-	char idmap[4096];
-
-	uid = getuid();
-	gid = getgid();
-
-	ret = unshare(CLONE_NEWUSER);
-	if (ret < 0)
-		ksft_exit_fail_msg("%s - Failed to unshare user namespace\n",
-				   strerror(errno));
-
-	write_to_file("/proc/self/setgroups", "deny", strlen("deny"), ENOENT);
-
-	ret = snprintf(idmap, sizeof(idmap), "0 %d 1", uid);
-	if (ret < 0 || (size_t)ret >= sizeof(idmap))
-		ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n",
-				   strerror(errno));
-
-	write_to_file("/proc/self/uid_map", idmap, strlen(idmap), 0);
-
-	ret = snprintf(idmap, sizeof(idmap), "0 %d 1", gid);
-	if (ret < 0 || (size_t)ret >= sizeof(idmap))
-		ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n",
-				   strerror(errno));
-
-	write_to_file("/proc/self/gid_map", idmap, strlen(idmap), 0);
-
-	ret = setgid(0);
-	if (ret)
-		ksft_exit_fail_msg("%s - Failed to setgid(0)\n",
-				   strerror(errno));
+#define log_exit(format, ...)                                                  \
+	({                                                                     \
+		fprintf(stderr, format "\n", ##__VA_ARGS__);                   \
+		exit(EXIT_FAILURE);                                            \
+	})
 
-	ret = setuid(0);
-	if (ret)
-		ksft_exit_fail_msg("%s - Failed to setgid(0)\n",
-				   strerror(errno));
-}
-
-static void change_to_mountns(void)
+static void change_mountns(void)
 {
 	int ret;
 
@@ -132,36 +65,31 @@ static void rmdir_protect_errno(const char *dir)
 	errno = saved_errno;
 }
 
-static void __do_binderfs_test(void)
+static int __do_binderfs_test(void)
 {
 	int fd, ret, saved_errno;
 	size_t len;
 	ssize_t wret;
-	bool keep = false;
 	struct binderfs_device device = { 0 };
 	struct binder_version version = { 0 };
+	char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX",
+		device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
 
-	change_to_mountns();
-
-	ret = mkdir("/dev/binderfs", 0755);
-	if (ret < 0) {
-		if (errno != EEXIST)
-			ksft_exit_fail_msg(
-				"%s - Failed to create binderfs mountpoint\n",
-				strerror(errno));
+	change_mountns();
 
-		keep = true;
-	}
+	if (!mkdtemp(binderfs_mntpt))
+		ksft_exit_fail_msg(
+			"%s - Failed to create binderfs mountpoint\n",
+			strerror(errno));
 
-	ret = mount(NULL, "/dev/binderfs", "binder", 0, 0);
+	ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
 	if (ret < 0) {
 		if (errno != ENODEV)
 			ksft_exit_fail_msg("%s - Failed to mount binderfs\n",
 					   strerror(errno));
 
-		keep ? : rmdir_protect_errno("/dev/binderfs");
-		ksft_exit_skip(
-			"The Android binderfs filesystem is not available\n");
+		rmdir_protect_errno(binderfs_mntpt);
+		return 1;
 	}
 
 	/* binderfs mount test passed */
@@ -169,7 +97,8 @@ static void __do_binderfs_test(void)
 
 	memcpy(device.name, "my-binder", strlen("my-binder"));
 
-	fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC);
+	snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt);
+	fd = open(device_path, O_RDONLY | O_CLOEXEC);
 	if (fd < 0)
 		ksft_exit_fail_msg(
 			"%s - Failed to open binder-control device\n",
@@ -180,7 +109,7 @@ static void __do_binderfs_test(void)
 	close(fd);
 	errno = saved_errno;
 	if (ret < 0) {
-		keep ? : rmdir_protect_errno("/dev/binderfs");
+		rmdir_protect_errno(binderfs_mntpt);
 		ksft_exit_fail_msg(
 			"%s - Failed to allocate new binder device\n",
 			strerror(errno));
@@ -193,9 +122,10 @@ static void __do_binderfs_test(void)
 	/* binder device allocation test passed */
 	ksft_inc_pass_cnt();
 
-	fd = open("/dev/binderfs/my-binder", O_CLOEXEC | O_RDONLY);
+	snprintf(device_path, sizeof(device_path), "%s/my-binder", binderfs_mntpt);
+	fd = open(device_path, O_CLOEXEC | O_RDONLY);
 	if (fd < 0) {
-		keep ? : rmdir_protect_errno("/dev/binderfs");
+		rmdir_protect_errno(binderfs_mntpt);
 		ksft_exit_fail_msg("%s - Failed to open my-binder device\n",
 				   strerror(errno));
 	}
@@ -205,7 +135,7 @@ static void __do_binderfs_test(void)
 	close(fd);
 	errno = saved_errno;
 	if (ret < 0) {
-		keep ? : rmdir_protect_errno("/dev/binderfs");
+		rmdir_protect_errno(binderfs_mntpt);
 		ksft_exit_fail_msg(
 			"%s - Failed to open perform BINDER_VERSION request\n",
 			strerror(errno));
@@ -217,9 +147,9 @@ static void __do_binderfs_test(void)
 	/* binder transaction with binderfs binder device passed */
 	ksft_inc_pass_cnt();
 
-	ret = unlink("/dev/binderfs/my-binder");
+	ret = unlink(device_path);
 	if (ret < 0) {
-		keep ? : rmdir_protect_errno("/dev/binderfs");
+		rmdir_protect_errno(binderfs_mntpt);
 		ksft_exit_fail_msg("%s - Failed to delete binder device\n",
 				   strerror(errno));
 	}
@@ -227,12 +157,13 @@ static void __do_binderfs_test(void)
 	/* binder device removal passed */
 	ksft_inc_pass_cnt();
 
-	ret = unlink("/dev/binderfs/binder-control");
+	snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt);
+	ret = unlink(device_path);
 	if (!ret) {
-		keep ? : rmdir_protect_errno("/dev/binderfs");
+		rmdir_protect_errno(binderfs_mntpt);
 		ksft_exit_fail_msg("Managed to delete binder-control device\n");
 	} else if (errno != EPERM) {
-		keep ? : rmdir_protect_errno("/dev/binderfs");
+		rmdir_protect_errno(binderfs_mntpt);
 		ksft_exit_fail_msg(
 			"%s - Failed to delete binder-control device but exited with unexpected error code\n",
 			strerror(errno));
@@ -242,34 +173,341 @@ static void __do_binderfs_test(void)
 	ksft_inc_xfail_cnt();
 
 on_error:
-	ret = umount2("/dev/binderfs", MNT_DETACH);
-	keep ?: rmdir_protect_errno("/dev/binderfs");
+	ret = umount2(binderfs_mntpt, MNT_DETACH);
+	rmdir_protect_errno(binderfs_mntpt);
 	if (ret < 0)
 		ksft_exit_fail_msg("%s - Failed to unmount binderfs\n",
 				   strerror(errno));
 
 	/* binderfs unmount test passed */
 	ksft_inc_pass_cnt();
+	return 0;
 }
 
-static void binderfs_test_privileged()
+static int wait_for_pid(pid_t pid)
 {
-	if (geteuid() != 0)
-		ksft_print_msg(
-			"Tests are not run as root. Skipping privileged tests\n");
-	else
-		__do_binderfs_test();
+	int status, ret;
+
+again:
+	ret = waitpid(pid, &status, 0);
+	if (ret == -1) {
+		if (errno == EINTR)
+			goto again;
+
+		return -1;
+	}
+
+	if (!WIFEXITED(status))
+		return -1;
+
+	return WEXITSTATUS(status);
+}
+
+static int setid_userns_root(void)
+{
+	if (setuid(0))
+		return -1;
+	if (setgid(0))
+		return -1;
+
+	setfsuid(0);
+	setfsgid(0);
+
+	return 0;
+}
+
+enum idmap_type {
+	UID_MAP,
+	GID_MAP,
+};
+
+static ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+	ssize_t ret;
+again:
+	ret = read(fd, buf, count);
+	if (ret < 0 && errno == EINTR)
+		goto again;
+
+	return ret;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+	ssize_t ret;
+again:
+	ret = write(fd, buf, count);
+	if (ret < 0 && errno == EINTR)
+		goto again;
+
+	return ret;
+}
+
+static int write_id_mapping(enum idmap_type type, pid_t pid, const char *buf,
+			    size_t buf_size)
+{
+	int fd;
+	int ret;
+	char path[4096];
+
+	if (type == GID_MAP) {
+		int setgroups_fd;
+
+		snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
+		setgroups_fd = open(path, O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+		if (setgroups_fd < 0 && errno != ENOENT)
+			return -1;
+
+		if (setgroups_fd >= 0) {
+			ret = write_nointr(setgroups_fd, "deny", sizeof("deny") - 1);
+			close_prot_errno_disarm(setgroups_fd);
+			if (ret != sizeof("deny") - 1)
+				return -1;
+		}
+	}
+
+	switch (type) {
+	case UID_MAP:
+		ret = snprintf(path, sizeof(path), "/proc/%d/uid_map", pid);
+		break;
+	case GID_MAP:
+		ret = snprintf(path, sizeof(path), "/proc/%d/gid_map", pid);
+		break;
+	default:
+		return -1;
+	}
+	if (ret < 0 || ret >= sizeof(path))
+		return -E2BIG;
+
+	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+	if (fd < 0)
+		return -1;
+
+	ret = write_nointr(fd, buf, buf_size);
+	close_prot_errno_disarm(fd);
+	if (ret != buf_size)
+		return -1;
+
+	return 0;
+}
+
+static void change_userns(int syncfds[2])
+{
+	int ret;
+	char buf;
+
+	close_prot_errno_disarm(syncfds[1]);
+
+	ret = unshare(CLONE_NEWUSER);
+	if (ret < 0)
+		ksft_exit_fail_msg("%s - Failed to unshare user namespace\n",
+				   strerror(errno));
+
+	ret = write_nointr(syncfds[0], "1", 1);
+	if (ret != 1)
+		ksft_exit_fail_msg("write_nointr() failed\n");
+
+	ret = read_nointr(syncfds[0], &buf, 1);
+	if (ret != 1)
+		ksft_exit_fail_msg("read_nointr() failed\n");
+
+	close_prot_errno_disarm(syncfds[0]);
+
+	if (setid_userns_root())
+		ksft_exit_fail_msg("setid_userns_root() failed");
+}
+
+static void change_idmaps(int syncfds[2], pid_t pid)
+{
+	int ret;
+	char buf;
+	char id_map[4096];
+
+	close_prot_errno_disarm(syncfds[0]);
+
+	ret = read_nointr(syncfds[1], &buf, 1);
+	if (ret != 1)
+		ksft_exit_fail_msg("read_nointr() failed\n");
+
+	snprintf(id_map, sizeof(id_map), "0 %d 1\n", getuid());
+	ret = write_id_mapping(UID_MAP, pid, id_map, strlen(id_map));
+	if (ret)
+		ksft_exit_fail_msg("write_id_mapping(UID_MAP) failed");
+
+	snprintf(id_map, sizeof(id_map), "0 %d 1\n", getgid());
+	ret = write_id_mapping(GID_MAP, pid, id_map, strlen(id_map));
+	if (ret)
+		ksft_exit_fail_msg("write_id_mapping(GID_MAP) failed");
+
+	ret = write_nointr(syncfds[1], "1", 1);
+	if (ret != 1)
+		ksft_exit_fail_msg("write_nointr() failed");
+
+	close_prot_errno_disarm(syncfds[1]);
 }
 
-static void binderfs_test_unprivileged()
+static void *binder_version_thread(void *data)
 {
-	change_to_userns();
-	__do_binderfs_test();
+	int fd = PTR_TO_INT(data);
+	struct binder_version version = { 0 };
+	int ret;
+
+	ret = ioctl(fd, BINDER_VERSION, &version);
+	if (ret < 0)
+		ksft_print_msg("%s - Failed to open perform BINDER_VERSION request\n", strerror(errno));
+
+	pthread_exit(data);
 }
 
-int main(int argc, char *argv[])
+/*
+ * Regression test:
+ * 2669b8b0c798 ("binder: prevent UAF for binderfs devices")
+ * f0fe2c0f050d ("binder: prevent UAF for binderfs devices II")
+ * 211b64e4b5b6 ("binderfs: use refcount for binder control devices too")
+ */
+TEST(binderfs_stress)
 {
-	binderfs_test_privileged();
-	binderfs_test_unprivileged();
-	ksft_exit_pass();
+	int fds[1000];
+	int syncfds[2];
+	pid_t pid;
+	int fd, ret;
+	size_t len;
+	struct binderfs_device device = { 0 };
+	char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX",
+		device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
+
+	ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds);
+	if (ret < 0)
+		ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno));
+
+	pid = fork();
+	if (pid < 0) {
+		close_prot_errno_disarm(syncfds[0]);
+		close_prot_errno_disarm(syncfds[1]);
+		ksft_exit_fail_msg("%s - Failed to fork", strerror(errno));
+	}
+
+	if (pid == 0) {
+		int i, j, k, nthreads;
+		pthread_attr_t attr;
+		pthread_t threads[DEFAULT_THREADS];
+		change_userns(syncfds);
+		change_mountns();
+
+		if (!mkdtemp(binderfs_mntpt))
+			log_exit("%s - Failed to create binderfs mountpoint\n",
+				 strerror(errno));
+
+		ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
+		if (ret < 0)
+			log_exit("%s - Failed to mount binderfs\n", strerror(errno));
+
+		for (int i = 0; i < ARRAY_SIZE(fds); i++) {
+
+			snprintf(device_path, sizeof(device_path),
+				 "%s/binder-control", binderfs_mntpt);
+			fd = open(device_path, O_RDONLY | O_CLOEXEC);
+			if (fd < 0)
+				log_exit("%s - Failed to open binder-control device\n", strerror(errno));
+
+			memset(&device, 0, sizeof(device));
+			snprintf(device.name, sizeof(device.name), "%d", i);
+			ret = ioctl(fd, BINDER_CTL_ADD, &device);
+			close_prot_errno_disarm(fd);
+			if (ret < 0)
+				log_exit("%s - Failed to allocate new binder device\n", strerror(errno));
+
+			snprintf(device_path, sizeof(device_path), "%s/%d",
+				 binderfs_mntpt, i);
+			fds[i] = open(device_path, O_RDONLY | O_CLOEXEC);
+			if (fds[i] < 0)
+				log_exit("%s - Failed to open binder device\n", strerror(errno));
+		}
+
+		ret = umount2(binderfs_mntpt, MNT_DETACH);
+		rmdir_protect_errno(binderfs_mntpt);
+		if (ret < 0)
+			log_exit("%s - Failed to unmount binderfs\n", strerror(errno));
+
+		nthreads = get_nprocs_conf();
+		if (nthreads > DEFAULT_THREADS)
+			nthreads = DEFAULT_THREADS;
+
+		pthread_attr_init(&attr);
+		for (k = 0; k < ARRAY_SIZE(fds); k++) {
+			for (i = 0; i < nthreads; i++) {
+				ret = pthread_create(&threads[i], &attr, binder_version_thread, INT_TO_PTR(fds[k]));
+				if (ret) {
+					ksft_print_msg("%s - Failed to create thread %d\n", strerror(errno), i);
+					break;
+				}
+			}
+
+			for (j = 0; j < i; j++) {
+				void *fdptr = NULL;
+
+				ret = pthread_join(threads[j], &fdptr);
+				if (ret)
+					ksft_print_msg("%s - Failed to join thread %d for fd %d\n", strerror(errno), j, PTR_TO_INT(fdptr));
+			}
+		}
+		pthread_attr_destroy(&attr);
+
+		for (k = 0; k < ARRAY_SIZE(fds); k++)
+			close(fds[k]);
+
+		exit(EXIT_SUCCESS);
+	}
+
+	change_idmaps(syncfds, pid);
+
+	ret = wait_for_pid(pid);
+	if (ret)
+		ksft_exit_fail_msg("wait_for_pid() failed");
 }
+
+TEST(binderfs_test_privileged)
+{
+	if (geteuid() != 0)
+		XFAIL(return, "Tests are not run as root. Skipping privileged tests");
+
+	if (__do_binderfs_test() == 1)
+		XFAIL(return, "The Android binderfs filesystem is not available");
+}
+
+TEST(binderfs_test_unprivileged)
+{
+	int ret;
+	int syncfds[2];
+	pid_t pid;
+
+	ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds);
+	if (ret < 0)
+		ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno));
+
+	pid = fork();
+	if (pid < 0) {
+		close_prot_errno_disarm(syncfds[0]);
+		close_prot_errno_disarm(syncfds[1]);
+		ksft_exit_fail_msg("%s - Failed to fork", strerror(errno));
+	}
+
+	if (pid == 0) {
+		change_userns(syncfds);
+		if (__do_binderfs_test() == 1)
+			exit(2);
+		exit(EXIT_SUCCESS);
+	}
+
+	change_idmaps(syncfds, pid);
+
+	ret = wait_for_pid(pid);
+	if (ret) {
+		if (ret == 2)
+			XFAIL(return, "The Android binderfs filesystem is not available");
+		else
+			ksft_exit_fail_msg("wait_for_pid() failed");
+	}
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/epoll/.gitignore b/tools/testing/selftests/filesystems/epoll/.gitignore
index 9ae8db44ec14..9090157258b1 100644
--- a/tools/testing/selftests/filesystems/epoll/.gitignore
+++ b/tools/testing/selftests/filesystems/epoll/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 epoll_wakeup_test
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 012b2cf69c11..40211cd8f0e6 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -1,13 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0-only
 # Makefile for firmware loading selftests
-
-# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
-all:
+CFLAGS = -Wall \
+         -O2
 
 TEST_PROGS := fw_run_tests.sh
 TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh
+TEST_GEN_FILES := fw_namespace
 
 include ../lib.mk
-
-# Nothing to clean up.
-clean:
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index 56894477c8bd..fcc281373b4d 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -86,6 +86,29 @@ else
 	fi
 fi
 
+# Try platform (EFI embedded fw) loading too
+if [ ! -e "$DIR"/trigger_request_platform ]; then
+	echo "$0: firmware loading: platform trigger not present, ignoring test" >&2
+else
+	if printf '\000' >"$DIR"/trigger_request_platform 2> /dev/null; then
+		echo "$0: empty filename should not succeed (platform)" >&2
+		exit 1
+	fi
+
+	# Note we echo a non-existing name, since files on the file-system
+	# are preferred over firmware embedded inside the platform's firmware
+	# The test adds a fake entry with the requested name to the platform's
+	# fw list, so the name does not matter as long as it does not exist
+	if ! echo -n "nope-$NAME" >"$DIR"/trigger_request_platform ; then
+		echo "$0: could not trigger request platform" >&2
+		exit 1
+	fi
+
+	# The test verifies itself that the loaded firmware contents matches
+	# the contents for the fake platform fw entry it added.
+	echo "$0: platform loading works"
+fi
+
 ### Batched requests tests
 test_config_present()
 {
diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c
new file mode 100644
index 000000000000..5ebc1aec7923
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_namespace.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test triggering of loading of firmware from different mount
+ * namespaces. Expect firmware to be always loaded from the mount
+ * namespace of PID 1. */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#ifndef CLONE_NEWNS
+# define CLONE_NEWNS 0x00020000
+#endif
+
+static char *fw_path = NULL;
+
+static void die(char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	if (fw_path)
+		unlink(fw_path);
+	umount("/lib/firmware");
+	exit(EXIT_FAILURE);
+}
+
+static void trigger_fw(const char *fw_name, const char *sys_path)
+{
+	int fd;
+
+	fd = open(sys_path, O_WRONLY);
+	if (fd < 0)
+		die("open failed: %s\n",
+		    strerror(errno));
+	if (write(fd, fw_name, strlen(fw_name)) != strlen(fw_name))
+		exit(EXIT_FAILURE);
+	close(fd);
+}
+
+static void setup_fw(const char *fw_path)
+{
+	int fd;
+	const char fw[] = "ABCD0123";
+
+	fd = open(fw_path, O_WRONLY | O_CREAT, 0600);
+	if (fd < 0)
+		die("open failed: %s\n",
+		    strerror(errno));
+	if (write(fd, fw, sizeof(fw) -1) != sizeof(fw) -1)
+		die("write failed: %s\n",
+		    strerror(errno));
+	close(fd);
+}
+
+static bool test_fw_in_ns(const char *fw_name, const char *sys_path, bool block_fw_in_parent_ns)
+{
+	pid_t child;
+
+	if (block_fw_in_parent_ns)
+		if (mount("test", "/lib/firmware", "tmpfs", MS_RDONLY, NULL) == -1)
+			die("blocking firmware in parent ns failed\n");
+
+	child = fork();
+	if (child == -1) {
+		die("fork failed: %s\n",
+			strerror(errno));
+	}
+	if (child != 0) { /* parent */
+		pid_t pid;
+		int status;
+
+		pid = waitpid(child, &status, 0);
+		if (pid == -1) {
+			die("waitpid failed: %s\n",
+				strerror(errno));
+		}
+		if (pid != child) {
+			die("waited for %d got %d\n",
+				child, pid);
+		}
+		if (!WIFEXITED(status)) {
+			die("child did not terminate cleanly\n");
+		}
+		if (block_fw_in_parent_ns)
+			umount("/lib/firmware");
+		return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+	}
+
+	if (unshare(CLONE_NEWNS) != 0) {
+		die("unshare(CLONE_NEWNS) failed: %s\n",
+			strerror(errno));
+	}
+	if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) == -1)
+		die("remount root in child ns failed\n");
+
+	if (!block_fw_in_parent_ns) {
+		if (mount("test", "/lib/firmware", "tmpfs", MS_RDONLY, NULL) == -1)
+			die("blocking firmware in child ns failed\n");
+	} else
+		umount("/lib/firmware");
+
+	trigger_fw(fw_name, sys_path);
+
+	exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char **argv)
+{
+	const char *fw_name = "test-firmware.bin";
+	char *sys_path;
+	if (argc != 2)
+		die("usage: %s sys_path\n", argv[0]);
+
+	/* Mount tmpfs to /lib/firmware so we don't have to assume
+	   that it is writable for us.*/
+	if (mount("test", "/lib/firmware", "tmpfs", 0, NULL) == -1)
+		die("mounting tmpfs to /lib/firmware failed\n");
+
+	sys_path = argv[1];
+	asprintf(&fw_path, "/lib/firmware/%s", fw_name);
+
+	setup_fw(fw_path);
+
+	setvbuf(stdout, NULL, _IONBF, 0);
+	/* Positive case: firmware in PID1 mount namespace */
+	printf("Testing with firmware in parent namespace (assumed to be same file system as PID1)\n");
+	if (!test_fw_in_ns(fw_name, sys_path, false))
+		die("error: failed to access firmware\n");
+
+	/* Negative case: firmware in child mount namespace, expected to fail */
+	printf("Testing with firmware in child namespace\n");
+	if (test_fw_in_ns(fw_name, sys_path, true))
+		die("error: firmware access did not fail\n");
+
+	unlink(fw_path);
+	free(fw_path);
+	umount("/lib/firmware");
+	exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh
index 8e14d555c197..777377078d5e 100755
--- a/tools/testing/selftests/firmware/fw_run_tests.sh
+++ b/tools/testing/selftests/firmware/fw_run_tests.sh
@@ -61,6 +61,10 @@ run_test_config_0003()
 check_mods
 check_setup
 
+echo "Running namespace test: "
+$TEST_DIR/fw_namespace $DIR/trigger_request
+echo "OK"
+
 if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
 	run_test_config_0001
 	run_test_config_0002
diff --git a/tools/testing/selftests/ftrace/.gitignore b/tools/testing/selftests/ftrace/.gitignore
index 98d8a5a63049..2659417cb2c7 100644
--- a/tools/testing/selftests/ftrace/.gitignore
+++ b/tools/testing/selftests/ftrace/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 logs
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
index 18fdaab9f570..68ff3f45c720 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
@@ -23,7 +23,7 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then
     exit_unsupported
 fi
 
-echo "Test histogram multiple tiggers"
+echo "Test histogram multiple triggers"
 
 echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
 echo 'hist:keys=parent_comm:vals=child_pid' >> events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index a09f57061902..0efcd494daab 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 futex_requeue_pi
 futex_requeue_pi_mismatched_ops
 futex_requeue_pi_signal_restart
diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
index 7d14f743d1a4..4c69408f3e84 100644
--- a/tools/testing/selftests/gpio/.gitignore
+++ b/tools/testing/selftests/gpio/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 gpio-mockup-chardev
diff --git a/tools/testing/selftests/ia64/.gitignore b/tools/testing/selftests/ia64/.gitignore
index ab806edc8732..e962fb2a08d5 100644
--- a/tools/testing/selftests/ia64/.gitignore
+++ b/tools/testing/selftests/ia64/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 aliasing-test
diff --git a/tools/testing/selftests/intel_pstate/.gitignore b/tools/testing/selftests/intel_pstate/.gitignore
index 3bfcbae5fa13..862de222a3f3 100644
--- a/tools/testing/selftests/intel_pstate/.gitignore
+++ b/tools/testing/selftests/intel_pstate/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 aperf
 msr
diff --git a/tools/testing/selftests/ipc/.gitignore b/tools/testing/selftests/ipc/.gitignore
index 9af04c9353c0..9ed280e4c704 100644
--- a/tools/testing/selftests/ipc/.gitignore
+++ b/tools/testing/selftests/ipc/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 msgque_test
 msgque
diff --git a/tools/testing/selftests/ir/.gitignore b/tools/testing/selftests/ir/.gitignore
index 070ea0c75fb8..0bbada8c1811 100644
--- a/tools/testing/selftests/ir/.gitignore
+++ b/tools/testing/selftests/ir/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 ir_loopback
diff --git a/tools/testing/selftests/kcmp/.gitignore b/tools/testing/selftests/kcmp/.gitignore
index 5a9b3732b2de..38ccdfe80ef7 100644
--- a/tools/testing/selftests/kcmp/.gitignore
+++ b/tools/testing/selftests/kcmp/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 kcmp_test
 kcmp-test-file
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 5336b26506ab..2902f6a78f8a 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -635,10 +635,12 @@
 struct __test_metadata {
 	const char *name;
 	void (*fn)(struct __test_metadata *);
+	pid_t pid;	/* pid of test when being run */
 	int termsig;
 	int passed;
 	int trigger; /* extra handler after the evaluation */
-	int timeout;
+	int timeout;	/* seconds to wait for test timeout */
+	bool timed_out;	/* did this test timeout instead of exiting? */
 	__u8 step;
 	bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
 	struct __test_metadata *prev, *next;
@@ -695,64 +697,116 @@ static inline int __bail(int for_realz, bool no_print, __u8 step)
 	return 0;
 }
 
-void __run_test(struct __test_metadata *t)
+struct __test_metadata *__active_test;
+static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
 {
-	pid_t child_pid;
+	struct __test_metadata *t = __active_test;
+
+	/* Sanity check handler execution environment. */
+	if (!t) {
+		fprintf(TH_LOG_STREAM,
+			"no active test in SIGARLM handler!?\n");
+		abort();
+	}
+	if (sig != SIGALRM || sig != info->si_signo) {
+		fprintf(TH_LOG_STREAM,
+			"%s: SIGALRM handler caught signal %d!?\n",
+			t->name, sig != SIGALRM ? sig : info->si_signo);
+		abort();
+	}
+
+	t->timed_out = true;
+	kill(t->pid, SIGKILL);
+}
+
+void __wait_for_test(struct __test_metadata *t)
+{
+	struct sigaction action = {
+		.sa_sigaction = __timeout_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+	struct sigaction saved_action;
 	int status;
 
+	if (sigaction(SIGALRM, &action, &saved_action)) {
+		t->passed = 0;
+		fprintf(TH_LOG_STREAM,
+			"%s: unable to install SIGARLM handler\n",
+			t->name);
+		return;
+	}
+	__active_test = t;
+	t->timed_out = false;
+	alarm(t->timeout);
+	waitpid(t->pid, &status, 0);
+	alarm(0);
+	if (sigaction(SIGALRM, &saved_action, NULL)) {
+		t->passed = 0;
+		fprintf(TH_LOG_STREAM,
+			"%s: unable to uninstall SIGARLM handler\n",
+			t->name);
+		return;
+	}
+	__active_test = NULL;
+
+	if (t->timed_out) {
+		t->passed = 0;
+		fprintf(TH_LOG_STREAM,
+			"%s: Test terminated by timeout\n", t->name);
+	} else if (WIFEXITED(status)) {
+		t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
+		if (t->termsig != -1) {
+			fprintf(TH_LOG_STREAM,
+				"%s: Test exited normally "
+				"instead of by signal (code: %d)\n",
+				t->name,
+				WEXITSTATUS(status));
+		} else if (!t->passed) {
+			fprintf(TH_LOG_STREAM,
+				"%s: Test failed at step #%d\n",
+				t->name,
+				WEXITSTATUS(status));
+		}
+	} else if (WIFSIGNALED(status)) {
+		t->passed = 0;
+		if (WTERMSIG(status) == SIGABRT) {
+			fprintf(TH_LOG_STREAM,
+				"%s: Test terminated by assertion\n",
+				t->name);
+		} else if (WTERMSIG(status) == t->termsig) {
+			t->passed = 1;
+		} else {
+			fprintf(TH_LOG_STREAM,
+				"%s: Test terminated unexpectedly "
+				"by signal %d\n",
+				t->name,
+				WTERMSIG(status));
+		}
+	} else {
+		fprintf(TH_LOG_STREAM,
+			"%s: Test ended in some other way [%u]\n",
+			t->name,
+			status);
+	}
+}
+
+void __run_test(struct __test_metadata *t)
+{
 	t->passed = 1;
 	t->trigger = 0;
 	printf("[ RUN      ] %s\n", t->name);
-	alarm(t->timeout);
-	child_pid = fork();
-	if (child_pid < 0) {
+	t->pid = fork();
+	if (t->pid < 0) {
 		printf("ERROR SPAWNING TEST CHILD\n");
 		t->passed = 0;
-	} else if (child_pid == 0) {
+	} else if (t->pid == 0) {
 		t->fn(t);
 		/* return the step that failed or 0 */
 		_exit(t->passed ? 0 : t->step);
 	} else {
-		/* TODO(wad) add timeout support. */
-		waitpid(child_pid, &status, 0);
-		if (WIFEXITED(status)) {
-			t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
-			if (t->termsig != -1) {
-				fprintf(TH_LOG_STREAM,
-					"%s: Test exited normally "
-					"instead of by signal (code: %d)\n",
-					t->name,
-					WEXITSTATUS(status));
-			} else if (!t->passed) {
-				fprintf(TH_LOG_STREAM,
-					"%s: Test failed at step #%d\n",
-					t->name,
-					WEXITSTATUS(status));
-			}
-		} else if (WIFSIGNALED(status)) {
-			t->passed = 0;
-			if (WTERMSIG(status) == SIGABRT) {
-				fprintf(TH_LOG_STREAM,
-					"%s: Test terminated by assertion\n",
-					t->name);
-			} else if (WTERMSIG(status) == t->termsig) {
-				t->passed = 1;
-			} else {
-				fprintf(TH_LOG_STREAM,
-					"%s: Test terminated unexpectedly "
-					"by signal %d\n",
-					t->name,
-					WTERMSIG(status));
-			}
-		} else {
-			fprintf(TH_LOG_STREAM,
-				"%s: Test ended in some other way [%u]\n",
-				t->name,
-				status);
-		}
+		__wait_for_test(t);
 	}
 	printf("[     %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name);
-	alarm(0);
 }
 
 static int test_harness_run(int __attribute__((unused)) argc,
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 30072c3f52fb..a9b2b48947ff 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,13 +1,17 @@
-/s390x/sync_regs_test
+# SPDX-License-Identifier: GPL-2.0-only
 /s390x/memop
+/s390x/resets
+/s390x/sync_regs_test
 /x86_64/cr4_cpuid_sync_test
 /x86_64/evmcs_test
 /x86_64/hyperv_cpuid
 /x86_64/mmio_warning_test
 /x86_64/platform_info_test
+/x86_64/set_memory_region_test
 /x86_64/set_sregs_test
 /x86_64/smm_test
 /x86_64/state_test
+/x86_64/svm_vmcall_test
 /x86_64/sync_regs_test
 /x86_64/vmx_close_while_nested_test
 /x86_64/vmx_dirty_log_test
@@ -15,5 +19,7 @@
 /x86_64/vmx_tsc_adjust_test
 /x86_64/xss_msr_test
 /clear_dirty_log_test
+/demand_paging_test
 /dirty_log_test
 /kvm_create_max_vcpus
+/steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index d91c53b726e6..712a2ddd2a27 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -7,7 +7,7 @@ top_srcdir = ../../../..
 KSFT_KHDR_INSTALL := 1
 UNAME_M := $(shell uname -m)
 
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
@@ -17,27 +17,33 @@ TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/set_memory_region_test
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
 TEST_GEN_PROGS_x86_64 += x86_64/smm_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
+TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += steal_time
 
 TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
+TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_aarch64 += steal_time
 
 TEST_GEN_PROGS_s390x = s390x/memop
-TEST_GEN_PROGS_s390x += s390x/sync_regs_test
 TEST_GEN_PROGS_s390x += s390x/resets
+TEST_GEN_PROGS_s390x += s390x/sync_regs_test
+TEST_GEN_PROGS_s390x += demand_paging_test
 TEST_GEN_PROGS_s390x += dirty_log_test
 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
 
diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c
index 749336937d37..11672ec6f74e 100644
--- a/tools/testing/selftests/kvm/clear_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/clear_dirty_log_test.c
@@ -1,2 +1,6 @@
 #define USE_CLEAR_DIRTY_LOG
+#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
+#define KVM_DIRTY_LOG_INITIALLY_SET         (1 << 1)
+#define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+		KVM_DIRTY_LOG_INITIALLY_SET)
 #include "dirty_log_test.c"
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
new file mode 100644
index 000000000000..360cd3ea4cd6
--- /dev/null
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM demand paging test
+ * Adapted from dirty_log_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2019, Google, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <time.h>
+#include <poll.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/userfaultfd.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#ifdef __NR_userfaultfd
+
+/* The memory slot index demand page */
+#define TEST_MEM_SLOT_INDEX		1
+
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM		0xc0000000
+
+#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */
+
+#ifdef PRINT_PER_PAGE_UPDATES
+#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
+
+#ifdef PRINT_PER_VCPU_UPDATES
+#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
+
+#define MAX_VCPUS 512
+
+/*
+ * Guest/Host shared variables. Ensure addr_gva2hva() and/or
+ * sync_global_to/from_guest() are used when accessing from
+ * the host. READ/WRITE_ONCE() should also be used with anything
+ * that may change.
+ */
+static uint64_t host_page_size;
+static uint64_t guest_page_size;
+
+static char *guest_data_prototype;
+
+/*
+ * Guest physical memory offset of the testing memory slot.
+ * This will be set to the topmost valid physical address minus
+ * the test memory size.
+ */
+static uint64_t guest_test_phys_mem;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+
+struct vcpu_args {
+	uint64_t gva;
+	uint64_t pages;
+
+	/* Only used by the host userspace part of the vCPU thread */
+	int vcpu_id;
+	struct kvm_vm *vm;
+};
+
+static struct vcpu_args vcpu_args[MAX_VCPUS];
+
+/*
+ * Continuously write to the first 8 bytes of each page in the demand paging
+ * memory region.
+ */
+static void guest_code(uint32_t vcpu_id)
+{
+	uint64_t gva;
+	uint64_t pages;
+	int i;
+
+	/* Make sure vCPU args data structure is not corrupt. */
+	GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id);
+
+	gva = vcpu_args[vcpu_id].gva;
+	pages = vcpu_args[vcpu_id].pages;
+
+	for (i = 0; i < pages; i++) {
+		uint64_t addr = gva + (i * guest_page_size);
+
+		addr &= ~(host_page_size - 1);
+		*(uint64_t *)addr = 0x0123456789ABCDEF;
+	}
+
+	GUEST_SYNC(1);
+}
+
+static void *vcpu_worker(void *data)
+{
+	int ret;
+	struct vcpu_args *args = (struct vcpu_args *)data;
+	struct kvm_vm *vm = args->vm;
+	int vcpu_id = args->vcpu_id;
+	struct kvm_run *run;
+	struct timespec start, end, ts_diff;
+
+	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+	run = vcpu_state(vm, vcpu_id);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	/* Let the guest access its memory */
+	ret = _vcpu_run(vm, vcpu_id);
+	TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+	if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) {
+		TEST_ASSERT(false,
+			    "Invalid guest sync status: exit_reason=%s\n",
+			    exit_reason_str(run->exit_reason));
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &end);
+	ts_diff = timespec_sub(end, start);
+	PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
+		       ts_diff.tv_sec, ts_diff.tv_nsec);
+
+	return NULL;
+}
+
+#define PAGE_SHIFT_4K  12
+#define PTES_PER_4K_PT 512
+
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
+				uint64_t vcpu_memory_bytes)
+{
+	struct kvm_vm *vm;
+	uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
+
+	/* Account for a few pages per-vCPU for stacks */
+	pages += DEFAULT_STACK_PGS * vcpus;
+
+	/*
+	 * Reserve twice the ammount of memory needed to map the test region and
+	 * the page table / stacks region, at 4k, for page tables. Do the
+	 * calculation with 4K page size: the smallest of all archs. (e.g., 64K
+	 * page size guest will need even less memory for page tables).
+	 */
+	pages += (2 * pages) / PTES_PER_4K_PT;
+	pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
+		 PTES_PER_4K_PT;
+	pages = vm_adjust_num_guest_pages(mode, pages);
+
+	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+	vm = _vm_create(mode, pages, O_RDWR);
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+#ifdef __x86_64__
+	vm_create_irqchip(vm);
+#endif
+	return vm;
+}
+
+static int handle_uffd_page_request(int uffd, uint64_t addr)
+{
+	pid_t tid;
+	struct timespec start;
+	struct timespec end;
+	struct uffdio_copy copy;
+	int r;
+
+	tid = syscall(__NR_gettid);
+
+	copy.src = (uint64_t)guest_data_prototype;
+	copy.dst = addr;
+	copy.len = host_page_size;
+	copy.mode = 0;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	r = ioctl(uffd, UFFDIO_COPY, &copy);
+	if (r == -1) {
+		pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n",
+			addr, tid, errno);
+		return r;
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &end);
+
+	PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
+		       timespec_to_ns(timespec_sub(end, start)));
+	PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
+		       host_page_size, addr, tid);
+
+	return 0;
+}
+
+bool quit_uffd_thread;
+
+struct uffd_handler_args {
+	int uffd;
+	int pipefd;
+	useconds_t delay;
+};
+
+static void *uffd_handler_thread_fn(void *arg)
+{
+	struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
+	int uffd = uffd_args->uffd;
+	int pipefd = uffd_args->pipefd;
+	useconds_t delay = uffd_args->delay;
+	int64_t pages = 0;
+	struct timespec start, end, ts_diff;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	while (!quit_uffd_thread) {
+		struct uffd_msg msg;
+		struct pollfd pollfd[2];
+		char tmp_chr;
+		int r;
+		uint64_t addr;
+
+		pollfd[0].fd = uffd;
+		pollfd[0].events = POLLIN;
+		pollfd[1].fd = pipefd;
+		pollfd[1].events = POLLIN;
+
+		r = poll(pollfd, 2, -1);
+		switch (r) {
+		case -1:
+			pr_info("poll err");
+			continue;
+		case 0:
+			continue;
+		case 1:
+			break;
+		default:
+			pr_info("Polling uffd returned %d", r);
+			return NULL;
+		}
+
+		if (pollfd[0].revents & POLLERR) {
+			pr_info("uffd revents has POLLERR");
+			return NULL;
+		}
+
+		if (pollfd[1].revents & POLLIN) {
+			r = read(pollfd[1].fd, &tmp_chr, 1);
+			TEST_ASSERT(r == 1,
+				    "Error reading pipefd in UFFD thread\n");
+			return NULL;
+		}
+
+		if (!pollfd[0].revents & POLLIN)
+			continue;
+
+		r = read(uffd, &msg, sizeof(msg));
+		if (r == -1) {
+			if (errno == EAGAIN)
+				continue;
+			pr_info("Read of uffd gor errno %d", errno);
+			return NULL;
+		}
+
+		if (r != sizeof(msg)) {
+			pr_info("Read on uffd returned unexpected size: %d bytes", r);
+			return NULL;
+		}
+
+		if (!(msg.event & UFFD_EVENT_PAGEFAULT))
+			continue;
+
+		if (delay)
+			usleep(delay);
+		addr =  msg.arg.pagefault.address;
+		r = handle_uffd_page_request(uffd, addr);
+		if (r < 0)
+			return NULL;
+		pages++;
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &end);
+	ts_diff = timespec_sub(end, start);
+	PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
+		       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
+		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+
+	return NULL;
+}
+
+static int setup_demand_paging(struct kvm_vm *vm,
+			       pthread_t *uffd_handler_thread, int pipefd,
+			       useconds_t uffd_delay,
+			       struct uffd_handler_args *uffd_args,
+			       void *hva, uint64_t len)
+{
+	int uffd;
+	struct uffdio_api uffdio_api;
+	struct uffdio_register uffdio_register;
+
+	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+	if (uffd == -1) {
+		pr_info("uffd creation failed\n");
+		return -1;
+	}
+
+	uffdio_api.api = UFFD_API;
+	uffdio_api.features = 0;
+	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
+		pr_info("ioctl uffdio_api failed\n");
+		return -1;
+	}
+
+	uffdio_register.range.start = (uint64_t)hva;
+	uffdio_register.range.len = len;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
+		pr_info("ioctl uffdio_register failed\n");
+		return -1;
+	}
+
+	if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) !=
+			UFFD_API_RANGE_IOCTLS) {
+		pr_info("unexpected userfaultfd ioctl set\n");
+		return -1;
+	}
+
+	uffd_args->uffd = uffd;
+	uffd_args->pipefd = pipefd;
+	uffd_args->delay = uffd_delay;
+	pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
+		       uffd_args);
+
+	PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
+		       hva, hva + len);
+
+	return 0;
+}
+
+static void run_test(enum vm_guest_mode mode, bool use_uffd,
+		     useconds_t uffd_delay, int vcpus,
+		     uint64_t vcpu_memory_bytes)
+{
+	pthread_t *vcpu_threads;
+	pthread_t *uffd_handler_threads = NULL;
+	struct uffd_handler_args *uffd_args = NULL;
+	struct timespec start, end, ts_diff;
+	int *pipefds = NULL;
+	struct kvm_vm *vm;
+	uint64_t guest_num_pages;
+	int vcpu_id;
+	int r;
+
+	vm = create_vm(mode, vcpus, vcpu_memory_bytes);
+
+	guest_page_size = vm_get_page_size(vm);
+
+	TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0,
+		    "Guest memory size is not guest page size aligned.");
+
+	guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size;
+	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+
+	/*
+	 * If there should be more memory in the guest test region than there
+	 * can be pages in the guest, it will definitely cause problems.
+	 */
+	TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+		    "Requested more guest memory than address space allows.\n"
+		    "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
+		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
+		    vcpu_memory_bytes);
+
+	host_page_size = getpagesize();
+	TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0,
+		    "Guest memory size is not host page size aligned.");
+
+	guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
+			      guest_page_size;
+	guest_test_phys_mem &= ~(host_page_size - 1);
+
+#ifdef __s390x__
+	/* Align to 1M (segment size) */
+	guest_test_phys_mem &= ~((1 << 20) - 1);
+#endif
+
+	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+
+	/* Add an extra memory slot for testing demand paging */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    guest_test_phys_mem,
+				    TEST_MEM_SLOT_INDEX,
+				    guest_num_pages, 0);
+
+	/* Do mapping for the demand paging memory slot */
+	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+
+	ucall_init(vm, NULL);
+
+	guest_data_prototype = malloc(host_page_size);
+	TEST_ASSERT(guest_data_prototype,
+		    "Failed to allocate buffer for guest data pattern");
+	memset(guest_data_prototype, 0xAB, host_page_size);
+
+	vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads));
+	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+
+	if (use_uffd) {
+		uffd_handler_threads =
+			malloc(vcpus * sizeof(*uffd_handler_threads));
+		TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
+
+		uffd_args = malloc(vcpus * sizeof(*uffd_args));
+		TEST_ASSERT(uffd_args, "Memory allocation failed");
+
+		pipefds = malloc(sizeof(int) * vcpus * 2);
+		TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
+	}
+
+	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+		vm_paddr_t vcpu_gpa;
+		void *vcpu_hva;
+
+		vm_vcpu_add_default(vm, vcpu_id, guest_code);
+
+		vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
+		PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+			       vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+
+		/* Cache the HVA pointer of the region */
+		vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
+
+		if (use_uffd) {
+			/*
+			 * Set up user fault fd to handle demand paging
+			 * requests.
+			 */
+			r = pipe2(&pipefds[vcpu_id * 2],
+				  O_CLOEXEC | O_NONBLOCK);
+			TEST_ASSERT(!r, "Failed to set up pipefd");
+
+			r = setup_demand_paging(vm,
+						&uffd_handler_threads[vcpu_id],
+						pipefds[vcpu_id * 2],
+						uffd_delay, &uffd_args[vcpu_id],
+						vcpu_hva, vcpu_memory_bytes);
+			if (r < 0)
+				exit(-r);
+		}
+
+#ifdef __x86_64__
+		vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
+#endif
+
+		vcpu_args[vcpu_id].vm = vm;
+		vcpu_args[vcpu_id].vcpu_id = vcpu_id;
+		vcpu_args[vcpu_id].gva = guest_test_virt_mem +
+					 (vcpu_id * vcpu_memory_bytes);
+		vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size;
+	}
+
+	/* Export the shared variables to the guest */
+	sync_global_to_guest(vm, host_page_size);
+	sync_global_to_guest(vm, guest_page_size);
+	sync_global_to_guest(vm, vcpu_args);
+
+	pr_info("Finished creating vCPUs and starting uffd threads\n");
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
+			       &vcpu_args[vcpu_id]);
+	}
+
+	pr_info("Started all vCPUs\n");
+
+	/* Wait for the vcpu threads to quit */
+	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+		pthread_join(vcpu_threads[vcpu_id], NULL);
+		PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
+	}
+
+	pr_info("All vCPU threads joined\n");
+
+	clock_gettime(CLOCK_MONOTONIC, &end);
+
+	if (use_uffd) {
+		char c;
+
+		/* Tell the user fault fd handler threads to quit */
+		for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+			r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
+			TEST_ASSERT(r == 1, "Unable to write to pipefd");
+
+			pthread_join(uffd_handler_threads[vcpu_id], NULL);
+		}
+	}
+
+	ts_diff = timespec_sub(end, start);
+	pr_info("Total guest execution time: %ld.%.9lds\n",
+		ts_diff.tv_sec, ts_diff.tv_nsec);
+	pr_info("Overall demand paging rate: %f pgs/sec\n",
+		guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+
+	ucall_uninit(vm);
+	kvm_vm_free(vm);
+
+	free(guest_data_prototype);
+	free(vcpu_threads);
+	if (use_uffd) {
+		free(uffd_handler_threads);
+		free(uffd_args);
+		free(pipefds);
+	}
+}
+
+struct guest_mode {
+	bool supported;
+	bool enabled;
+};
+static struct guest_mode guest_modes[NUM_VM_MODES];
+
+#define guest_mode_init(mode, supported, enabled) ({ \
+	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
+})
+
+static void help(char *name)
+{
+	int i;
+
+	puts("");
+	printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
+	       "          [-b memory] [-v vcpus]\n", name);
+	printf(" -m: specify the guest mode ID to test\n"
+	       "     (default: test all supported modes)\n"
+	       "     This option may be used multiple times.\n"
+	       "     Guest mode IDs:\n");
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
+		       guest_modes[i].supported ? " (supported)" : "");
+	}
+	printf(" -u: use User Fault FD to handle vCPU page\n"
+	       "     faults.\n");
+	printf(" -d: add a delay in usec to the User Fault\n"
+	       "     FD handler to simulate demand paging\n"
+	       "     overheads. Ignored without -u.\n");
+	printf(" -b: specify the size of the memory region which should be\n"
+	       "     demand paged by each vCPU. e.g. 10M or 3G.\n"
+	       "     Default: 1G\n");
+	printf(" -v: specify the number of vCPUs to run.\n");
+	puts("");
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	bool mode_selected = false;
+	uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE;
+	int vcpus = 1;
+	unsigned int mode;
+	int opt, i;
+	bool use_uffd = false;
+	useconds_t uffd_delay = 0;
+
+#ifdef __x86_64__
+	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
+#endif
+#ifdef __aarch64__
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
+	guest_mode_init(VM_MODE_P40V48_64K, true, true);
+	{
+		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+
+		if (limit >= 52)
+			guest_mode_init(VM_MODE_P52V48_64K, true, true);
+		if (limit >= 48) {
+			guest_mode_init(VM_MODE_P48V48_4K, true, true);
+			guest_mode_init(VM_MODE_P48V48_64K, true, true);
+		}
+	}
+#endif
+#ifdef __s390x__
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
+#endif
+
+	while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) {
+		switch (opt) {
+		case 'm':
+			if (!mode_selected) {
+				for (i = 0; i < NUM_VM_MODES; ++i)
+					guest_modes[i].enabled = false;
+				mode_selected = true;
+			}
+			mode = strtoul(optarg, NULL, 10);
+			TEST_ASSERT(mode < NUM_VM_MODES,
+				    "Guest mode ID %d too big", mode);
+			guest_modes[mode].enabled = true;
+			break;
+		case 'u':
+			use_uffd = true;
+			break;
+		case 'd':
+			uffd_delay = strtoul(optarg, NULL, 0);
+			TEST_ASSERT(uffd_delay >= 0,
+				    "A negative UFFD delay is not supported.");
+			break;
+		case 'b':
+			vcpu_memory_bytes = parse_size(optarg);
+			break;
+		case 'v':
+			vcpus = atoi(optarg);
+			TEST_ASSERT(vcpus > 0,
+				    "Must have a positive number of vCPUs");
+			TEST_ASSERT(vcpus <= MAX_VCPUS,
+				    "This test does not currently support\n"
+				    "more than %d vCPUs.", MAX_VCPUS);
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		if (!guest_modes[i].enabled)
+			continue;
+		TEST_ASSERT(guest_modes[i].supported,
+			    "Guest mode ID %d (%s) not supported.",
+			    i, vm_guest_mode_string(i));
+		run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes);
+	}
+
+	return 0;
+}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+	print_skip("__NR_userfaultfd must be present for userfaultfd test");
+	return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 5614222a6628..752ec158ac59 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -166,24 +166,22 @@ static void *vcpu_worker(void *data)
 			pages_count += TEST_PAGES_PER_LOOP;
 			generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
 		} else {
-			TEST_ASSERT(false,
-				    "Invalid guest sync status: "
-				    "exit_reason=%s\n",
-				    exit_reason_str(run->exit_reason));
+			TEST_FAIL("Invalid guest sync status: "
+				  "exit_reason=%s\n",
+				  exit_reason_str(run->exit_reason));
 		}
 	}
 
-	DEBUG("Dirtied %"PRIu64" pages\n", pages_count);
+	pr_info("Dirtied %"PRIu64" pages\n", pages_count);
 
 	return NULL;
 }
 
-static void vm_dirty_log_verify(unsigned long *bmap)
+static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
 {
+	uint64_t step = vm_num_host_pages(mode, 1);
 	uint64_t page;
 	uint64_t *value_ptr;
-	uint64_t step = host_page_size >= guest_page_size ? 1 :
-				guest_page_size / host_page_size;
 
 	for (page = 0; page < host_num_pages; page += step) {
 		value_ptr = host_test_mem + page * host_page_size;
@@ -252,6 +250,8 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 	struct kvm_vm *vm;
 	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
 
+	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
 	vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
 	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
 #ifdef __x86_64__
@@ -264,6 +264,10 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 #define DIRTY_MEM_BITS 30 /* 1G */
 #define PAGE_SHIFT_4K  12
 
+#ifdef USE_CLEAR_DIRTY_LOG
+static u64 dirty_log_manual_caps;
+#endif
+
 static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		     unsigned long interval, uint64_t phys_offset)
 {
@@ -289,14 +293,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	 * case where the size is not aligned to 64 pages.
 	 */
 	guest_num_pages = (1ul << (DIRTY_MEM_BITS -
-				   vm_get_page_shift(vm))) + 16;
-#ifdef __s390x__
-	/* Round up to multiple of 1M (segment size) */
-	guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
-#endif
+				   vm_get_page_shift(vm))) + 3;
+	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+
 	host_page_size = getpagesize();
-	host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
-			 !!((guest_num_pages * guest_page_size) % host_page_size);
+	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
 
 	if (!phys_offset) {
 		guest_test_phys_mem = (vm_get_max_gfn(vm) -
@@ -311,7 +312,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	guest_test_phys_mem &= ~((1 << 20) - 1);
 #endif
 
-	DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
 
 	bmap = bitmap_alloc(host_num_pages);
 	host_bmap_track = bitmap_alloc(host_num_pages);
@@ -320,7 +321,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	struct kvm_enable_cap cap = {};
 
 	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
-	cap.args[0] = 1;
+	cap.args[0] = dirty_log_manual_caps;
 	vm_enable_cap(vm, &cap);
 #endif
 
@@ -332,8 +333,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 				    KVM_MEM_LOG_DIRTY_PAGES);
 
 	/* Do mapping for the dirty track memory slot */
-	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem,
-		 guest_num_pages * guest_page_size, 0);
+	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
 
 	/* Cache the HVA pointer of the region */
 	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
@@ -341,9 +341,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 #ifdef __x86_64__
 	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 #endif
-#ifdef __aarch64__
 	ucall_init(vm, NULL);
-#endif
 
 	/* Export the shared variables to the guest */
 	sync_global_to_guest(vm, host_page_size);
@@ -369,7 +367,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
 				       host_num_pages);
 #endif
-		vm_dirty_log_verify(bmap);
+		vm_dirty_log_verify(mode, bmap);
 		iteration++;
 		sync_global_to_guest(vm, iteration);
 	}
@@ -378,9 +376,9 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	host_quit = true;
 	pthread_join(vcpu_thread, NULL);
 
-	DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
-	      "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
-	      host_track_next_count);
+	pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
+		"track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
+		host_track_next_count);
 
 	free(bmap);
 	free(host_bmap_track);
@@ -388,15 +386,14 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	kvm_vm_free(vm);
 }
 
-struct vm_guest_mode_params {
+struct guest_mode {
 	bool supported;
 	bool enabled;
 };
-struct vm_guest_mode_params vm_guest_mode_params[NUM_VM_MODES];
+static struct guest_mode guest_modes[NUM_VM_MODES];
 
-#define vm_guest_mode_params_init(mode, supported, enabled)					\
-({												\
-	vm_guest_mode_params[mode] = (struct vm_guest_mode_params){ supported, enabled };	\
+#define guest_mode_init(mode, supported, enabled) ({ \
+	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
 })
 
 static void help(char *name)
@@ -419,7 +416,7 @@ static void help(char *name)
 	       "     Guest mode IDs:\n");
 	for (i = 0; i < NUM_VM_MODES; ++i) {
 		printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
-		       vm_guest_mode_params[i].supported ? " (supported)" : "");
+		       guest_modes[i].supported ? " (supported)" : "");
 	}
 	puts("");
 	exit(0);
@@ -433,34 +430,38 @@ int main(int argc, char *argv[])
 	uint64_t phys_offset = 0;
 	unsigned int mode;
 	int opt, i;
-#ifdef __aarch64__
-	unsigned int host_ipa_limit;
-#endif
 
 #ifdef USE_CLEAR_DIRTY_LOG
-	if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2)) {
-		fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n");
+	dirty_log_manual_caps =
+		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+	if (!dirty_log_manual_caps) {
+		print_skip("KVM_CLEAR_DIRTY_LOG not available");
 		exit(KSFT_SKIP);
 	}
+	dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+				  KVM_DIRTY_LOG_INITIALLY_SET);
 #endif
 
 #ifdef __x86_64__
-	vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true);
+	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
 #endif
 #ifdef __aarch64__
-	vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
-	vm_guest_mode_params_init(VM_MODE_P40V48_64K, true, true);
-
-	host_ipa_limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
-	if (host_ipa_limit >= 52)
-		vm_guest_mode_params_init(VM_MODE_P52V48_64K, true, true);
-	if (host_ipa_limit >= 48) {
-		vm_guest_mode_params_init(VM_MODE_P48V48_4K, true, true);
-		vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true);
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
+	guest_mode_init(VM_MODE_P40V48_64K, true, true);
+
+	{
+		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+
+		if (limit >= 52)
+			guest_mode_init(VM_MODE_P52V48_64K, true, true);
+		if (limit >= 48) {
+			guest_mode_init(VM_MODE_P48V48_4K, true, true);
+			guest_mode_init(VM_MODE_P48V48_64K, true, true);
+		}
 	}
 #endif
 #ifdef __s390x__
-	vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
 #endif
 
 	while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) {
@@ -477,13 +478,13 @@ int main(int argc, char *argv[])
 		case 'm':
 			if (!mode_selected) {
 				for (i = 0; i < NUM_VM_MODES; ++i)
-					vm_guest_mode_params[i].enabled = false;
+					guest_modes[i].enabled = false;
 				mode_selected = true;
 			}
 			mode = strtoul(optarg, NULL, 10);
 			TEST_ASSERT(mode < NUM_VM_MODES,
 				    "Guest mode ID %d too big", mode);
-			vm_guest_mode_params[mode].enabled = true;
+			guest_modes[mode].enabled = true;
 			break;
 		case 'h':
 		default:
@@ -495,15 +496,15 @@ int main(int argc, char *argv[])
 	TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
 	TEST_ASSERT(interval > 0, "Interval must be greater than zero");
 
-	DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
-	      iterations, interval);
+	pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
+		iterations, interval);
 
 	srandom(time(0));
 
 	for (i = 0; i < NUM_VM_MODES; ++i) {
-		if (!vm_guest_mode_params[i].enabled)
+		if (!guest_modes[i].enabled)
 			continue;
-		TEST_ASSERT(vm_guest_mode_params[i].supported,
+		TEST_ASSERT(guest_modes[i].supported,
 			    "Guest mode ID %d (%s) not supported.",
 			    i, vm_guest_mode_string(i));
 		run_test(i, iterations, interval, phys_offset);
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h
index 4912d23844bc..d8f4d6bfe05d 100644
--- a/tools/testing/selftests/kvm/include/evmcs.h
+++ b/tools/testing/selftests/kvm/include/evmcs.h
@@ -16,6 +16,8 @@
 #define u32 uint32_t
 #define u64 uint64_t
 
+#define EVMCS_VERSION 1
+
 extern bool enable_evmcs;
 
 struct hv_vp_assist_page {
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index ae0d14c2540a..a99b875f50d2 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -16,7 +16,8 @@
 #include "sparsebit.h"
 
 
-/* Callers of kvm_util only have an incomplete/opaque description of the
+/*
+ * Callers of kvm_util only have an incomplete/opaque description of the
  * structure kvm_util is using to maintain the state of a VM.
  */
 struct kvm_vm;
@@ -24,12 +25,6 @@ struct kvm_vm;
 typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
 typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
 
-#ifndef NDEBUG
-#define DEBUG(...) printf(__VA_ARGS__);
-#else
-#define DEBUG(...)
-#endif
-
 /* Minimum allocated guest virtual and physical addresses */
 #define KVM_UTIL_MIN_VADDR		0x2000
 
@@ -84,6 +79,23 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
 		     uint32_t data_memslot, uint32_t pgd_memslot);
 
 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+/*
+ * VM VCPU Dump
+ *
+ * Input Args:
+ *   stream - Output FILE stream
+ *   vm     - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by @vcpuid, within the VM
+ * given by @vm, to the FILE stream given by @stream.
+ */
 void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
 	       uint8_t indent);
 
@@ -100,14 +112,31 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
 		void *arg);
 void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 			  uint32_t data_memslot, uint32_t pgd_memslot);
 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-	      size_t size, uint32_t pgd_memslot);
+	      unsigned int npages, uint32_t pgd_memslot);
 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent VM physical address
+ *
+ * Returns the VM physical address of the translated VM virtual
+ * address given by @gva.
+ */
 vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
 
 struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
@@ -118,7 +147,27 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
 		       struct kvm_mp_state *mp_state);
 void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
 void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+
+/*
+ * VM VCPU Args Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   num - number of arguments
+ *   ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first @num function input registers of the VCPU with @vcpuid,
+ * per the C calling convention of the architecture, to the values given
+ * as variable args. Each of the variable args is expected to be of type
+ * uint64_t. The maximum @num can be is specific to the architecture.
+ */
 void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+
 void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
 		    struct kvm_sregs *sregs);
 void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
@@ -147,15 +196,57 @@ int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
 const char *exit_reason_str(unsigned int exit_reason);
 
 void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vaddr - VM Virtual Address
+ *   paddr - VM Physical Address
+ *   memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within @vm, creates a virtual translation for the page starting
+ * at @vaddr to the page starting at @paddr.
+ */
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-		 uint32_t pgd_memslot);
+		 uint32_t memslot);
+
 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
 			     uint32_t memslot);
 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
 			      vm_paddr_t paddr_min, uint32_t memslot);
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
+/*
+ * Create a VM with reasonable defaults
+ *
+ * Input Args:
+ *   vcpuid - The id of the single VCPU to add to the VM.
+ *   extra_mem_pages - The number of extra pages to add (this will
+ *                     decide how much extra space we will need to
+ *                     setup the page tables using memslot 0)
+ *   guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 				 void *guest_code);
+
+/*
+ * Adds a vCPU with reasonable defaults (e.g. a stack)
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - The id of the VCPU to add to the VM.
+ *   guest_code - The vCPU's entry point
+ */
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
 
 bool vm_is_unrestricted_guest(struct kvm_vm *vm);
@@ -164,6 +255,21 @@ unsigned int vm_get_page_size(struct kvm_vm *vm);
 unsigned int vm_get_page_shift(struct kvm_vm *vm);
 unsigned int vm_get_max_gfn(struct kvm_vm *vm);
 
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
+unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
+unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
+static inline unsigned int
+vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+	unsigned int n;
+	n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
+#ifdef __s390x__
+	/* s390 requires 1M aligned guest sizes */
+	n = (n + 255) & ~255;
+#endif
+	return n;
+}
+
 struct kvm_userspace_memory_region *
 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
 				 uint64_t end);
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index a41db6fb7e24..5eb01bf51b86 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -19,12 +19,28 @@
 #include <fcntl.h>
 #include "kselftest.h"
 
+static inline int _no_printf(const char *format, ...) { return 0; }
+
+#ifdef DEBUG
+#define pr_debug(...) printf(__VA_ARGS__)
+#else
+#define pr_debug(...) _no_printf(__VA_ARGS__)
+#endif
+#ifndef QUIET
+#define pr_info(...) printf(__VA_ARGS__)
+#else
+#define pr_info(...) _no_printf(__VA_ARGS__)
+#endif
+
+void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+
 ssize_t test_write(int fd, const void *buf, size_t count);
 ssize_t test_read(int fd, void *buf, size_t count);
 int test_seq_read(const char *path, char **bufp, size_t *sizep);
 
 void test_assert(bool exp, const char *exp_str,
-		 const char *file, unsigned int line, const char *fmt, ...);
+		 const char *file, unsigned int line, const char *fmt, ...)
+		__attribute__((format(printf, 5, 6)));
 
 #define TEST_ASSERT(e, fmt, ...) \
 	test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
@@ -39,4 +55,14 @@ void test_assert(bool exp, const char *exp_str,
 		    #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
 } while (0)
 
+#define TEST_FAIL(fmt, ...) \
+	TEST_ASSERT(false, fmt, ##__VA_ARGS__)
+
+size_t parse_size(const char *size);
+
+int64_t timespec_to_ns(struct timespec ts);
+struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
+struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
+struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
+
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index 6f38c3dc0d56..0299cd81b8ba 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -24,8 +24,8 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
 	struct kvm_vm *vm;
 	int i;
 
-	printf("Testing creating %d vCPUs, with IDs %d...%d.\n",
-	       num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1);
+	pr_info("Testing creating %d vCPUs, with IDs %d...%d.\n",
+		num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1);
 
 	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
 
@@ -41,8 +41,8 @@ int main(int argc, char *argv[])
 	int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
 	int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
 
-	printf("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
-	printf("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
+	pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
+	pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
 
 	/*
 	 * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 86036a59a668..2afa6618b396 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -130,7 +130,7 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
 		break;
 	default:
-		TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+		TEST_FAIL("Page table levels must be 2, 3, or 4");
 	}
 
 	*ptep = paddr | 3;
@@ -173,20 +173,19 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 			goto unmapped_gva;
 		break;
 	default:
-		TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+		TEST_FAIL("Page table levels must be 2, 3, or 4");
 	}
 
 	return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
 
 unmapped_gva:
-	TEST_ASSERT(false, "No mapping for vm virtual address, "
-		    "gva: 0x%lx", gva);
+	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
 	exit(1);
 }
 
 static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
 {
-#ifdef DEBUG_VM
+#ifdef DEBUG
 	static const char * const type[] = { "", "pud", "pmd", "pte" };
 	uint64_t pte, *ptep;
 
@@ -197,7 +196,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p
 		ptep = addr_gpa2hva(vm, pte);
 		if (!*ptep)
 			continue;
-		printf("%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
+		fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
 		pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
 	}
 #endif
@@ -215,7 +214,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 		ptep = addr_gpa2hva(vm, pgd);
 		if (!*ptep)
 			continue;
-		printf("%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
+		fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
 		pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
 	}
 }
@@ -262,11 +261,11 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
 
 	switch (vm->mode) {
 	case VM_MODE_P52V48_4K:
-		TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
-				   "with 52-bit physical address ranges");
+		TEST_FAIL("AArch64 does not support 4K sized pages "
+			  "with 52-bit physical address ranges");
 	case VM_MODE_PXXV48_4K:
-		TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
-				   "with ANY-bit physical address ranges");
+		TEST_FAIL("AArch64 does not support 4K sized pages "
+			  "with ANY-bit physical address ranges");
 	case VM_MODE_P52V48_64K:
 		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
 		tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
@@ -288,7 +287,7 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
 		tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
 		break;
 	default:
-		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+		TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
 	}
 
 	sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
@@ -333,3 +332,21 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 {
 	aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code);
 }
+
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+	va_list ap;
+	int i;
+
+	TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+		    "  num: %u\n", num);
+
+	va_start(ap, num);
+
+	for (i = 0; i < num; i++) {
+		set_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[i]),
+			va_arg(ap, uint64_t));
+	}
+
+	va_end(ap);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index 6cd91970fbad..c8e0ec20d3bf 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -62,7 +62,7 @@ void ucall_init(struct kvm_vm *vm, void *arg)
 		if (ucall_mmio_init(vm, start + offset))
 			return;
 	}
-	TEST_ASSERT(false, "Can't find a ucall mmio address");
+	TEST_FAIL("Can't find a ucall mmio address");
 }
 
 void ucall_uninit(struct kvm_vm *vm)
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index d1cf9f6e0e6b..5ebbd0d6b472 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -82,8 +82,10 @@ test_assert(bool exp, const char *exp_str,
 		}
 		va_end(ap);
 
-		if (errno == EACCES)
-			ksft_exit_skip("Access denied - Exiting.\n");
+		if (errno == EACCES) {
+			print_skip("Access denied - Exiting");
+			exit(KSFT_SKIP);
+		}
 		exit(254);
 	}
 
diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c
index eaf351cc7e7f..fedb2a741f0b 100644
--- a/tools/testing/selftests/kvm/lib/io.c
+++ b/tools/testing/selftests/kvm/lib/io.c
@@ -61,9 +61,9 @@ ssize_t test_write(int fd, const void *buf, size_t count)
 			continue;
 
 		case 0:
-			TEST_ASSERT(false, "Unexpected EOF,\n"
-				    "  rc: %zi num_written: %zi num_left: %zu",
-				    rc, num_written, num_left);
+			TEST_FAIL("Unexpected EOF,\n"
+				  "  rc: %zi num_written: %zi num_left: %zu",
+				  rc, num_written, num_left);
 			break;
 
 		default:
@@ -138,9 +138,9 @@ ssize_t test_read(int fd, void *buf, size_t count)
 			break;
 
 		case 0:
-			TEST_ASSERT(false, "Unexpected EOF,\n"
-				    "  rc: %zi num_read: %zi num_left: %zu",
-				    rc, num_read, num_left);
+			TEST_FAIL("Unexpected EOF,\n"
+				  "   rc: %zi num_read: %zi num_left: %zu",
+				  rc, num_read, num_left);
 			break;
 
 		default:
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index a6dd0401eb50..8a3523d4434f 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -92,7 +92,7 @@ static void vm_open(struct kvm_vm *vm, int perm)
 		exit(KSFT_SKIP);
 
 	if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
-		fprintf(stderr, "immediate_exit not available, skipping test\n");
+		print_skip("immediate_exit not available");
 		exit(KSFT_SKIP);
 	}
 
@@ -113,6 +113,25 @@ const char * const vm_guest_mode_string[] = {
 _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
 	       "Missing new mode strings?");
 
+struct vm_guest_mode_params {
+	unsigned int pa_bits;
+	unsigned int va_bits;
+	unsigned int page_size;
+	unsigned int page_shift;
+};
+
+static const struct vm_guest_mode_params vm_guest_mode_params[] = {
+	{ 52, 48,  0x1000, 12 },
+	{ 52, 48, 0x10000, 16 },
+	{ 48, 48,  0x1000, 12 },
+	{ 48, 48, 0x10000, 16 },
+	{ 40, 48,  0x1000, 12 },
+	{ 40, 48, 0x10000, 16 },
+	{  0,  0,  0x1000, 12 },
+};
+_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
+	       "Missing new mode params?");
+
 /*
  * VM Create
  *
@@ -136,7 +155,8 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 {
 	struct kvm_vm *vm;
 
-	DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+	pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
+		 vm_guest_mode_string(mode), phy_pages, perm);
 
 	vm = calloc(1, sizeof(*vm));
 	TEST_ASSERT(vm != NULL, "Insufficient Memory");
@@ -144,67 +164,45 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	vm->mode = mode;
 	vm->type = 0;
 
+	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
+	vm->va_bits = vm_guest_mode_params[mode].va_bits;
+	vm->page_size = vm_guest_mode_params[mode].page_size;
+	vm->page_shift = vm_guest_mode_params[mode].page_shift;
+
 	/* Setup mode specific traits. */
 	switch (vm->mode) {
 	case VM_MODE_P52V48_4K:
 		vm->pgtable_levels = 4;
-		vm->pa_bits = 52;
-		vm->va_bits = 48;
-		vm->page_size = 0x1000;
-		vm->page_shift = 12;
 		break;
 	case VM_MODE_P52V48_64K:
 		vm->pgtable_levels = 3;
-		vm->pa_bits = 52;
-		vm->va_bits = 48;
-		vm->page_size = 0x10000;
-		vm->page_shift = 16;
 		break;
 	case VM_MODE_P48V48_4K:
 		vm->pgtable_levels = 4;
-		vm->pa_bits = 48;
-		vm->va_bits = 48;
-		vm->page_size = 0x1000;
-		vm->page_shift = 12;
 		break;
 	case VM_MODE_P48V48_64K:
 		vm->pgtable_levels = 3;
-		vm->pa_bits = 48;
-		vm->va_bits = 48;
-		vm->page_size = 0x10000;
-		vm->page_shift = 16;
 		break;
 	case VM_MODE_P40V48_4K:
 		vm->pgtable_levels = 4;
-		vm->pa_bits = 40;
-		vm->va_bits = 48;
-		vm->page_size = 0x1000;
-		vm->page_shift = 12;
 		break;
 	case VM_MODE_P40V48_64K:
 		vm->pgtable_levels = 3;
-		vm->pa_bits = 40;
-		vm->va_bits = 48;
-		vm->page_size = 0x10000;
-		vm->page_shift = 16;
 		break;
 	case VM_MODE_PXXV48_4K:
 #ifdef __x86_64__
 		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
 		TEST_ASSERT(vm->va_bits == 48, "Linear address width "
 			    "(%d bits) not supported", vm->va_bits);
+		pr_debug("Guest physical address width detected: %d\n",
+			 vm->pa_bits);
 		vm->pgtable_levels = 4;
-		vm->page_size = 0x1000;
-		vm->page_shift = 12;
-		DEBUG("Guest physical address width detected: %d\n",
-		      vm->pa_bits);
 #else
-		TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on "
-			    "non-x86 platforms");
+		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
 #endif
 		break;
 	default:
-		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
+		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
 	}
 
 #ifdef __aarch64__
@@ -266,7 +264,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
 		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
 			    "  rc: %i errno: %i\n"
 			    "  slot: %u flags: 0x%x\n"
-			    "  guest_phys_addr: 0x%lx size: 0x%lx",
+			    "  guest_phys_addr: 0x%llx size: 0x%llx",
 			    ret, errno, region->region.slot,
 			    region->region.flags,
 			    region->region.guest_phys_addr,
@@ -281,7 +279,7 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
 
 	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
 	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
-		    strerror(-ret));
+		    __func__, strerror(-ret));
 }
 
 void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
@@ -294,7 +292,7 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
 
 	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
 	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
-		    strerror(-ret));
+		    __func__, strerror(-ret));
 }
 
 /*
@@ -582,6 +580,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
 	size_t alignment;
 
+	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
+		"Number of guest pages is not compatible with the host. "
+		"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
+
 	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
 		"address not on a page boundary.\n"
 		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
@@ -600,7 +602,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	region = (struct userspace_mem_region *) userspace_mem_region_find(
 		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
 	if (region != NULL)
-		TEST_ASSERT(false, "overlapping userspace_mem_region already "
+		TEST_FAIL("overlapping userspace_mem_region already "
 			"exists\n"
 			"  requested guest_paddr: 0x%lx npages: 0x%lx "
 			"page_size: 0x%x\n"
@@ -616,7 +618,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 			break;
 	}
 	if (region != NULL)
-		TEST_ASSERT(false, "A mem region with the requested slot "
+		TEST_FAIL("A mem region with the requested slot "
 			"already exists.\n"
 			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
 			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
@@ -720,7 +722,7 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
 			"  requested slot: %u\n", memslot);
 		fputs("---- vm dump ----\n", stderr);
 		vm_dump(stderr, vm, 2);
-		TEST_ASSERT(false, "Mem region not found");
+		TEST_FAIL("Mem region not found");
 	}
 
 	return region;
@@ -757,6 +759,36 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
 }
 
 /*
+ * VM Memory Region Move
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   slot - Slot of the memory region to move
+ *   new_gpa - Starting guest physical address
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Change the gpa of a memory region.
+ */
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
+{
+	struct userspace_mem_region *region;
+	int ret;
+
+	region = memslot2region(vm, slot);
+
+	region->region.guest_phys_addr = new_gpa;
+
+	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+
+	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
+		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
+		    ret, errno, slot, new_gpa);
+}
+
+/*
  * VCPU mmap Size
  *
  * Input Args: None
@@ -808,7 +840,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
 	/* Confirm a vcpu with the specified id doesn't already exist. */
 	vcpu = vcpu_find(vm, vcpuid);
 	if (vcpu != NULL)
-		TEST_ASSERT(false, "vcpu with the specified id "
+		TEST_FAIL("vcpu with the specified id "
 			"already exists,\n"
 			"  requested vcpuid: %u\n"
 			"  existing vcpuid: %u state: %p",
@@ -901,8 +933,7 @@ static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
 	} while (pgidx_start != 0);
 
 no_va_found:
-	TEST_ASSERT(false, "No vaddr of specified pages available, "
-		"pages: 0x%lx", pages);
+	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);
 
 	/* NOT REACHED */
 	return -1;
@@ -982,21 +1013,21 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
  *   vm - Virtual Machine
  *   vaddr - Virtuall address to map
  *   paddr - VM Physical Address
- *   size - The size of the range to map
+ *   npages - The number of pages to map
  *   pgd_memslot - Memory region slot for new virtual translation tables
  *
  * Output Args: None
  *
  * Return: None
  *
- * Within the VM given by vm, creates a virtual translation for the
- * page range starting at vaddr to the page range starting at paddr.
+ * Within the VM given by @vm, creates a virtual translation for
+ * @npages starting at @vaddr to the page range starting at @paddr.
  */
 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-	      size_t size, uint32_t pgd_memslot)
+	      unsigned int npages, uint32_t pgd_memslot)
 {
 	size_t page_size = vm->page_size;
-	size_t npages = size / page_size;
+	size_t size = npages * page_size;
 
 	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
 	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
@@ -1037,7 +1068,7 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
 				+ (gpa - region->region.guest_phys_addr));
 	}
 
-	TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa);
+	TEST_FAIL("No vm physical memory at 0x%lx", gpa);
 	return NULL;
 }
 
@@ -1071,8 +1102,7 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
 				+ (hva - (uintptr_t) region->host_mem));
 	}
 
-	TEST_ASSERT(false, "No mapping to a guest physical address, "
-		"hva: %p", hva);
+	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
 	return -1;
 }
 
@@ -1703,3 +1733,43 @@ unsigned int vm_get_max_gfn(struct kvm_vm *vm)
 {
 	return vm->max_gfn;
 }
+
+static unsigned int vm_calc_num_pages(unsigned int num_pages,
+				      unsigned int page_shift,
+				      unsigned int new_page_shift,
+				      bool ceil)
+{
+	unsigned int n = 1 << (new_page_shift - page_shift);
+
+	if (page_shift >= new_page_shift)
+		return num_pages * (1 << (page_shift - new_page_shift));
+
+	return num_pages / n + !!(ceil && num_pages % n);
+}
+
+static inline int getpageshift(void)
+{
+	return __builtin_ffs(getpagesize()) - 1;
+}
+
+unsigned int
+vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+	return vm_calc_num_pages(num_guest_pages,
+				 vm_guest_mode_params[mode].page_shift,
+				 getpageshift(), true);
+}
+
+unsigned int
+vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
+{
+	return vm_calc_num_pages(num_host_pages, getpageshift(),
+				 vm_guest_mode_params[mode].page_shift, false);
+}
+
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
+{
+	unsigned int n;
+	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
+	return vm_adjust_num_guest_pages(mode, n);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index ac50c42750cf..ca56a0133127 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -12,17 +12,6 @@
 
 #define KVM_DEV_PATH		"/dev/kvm"
 
-#ifndef BITS_PER_BYTE
-#define BITS_PER_BYTE		8
-#endif
-
-#ifndef BITS_PER_LONG
-#define BITS_PER_LONG		(BITS_PER_BYTE * sizeof(long))
-#endif
-
-#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
-#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)
-
 struct userspace_mem_region {
 	struct userspace_mem_region *next, *prev;
 	struct kvm_userspace_memory_region region;
@@ -64,8 +53,56 @@ struct kvm_vm {
 };
 
 struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
+
+/*
+ * Virtual Translation Tables Dump
+ *
+ * Input Args:
+ *   stream - Output FILE stream
+ *   vm     - Virtual Machine
+ *   indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by @stream, the contents of all the
+ * virtual translation tables for the VM given by @vm.
+ */
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+/*
+ * Register Dump
+ *
+ * Input Args:
+ *   stream - Output FILE stream
+ *   regs   - Registers
+ *   indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the state of the registers given by @regs, to the FILE stream
+ * given by @stream.
+ */
 void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
+
+/*
+ * System Register Dump
+ *
+ * Input Args:
+ *   stream - Output FILE stream
+ *   sregs  - System registers
+ *   indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the state of the system registers given by @sregs, to the FILE stream
+ * given by @stream.
+ */
 void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
 
 struct userspace_mem_region *
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 32a02360b1eb..8d94961bd046 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -51,22 +51,6 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
 		| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
 }
 
-/*
- * VM Virtual Page Map
- *
- * Input Args:
- *   vm - Virtual Machine
- *   gva - VM Virtual Address
- *   gpa - VM Physical Address
- *   memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within the VM given by vm, creates a virtual translation for the page
- * starting at vaddr to the page starting at paddr.
- */
 void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
 		 uint32_t memslot)
 {
@@ -107,26 +91,6 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
 	entry[idx] = gpa;
 }
 
-/*
- * Address Guest Virtual to Guest Physical
- *
- * Input Args:
- *   vm - Virtual Machine
- *   gpa - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- *   Equivalent VM physical address
- *
- * Translates the VM virtual address given by gva to a VM physical
- * address and then locates the memory region containing the VM
- * physical address, within the VM given by vm.  When found, the host
- * virtual address providing the memory to the vm physical address is
- * returned.
- * A TEST_ASSERT failure occurs if no region containing translated
- * VM virtual address exists.
- */
 vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 {
 	int ri, idx;
@@ -196,21 +160,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 	virt_dump_region(stream, vm, indent, vm->pgd);
 }
 
-/*
- * Create a VM with reasonable defaults
- *
- * Input Args:
- *   vcpuid - The id of the single VCPU to add to the VM.
- *   extra_mem_pages - The size of extra memories to add (this will
- *                     decide how much extra space we will need to
- *                     setup the page tables using mem slot 0)
- *   guest_code - The vCPU's entry point
- *
- * Output Args: None
- *
- * Return:
- *   Pointer to opaque structure that describes the created VM.
- */
 struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 				 void *guest_code)
 {
@@ -231,13 +180,6 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	return vm;
 }
 
-/*
- * Adds a vCPU with reasonable defaults (i.e. a stack and initial PSW)
- *
- * Input Args:
- *   vcpuid - The id of the VCPU to add to the VM.
- *   guest_code - The vCPU's entry point
- */
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 {
 	size_t stack_size =  DEFAULT_STACK_PGS * getpagesize();
@@ -269,6 +211,26 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 	run->psw_addr = (uintptr_t)guest_code;
 }
 
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+	va_list ap;
+	struct kvm_regs regs;
+	int i;
+
+	TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
+		    "  num: %u\n",
+		    num);
+
+	va_start(ap, num);
+	vcpu_regs_get(vm, vcpuid, &regs);
+
+	for (i = 0; i < num; i++)
+		regs.gprs[i + 2] = va_arg(ap, uint64_t);
+
+	vcpu_regs_set(vm, vcpuid, &regs);
+	va_end(ap);
+}
+
 void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
 {
 	struct vcpu *vcpu = vm->vcpu_head;
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
new file mode 100644
index 000000000000..689e97c27ee2
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/test_util.c
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+#include <assert.h>
+#include "test_util.h"
+
+/*
+ * Parses "[0-9]+[kmgt]?".
+ */
+size_t parse_size(const char *size)
+{
+	size_t base;
+	char *scale;
+	int shift = 0;
+
+	TEST_ASSERT(size && isdigit(size[0]), "Need at least one digit in '%s'", size);
+
+	base = strtoull(size, &scale, 0);
+
+	TEST_ASSERT(base != ULLONG_MAX, "Overflow parsing size!");
+
+	switch (tolower(*scale)) {
+	case 't':
+		shift = 40;
+		break;
+	case 'g':
+		shift = 30;
+		break;
+	case 'm':
+		shift = 20;
+		break;
+	case 'k':
+		shift = 10;
+		break;
+	case 'b':
+	case '\0':
+		shift = 0;
+		break;
+	default:
+		TEST_ASSERT(false, "Unknown size letter %c", *scale);
+	}
+
+	TEST_ASSERT((base << shift) >> shift == base, "Overflow scaling size!");
+
+	return base << shift;
+}
+
+int64_t timespec_to_ns(struct timespec ts)
+{
+	return (int64_t)ts.tv_nsec + 1000000000LL * (int64_t)ts.tv_sec;
+}
+
+struct timespec timespec_add_ns(struct timespec ts, int64_t ns)
+{
+	struct timespec res;
+
+	res.tv_nsec = ts.tv_nsec + ns;
+	res.tv_sec = ts.tv_sec + res.tv_nsec / 1000000000LL;
+	res.tv_nsec %= 1000000000LL;
+
+	return res;
+}
+
+struct timespec timespec_add(struct timespec ts1, struct timespec ts2)
+{
+	int64_t ns1 = timespec_to_ns(ts1);
+	int64_t ns2 = timespec_to_ns(ts2);
+	return timespec_add_ns((struct timespec){0}, ns1 + ns2);
+}
+
+struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
+{
+	int64_t ns1 = timespec_to_ns(ts1);
+	int64_t ns2 = timespec_to_ns(ts2);
+	return timespec_add_ns((struct timespec){0}, ns1 - ns2);
+}
+
+void print_skip(const char *fmt, ...)
+{
+	va_list ap;
+
+	assert(fmt);
+	va_start(ap, fmt);
+	vprintf(fmt, ap);
+	va_end(ap);
+	puts(", skipping test");
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 683d3bdb8f6a..f6eb34eaa0d2 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -77,20 +77,6 @@ struct pageTableEntry {
 	uint64_t execute_disable:1;
 };
 
-/* Register Dump
- *
- * Input Args:
- *   indent - Left margin indent amount
- *   regs - register
- *
- * Output Args:
- *   stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps the state of the registers given by regs, to the FILE stream
- * given by steam.
- */
 void regs_dump(FILE *stream, struct kvm_regs *regs,
 	       uint8_t indent)
 {
@@ -115,19 +101,20 @@ void regs_dump(FILE *stream, struct kvm_regs *regs,
 		regs->rip, regs->rflags);
 }
 
-/* Segment Dump
+/*
+ * Segment Dump
  *
  * Input Args:
- *   indent - Left margin indent amount
+ *   stream  - Output FILE stream
  *   segment - KVM segment
+ *   indent  - Left margin indent amount
  *
- * Output Args:
- *   stream - Output FILE stream
+ * Output Args: None
  *
  * Return: None
  *
- * Dumps the state of the KVM segment given by segment, to the FILE stream
- * given by steam.
+ * Dumps the state of the KVM segment given by @segment, to the FILE stream
+ * given by @stream.
  */
 static void segment_dump(FILE *stream, struct kvm_segment *segment,
 			 uint8_t indent)
@@ -146,19 +133,20 @@ static void segment_dump(FILE *stream, struct kvm_segment *segment,
 		segment->unusable, segment->padding);
 }
 
-/* dtable Dump
+/*
+ * dtable Dump
  *
  * Input Args:
- *   indent - Left margin indent amount
+ *   stream - Output FILE stream
  *   dtable - KVM dtable
+ *   indent - Left margin indent amount
  *
- * Output Args:
- *   stream - Output FILE stream
+ * Output Args: None
  *
  * Return: None
  *
- * Dumps the state of the KVM dtable given by dtable, to the FILE stream
- * given by steam.
+ * Dumps the state of the KVM dtable given by @dtable, to the FILE stream
+ * given by @stream.
  */
 static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
 			uint8_t indent)
@@ -169,20 +157,6 @@ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
 		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
 }
 
-/* System Register Dump
- *
- * Input Args:
- *   indent - Left margin indent amount
- *   sregs - System registers
- *
- * Output Args:
- *   stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps the state of the system registers given by sregs, to the FILE stream
- * given by steam.
- */
 void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
 		uint8_t indent)
 {
@@ -240,21 +214,6 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
 	}
 }
 
-/* VM Virtual Page Map
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vaddr - VM Virtual Address
- *   paddr - VM Physical Address
- *   pgd_memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within the VM given by vm, creates a virtual translation for the page
- * starting at vaddr to the page starting at paddr.
- */
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	uint32_t pgd_memslot)
 {
@@ -326,20 +285,6 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	pte[index[0]].present = 1;
 }
 
-/* Virtual Translation Tables Dump
- *
- * Input Args:
- *   vm - Virtual Machine
- *   indent - Left margin indent amount
- *
- * Output Args:
- *   stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps to the FILE stream given by stream, the contents of all the
- * virtual translation tables for the VM given by vm.
- */
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 {
 	struct pageMapL4Entry *pml4e, *pml4e_start;
@@ -421,7 +366,8 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 	}
 }
 
-/* Set Unusable Segment
+/*
+ * Set Unusable Segment
  *
  * Input Args: None
  *
@@ -430,7 +376,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
  *
  * Return: None
  *
- * Sets the segment register pointed to by segp to an unusable state.
+ * Sets the segment register pointed to by @segp to an unusable state.
  */
 static void kvm_seg_set_unusable(struct kvm_segment *segp)
 {
@@ -460,7 +406,8 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
 }
 
 
-/* Set Long Mode Flat Kernel Code Segment
+/*
+ * Set Long Mode Flat Kernel Code Segment
  *
  * Input Args:
  *   vm - VM whose GDT is being filled, or NULL to only write segp
@@ -471,8 +418,8 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
  *
  * Return: None
  *
- * Sets up the KVM segment pointed to by segp, to be a code segment
- * with the selector value given by selector.
+ * Sets up the KVM segment pointed to by @segp, to be a code segment
+ * with the selector value given by @selector.
  */
 static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
 	struct kvm_segment *segp)
@@ -491,7 +438,8 @@ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
 		kvm_seg_fill_gdt_64bit(vm, segp);
 }
 
-/* Set Long Mode Flat Kernel Data Segment
+/*
+ * Set Long Mode Flat Kernel Data Segment
  *
  * Input Args:
  *   vm - VM whose GDT is being filled, or NULL to only write segp
@@ -502,8 +450,8 @@ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
  *
  * Return: None
  *
- * Sets up the KVM segment pointed to by segp, to be a data segment
- * with the selector value given by selector.
+ * Sets up the KVM segment pointed to by @segp, to be a data segment
+ * with the selector value given by @selector.
  */
 static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
 	struct kvm_segment *segp)
@@ -521,24 +469,6 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
 		kvm_seg_fill_gdt_64bit(vm, segp);
 }
 
-/* Address Guest Virtual to Guest Physical
- *
- * Input Args:
- *   vm - Virtual Machine
- *   gpa - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- *   Equivalent VM physical address
- *
- * Translates the VM virtual address given by gva to a VM physical
- * address and then locates the memory region containing the VM
- * physical address, within the VM given by vm.  When found, the host
- * virtual address providing the memory to the vm physical address is returned.
- * A TEST_ASSERT failure occurs if no region containing translated
- * VM virtual address exists.
- */
 vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 {
 	uint16_t index[4];
@@ -576,8 +506,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 	return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
 
 unmapped_gva:
-	TEST_ASSERT(false, "No mapping for vm virtual address, "
-		    "gva: 0x%lx", gva);
+	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
 	exit(EXIT_FAILURE);
 }
 
@@ -634,18 +563,13 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
 		break;
 
 	default:
-		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+		TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
 	}
 
 	sregs.cr3 = vm->pgd;
 	vcpu_sregs_set(vm, vcpuid, &sregs);
 }
-/* Adds a vCPU with reasonable defaults (i.e., a stack)
- *
- * Input Args:
- *   vcpuid - The id of the VCPU to add to the VM.
- *   guest_code - The vCPU's entry point
- */
+
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 {
 	struct kvm_mp_state mp_state;
@@ -670,7 +594,8 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 	vcpu_set_mp_state(vm, vcpuid, &mp_state);
 }
 
-/* Allocate an instance of struct kvm_cpuid2
+/*
+ * Allocate an instance of struct kvm_cpuid2
  *
  * Input Args: None
  *
@@ -703,7 +628,8 @@ static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
 	return cpuid;
 }
 
-/* KVM Supported CPUID Get
+/*
+ * KVM Supported CPUID Get
  *
  * Input Args: None
  *
@@ -735,11 +661,12 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
 	return cpuid;
 }
 
-/* Locate a cpuid entry.
+/*
+ * Locate a cpuid entry.
  *
  * Input Args:
- *   cpuid: The cpuid.
  *   function: The function of the cpuid entry to find.
+ *   index: The index of the cpuid entry.
  *
  * Output Args: None
  *
@@ -766,7 +693,8 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
 	return entry;
 }
 
-/* VM VCPU CPUID Set
+/*
+ * VM VCPU CPUID Set
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -793,20 +721,6 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
 
 }
 
-/* Create a VM with reasonable defaults
- *
- * Input Args:
- *   vcpuid - The id of the single VCPU to add to the VM.
- *   extra_mem_pages - The size of extra memories to add (this will
- *                     decide how much extra space we will need to
- *                     setup the page tables using mem slot 0)
- *   guest_code - The vCPU's entry point
- *
- * Output Args: None
- *
- * Return:
- *   Pointer to opaque structure that describes the created VM.
- */
 struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 				 void *guest_code)
 {
@@ -837,7 +751,8 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	return vm;
 }
 
-/* VCPU Get MSR
+/*
+ * VCPU Get MSR
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -869,7 +784,8 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
 	return buffer.entry.data;
 }
 
-/* _VCPU Set MSR
+/*
+ * _VCPU Set MSR
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -902,7 +818,8 @@ int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
 	return r;
 }
 
-/* VCPU Set MSR
+/*
+ * VCPU Set MSR
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -926,22 +843,6 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
 		"  rc: %i errno: %i", r, errno);
 }
 
-/* VM VCPU Args Set
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   num - number of arguments
- *   ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first num function input arguments to the values
- * given as variable args.  Each of the variable args is expected to
- * be of type uint64_t.
- */
 void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
 {
 	va_list ap;
@@ -976,22 +877,6 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
 	va_end(ap);
 }
 
-/*
- * VM VCPU Dump
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   indent - Left margin indent amount
- *
- * Output Args:
- *   stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by vcpuid, within the VM
- * given by vm, to the FILE stream given by stream.
- */
 void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
 {
 	struct kvm_regs regs;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 6e05a8fc3fe0..c42401068373 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -154,7 +154,7 @@ void nested_svm_check_supported(void)
 		kvm_get_supported_cpuid_entry(0x80000001);
 
 	if (!(entry->ecx & CPUID_SVM)) {
-		fprintf(stderr, "nested SVM not enabled, skipping test\n");
+		print_skip("nested SVM not enabled");
 		exit(KSFT_SKIP);
 	}
 }
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 7aaa99ca4dbc..6f17f69394be 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -191,7 +191,7 @@ bool load_vmcs(struct vmx_pages *vmx)
 		if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa,
 				  vmx->enlightened_vmcs))
 			return false;
-		current_evmcs->revision_id = vmcs_revision();
+		current_evmcs->revision_id = EVMCS_VERSION;
 	}
 
 	return true;
@@ -381,7 +381,7 @@ void nested_vmx_check_supported(void)
 	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
 
 	if (!(entry->ecx & CPUID_VMX)) {
-		fprintf(stderr, "nested VMX not enabled, skipping test\n");
+		print_skip("nested VMX not enabled");
 		exit(KSFT_SKIP);
 	}
 }
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 9edaa9a134ce..9f49ead380ab 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -40,7 +40,7 @@ int main(int argc, char *argv[])
 
 	maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP);
 	if (!maxsize) {
-		fprintf(stderr, "CAP_S390_MEM_OP not supported -> skip test\n");
+		print_skip("CAP_S390_MEM_OP not supported");
 		exit(KSFT_SKIP);
 	}
 	if (maxsize > sizeof(mem1))
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c
index 1485bc6c8999..b143db6d8693 100644
--- a/tools/testing/selftests/kvm/s390x/resets.c
+++ b/tools/testing/selftests/kvm/s390x/resets.c
@@ -20,29 +20,42 @@ struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS];
 
 struct kvm_vm *vm;
 struct kvm_run *run;
-struct kvm_sync_regs *regs;
-static uint64_t regs_null[16];
-
-static uint64_t crs[16] = { 0x40000ULL,
-			    0x42000ULL,
-			    0, 0, 0, 0, 0,
-			    0x43000ULL,
-			    0, 0, 0, 0, 0,
-			    0x44000ULL,
-			    0, 0
-};
+struct kvm_sync_regs *sync_regs;
+static uint8_t regs_null[512];
 
 static void guest_code_initial(void)
 {
-	/* Round toward 0 */
-	uint32_t fpc = 0x11;
+	/* set several CRs to "safe" value */
+	unsigned long cr2_59 = 0x10;	/* enable guarded storage */
+	unsigned long cr8_63 = 0x1;	/* monitor mask = 1 */
+	unsigned long cr10 = 1;		/* PER START */
+	unsigned long cr11 = -1;	/* PER END */
+
 
 	/* Dirty registers */
 	asm volatile (
-		"	lctlg	0,15,%0\n"
-		"	sfpc	%1\n"
-		: : "Q" (crs), "d" (fpc));
-	GUEST_SYNC(0);
+		"	lghi	2,0x11\n"	/* Round toward 0 */
+		"	sfpc	2\n"		/* set fpc to !=0 */
+		"	lctlg	2,2,%0\n"
+		"	lctlg	8,8,%1\n"
+		"	lctlg	10,10,%2\n"
+		"	lctlg	11,11,%3\n"
+		/* now clobber some general purpose regs */
+		"	llihh	0,0xffff\n"
+		"	llihl	1,0x5555\n"
+		"	llilh	2,0xaaaa\n"
+		"	llill	3,0x0000\n"
+		/* now clobber a floating point reg */
+		"	lghi	4,0x1\n"
+		"	cdgbr	0,4\n"
+		/* now clobber an access reg */
+		"	sar	9,4\n"
+		/* We embed diag 501 here to control register content */
+		"	diag 0,0,0x501\n"
+		:
+		: "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11)
+		/* no clobber list as this should not return */
+		);
 }
 
 static void test_one_reg(uint64_t id, uint64_t value)
@@ -53,7 +66,7 @@ static void test_one_reg(uint64_t id, uint64_t value)
 	reg.addr = (uintptr_t)&eval_reg;
 	reg.id = id;
 	vcpu_get_reg(vm, VCPU_ID, &reg);
-	TEST_ASSERT(eval_reg == value, "value == %s", value);
+	TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
 }
 
 static void assert_noirq(void)
@@ -87,6 +100,31 @@ static void assert_clear(void)
 
 	vcpu_fpu_get(vm, VCPU_ID, &fpu);
 	TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
+
+	/* sync regs */
+	TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)),
+		    "gprs0-15 == 0 (sync_regs)");
+
+	TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)),
+		    "acrs0-15 == 0 (sync_regs)");
+
+	TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)),
+		    "vrs0-15 == 0 (sync_regs)");
+}
+
+static void assert_initial_noclear(void)
+{
+	TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
+		    "gpr0 == 0xffff000000000000 (sync_regs)");
+	TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
+		    "gpr1 == 0x0000555500000000 (sync_regs)");
+	TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL,
+		    "gpr2 == 0x00000000aaaa0000 (sync_regs)");
+	TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL,
+		    "gpr3 == 0x0000000000000000 (sync_regs)");
+	TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL,
+		    "fpr0 == 0f1 (sync_regs)");
+	TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");
 }
 
 static void assert_initial(void)
@@ -94,12 +132,32 @@ static void assert_initial(void)
 	struct kvm_sregs sregs;
 	struct kvm_fpu fpu;
 
+	/* KVM_GET_SREGS */
 	vcpu_sregs_get(vm, VCPU_ID, &sregs);
-	TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0");
-	TEST_ASSERT(sregs.crs[14] == 0xC2000000UL, "cr14 == 0xC2000000");
+	TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
+	TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
+		    "cr14 == 0xC2000000 (KVM_GET_SREGS)");
 	TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12),
-		    "cr1-13 == 0");
-	TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0");
+		    "cr1-13 == 0 (KVM_GET_SREGS)");
+	TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)");
+
+	/* sync regs */
+	TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)");
+	TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL,
+		    "cr14 == 0xC2000000 (sync_regs)");
+	TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, 8 * 12),
+		    "cr1-13 == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)");
+	TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
+
+	/* kvm_run */
+	TEST_ASSERT(run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
+	TEST_ASSERT(run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
 
 	vcpu_fpu_get(vm, VCPU_ID, &fpu);
 	TEST_ASSERT(!fpu.fpc, "fpc == 0");
@@ -111,9 +169,19 @@ static void assert_initial(void)
 	test_one_reg(KVM_REG_S390_CLOCK_COMP, 0);
 }
 
+static void assert_normal_noclear(void)
+{
+	TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 10 (sync_regs)");
+	TEST_ASSERT(sync_regs->crs[8] == 1, "cr10 == 1 (sync_regs)");
+	TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
+	TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
+}
+
 static void assert_normal(void)
 {
 	test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
+	TEST_ASSERT(sync_regs->pft == KVM_S390_PFAULT_TOKEN_INVALID,
+			"pft == 0xff.....  (sync_regs)");
 	assert_noirq();
 }
 
@@ -134,53 +202,67 @@ static void inject_irq(int cpu_id)
 
 static void test_normal(void)
 {
-	printf("Testing normal reset\n");
+	pr_info("Testing normal reset\n");
 	/* Create VM */
 	vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
 	run = vcpu_state(vm, VCPU_ID);
-	regs = &run->s.regs;
+	sync_regs = &run->s.regs;
 
 	vcpu_run(vm, VCPU_ID);
 
 	inject_irq(VCPU_ID);
 
 	vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0);
+
+	/* must clears */
 	assert_normal();
+	/* must not clears */
+	assert_normal_noclear();
+	assert_initial_noclear();
+
 	kvm_vm_free(vm);
 }
 
 static void test_initial(void)
 {
-	printf("Testing initial reset\n");
+	pr_info("Testing initial reset\n");
 	vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
 	run = vcpu_state(vm, VCPU_ID);
-	regs = &run->s.regs;
+	sync_regs = &run->s.regs;
 
 	vcpu_run(vm, VCPU_ID);
 
 	inject_irq(VCPU_ID);
 
 	vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0);
+
+	/* must clears */
 	assert_normal();
 	assert_initial();
+	/* must not clears */
+	assert_initial_noclear();
+
 	kvm_vm_free(vm);
 }
 
 static void test_clear(void)
 {
-	printf("Testing clear reset\n");
+	pr_info("Testing clear reset\n");
 	vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
 	run = vcpu_state(vm, VCPU_ID);
-	regs = &run->s.regs;
+	sync_regs = &run->s.regs;
 
 	vcpu_run(vm, VCPU_ID);
 
 	inject_irq(VCPU_ID);
 
 	vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0);
+
+	/* must clears */
 	assert_normal();
 	assert_initial();
 	assert_clear();
+
 	kvm_vm_free(vm);
 }
 
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
index b705637ca14b..5731ccf34917 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -42,6 +42,13 @@ static void guest_code(void)
 		    " values did not match: 0x%llx, 0x%llx\n", \
 		    left->reg, right->reg)
 
+#define REG_COMPARE32(reg) \
+	TEST_ASSERT(left->reg == right->reg, \
+		    "Register " #reg \
+		    " values did not match: 0x%x, 0x%x\n", \
+		    left->reg, right->reg)
+
+
 static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
 {
 	int i;
@@ -55,7 +62,7 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
 	int i;
 
 	for (i = 0; i < 16; i++)
-		REG_COMPARE(acrs[i]);
+		REG_COMPARE32(acrs[i]);
 
 	for (i = 0; i < 16; i++)
 		REG_COMPARE(crs[i]);
@@ -79,7 +86,7 @@ int main(int argc, char *argv[])
 
 	cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
 	if (!cap) {
-		fprintf(stderr, "CAP_SYNC_REGS not supported, skipping test\n");
+		print_skip("CAP_SYNC_REGS not supported");
 		exit(KSFT_SKIP);
 	}
 
@@ -155,7 +162,7 @@ int main(int argc, char *argv[])
 		    "r11 sync regs value incorrect 0x%llx.",
 		    run->s.regs.gprs[11]);
 	TEST_ASSERT(run->s.regs.acrs[0]  == 1 << 11,
-		    "acr0 sync regs value incorrect 0x%llx.",
+		    "acr0 sync regs value incorrect 0x%x.",
 		    run->s.regs.acrs[0]);
 
 	vcpu_regs_get(vm, VCPU_ID, &regs);
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
new file mode 100644
index 000000000000..fcc840088c91
--- /dev/null
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * steal/stolen time test
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <time.h>
+#include <sched.h>
+#include <pthread.h>
+#include <linux/kernel.h>
+#include <sys/syscall.h>
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define NR_VCPUS		4
+#define ST_GPA_BASE		(1 << 30)
+#define MIN_RUN_DELAY_NS	200000UL
+
+static void *st_gva[NR_VCPUS];
+static uint64_t guest_stolen_time[NR_VCPUS];
+
+#if defined(__x86_64__)
+
+/* steal_time must have 64-byte alignment */
+#define STEAL_TIME_SIZE		((sizeof(struct kvm_steal_time) + 63) & ~63)
+
+static void check_status(struct kvm_steal_time *st)
+{
+	GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
+	GUEST_ASSERT(READ_ONCE(st->flags) == 0);
+	GUEST_ASSERT(READ_ONCE(st->preempted) == 0);
+}
+
+static void guest_code(int cpu)
+{
+	struct kvm_steal_time *st = st_gva[cpu];
+	uint32_t version;
+
+	GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
+
+	memset(st, 0, sizeof(*st));
+	GUEST_SYNC(0);
+
+	check_status(st);
+	WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+	version = READ_ONCE(st->version);
+	check_status(st);
+	GUEST_SYNC(1);
+
+	check_status(st);
+	GUEST_ASSERT(version < READ_ONCE(st->version));
+	WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+	check_status(st);
+	GUEST_DONE();
+}
+
+static void steal_time_init(struct kvm_vm *vm)
+{
+	int i;
+
+	if (!(kvm_get_supported_cpuid_entry(KVM_CPUID_FEATURES)->eax &
+	      KVM_FEATURE_STEAL_TIME)) {
+		print_skip("steal-time not supported");
+		exit(KSFT_SKIP);
+	}
+
+	for (i = 0; i < NR_VCPUS; ++i) {
+		int ret;
+
+		vcpu_set_cpuid(vm, i, kvm_get_supported_cpuid());
+
+		/* ST_GPA_BASE is identity mapped */
+		st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+		sync_global_to_guest(vm, st_gva[i]);
+
+		ret = _vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
+		TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
+
+		vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
+	}
+}
+
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
+	int i;
+
+	pr_info("VCPU%d:\n", vcpuid);
+	pr_info("    steal:     %lld\n", st->steal);
+	pr_info("    version:   %d\n", st->version);
+	pr_info("    flags:     %d\n", st->flags);
+	pr_info("    preempted: %d\n", st->preempted);
+	pr_info("    u8_pad:    ");
+	for (i = 0; i < 3; ++i)
+		pr_info("%d", st->u8_pad[i]);
+	pr_info("\n    pad:       ");
+	for (i = 0; i < 11; ++i)
+		pr_info("%d", st->pad[i]);
+	pr_info("\n");
+}
+
+#elif defined(__aarch64__)
+
+/* PV_TIME_ST must have 64-byte alignment */
+#define STEAL_TIME_SIZE		((sizeof(struct st_time) + 63) & ~63)
+
+#define SMCCC_ARCH_FEATURES	0x80000001
+#define PV_TIME_FEATURES	0xc5000020
+#define PV_TIME_ST		0xc5000021
+
+struct st_time {
+	uint32_t rev;
+	uint32_t attr;
+	uint64_t st_time;
+};
+
+static int64_t smccc(uint32_t func, uint32_t arg)
+{
+	unsigned long ret;
+
+	asm volatile(
+		"mov	x0, %1\n"
+		"mov	x1, %2\n"
+		"hvc	#0\n"
+		"mov	%0, x0\n"
+	: "=r" (ret) : "r" (func), "r" (arg) :
+	  "x0", "x1", "x2", "x3");
+
+	return ret;
+}
+
+static void check_status(struct st_time *st)
+{
+	GUEST_ASSERT(READ_ONCE(st->rev) == 0);
+	GUEST_ASSERT(READ_ONCE(st->attr) == 0);
+}
+
+static void guest_code(int cpu)
+{
+	struct st_time *st;
+	int64_t status;
+
+	status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES);
+	GUEST_ASSERT(status == 0);
+	status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES);
+	GUEST_ASSERT(status == 0);
+	status = smccc(PV_TIME_FEATURES, PV_TIME_ST);
+	GUEST_ASSERT(status == 0);
+
+	status = smccc(PV_TIME_ST, 0);
+	GUEST_ASSERT(status != -1);
+	GUEST_ASSERT(status == (ulong)st_gva[cpu]);
+
+	st = (struct st_time *)status;
+	GUEST_SYNC(0);
+
+	check_status(st);
+	WRITE_ONCE(guest_stolen_time[cpu], st->st_time);
+	GUEST_SYNC(1);
+
+	check_status(st);
+	WRITE_ONCE(guest_stolen_time[cpu], st->st_time);
+	GUEST_DONE();
+}
+
+static void steal_time_init(struct kvm_vm *vm)
+{
+	struct kvm_device_attr dev = {
+		.group = KVM_ARM_VCPU_PVTIME_CTRL,
+		.attr = KVM_ARM_VCPU_PVTIME_IPA,
+	};
+	int i, ret;
+
+	ret = _vcpu_ioctl(vm, 0, KVM_HAS_DEVICE_ATTR, &dev);
+	if (ret != 0 && errno == ENXIO) {
+		print_skip("steal-time not supported");
+		exit(KSFT_SKIP);
+	}
+
+	for (i = 0; i < NR_VCPUS; ++i) {
+		uint64_t st_ipa;
+
+		vcpu_ioctl(vm, i, KVM_HAS_DEVICE_ATTR, &dev);
+
+		dev.addr = (uint64_t)&st_ipa;
+
+		/* ST_GPA_BASE is identity mapped */
+		st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+		sync_global_to_guest(vm, st_gva[i]);
+
+		st_ipa = (ulong)st_gva[i] | 1;
+		ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+		TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
+
+		st_ipa = (ulong)st_gva[i];
+		vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+
+		ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+		TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
+
+	}
+}
+
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
+
+	pr_info("VCPU%d:\n", vcpuid);
+	pr_info("    rev:     %d\n", st->rev);
+	pr_info("    attr:    %d\n", st->attr);
+	pr_info("    st_time: %ld\n", st->st_time);
+}
+
+#endif
+
+static long get_run_delay(void)
+{
+	char path[64];
+	long val[2];
+	FILE *fp;
+
+	sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+	fp = fopen(path, "r");
+	fscanf(fp, "%ld %ld ", &val[0], &val[1]);
+	fclose(fp);
+
+	return val[1];
+}
+
+static void *do_steal_time(void *arg)
+{
+	struct timespec ts, stop;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	stop = timespec_add_ns(ts, MIN_RUN_DELAY_NS);
+
+	while (1) {
+		clock_gettime(CLOCK_MONOTONIC, &ts);
+		if (timespec_to_ns(timespec_sub(ts, stop)) >= 0)
+			break;
+	}
+
+	return NULL;
+}
+
+static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct ucall uc;
+
+	vcpu_args_set(vm, vcpuid, 1, vcpuid);
+
+	vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);
+
+	switch (get_ucall(vm, vcpuid, &uc)) {
+	case UCALL_SYNC:
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0],
+			    __FILE__, uc.args[1]);
+	default:
+		TEST_ASSERT(false, "Unexpected exit: %s",
+			    exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+	}
+}
+
+int main(int ac, char **av)
+{
+	struct kvm_vm *vm;
+	pthread_attr_t attr;
+	pthread_t thread;
+	cpu_set_t cpuset;
+	unsigned int gpages;
+	long stolen_time;
+	long run_delay;
+	bool verbose;
+	int i;
+
+	verbose = ac > 1 && (!strncmp(av[1], "-v", 3) || !strncmp(av[1], "--verbose", 10));
+
+	/* Set CPU affinity so we can force preemption of the VCPU */
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	pthread_attr_init(&attr);
+	pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+	pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+
+	/* Create a one VCPU guest and an identity mapped memslot for the steal time structure */
+	vm = vm_create_default(0, 0, guest_code);
+	gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS);
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
+	virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages, 0);
+	ucall_init(vm, NULL);
+
+	/* Add the rest of the VCPUs */
+	for (i = 1; i < NR_VCPUS; ++i)
+		vm_vcpu_add_default(vm, i, guest_code);
+
+	steal_time_init(vm);
+
+	/* Run test on each VCPU */
+	for (i = 0; i < NR_VCPUS; ++i) {
+		/* First VCPU run initializes steal-time */
+		run_vcpu(vm, i);
+
+		/* Second VCPU run, expect guest stolen time to be <= run_delay */
+		run_vcpu(vm, i);
+		sync_global_from_guest(vm, guest_stolen_time[i]);
+		stolen_time = guest_stolen_time[i];
+		run_delay = get_run_delay();
+		TEST_ASSERT(stolen_time <= run_delay,
+			    "Expected stolen time <= %ld, got %ld",
+			    run_delay, stolen_time);
+
+		/* Steal time from the VCPU. The steal time thread has the same CPU affinity as the VCPUs. */
+		run_delay = get_run_delay();
+		pthread_create(&thread, &attr, do_steal_time, NULL);
+		do
+			pthread_yield();
+		while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS);
+		pthread_join(thread, NULL);
+		run_delay = get_run_delay() - run_delay;
+		TEST_ASSERT(run_delay >= MIN_RUN_DELAY_NS,
+			    "Expected run_delay >= %ld, got %ld",
+			    MIN_RUN_DELAY_NS, run_delay);
+
+		/* Run VCPU again to confirm stolen time is consistent with run_delay */
+		run_vcpu(vm, i);
+		sync_global_from_guest(vm, guest_stolen_time[i]);
+		stolen_time = guest_stolen_time[i] - stolen_time;
+		TEST_ASSERT(stolen_time >= run_delay,
+			    "Expected stolen time >= %ld, got %ld",
+			    run_delay, stolen_time);
+
+		if (verbose) {
+			pr_info("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld", i,
+				guest_stolen_time[i], stolen_time);
+			if (stolen_time == run_delay)
+				pr_info(" (BONUS: guest test-stolen-time even exactly matches test-run_delay)");
+			pr_info("\n");
+			steal_time_dump(vm, i);
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 63cc9c3f5ab6..140e91901582 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -72,7 +72,7 @@ int main(int argc, char *argv[])
 
 	entry = kvm_get_supported_cpuid_entry(1);
 	if (!(entry->ecx & X86_FEATURE_XSAVE)) {
-		printf("XSAVE feature not supported, skipping test\n");
+		print_skip("XSAVE feature not supported");
 		return 0;
 	}
 
@@ -101,12 +101,12 @@ int main(int argc, char *argv[])
 			vcpu_sregs_set(vm, VCPU_ID, &sregs);
 			break;
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+			TEST_FAIL("Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
 			break;
 		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 	}
 
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 92915e6408e7..e6e62e5e75b2 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -21,10 +21,10 @@
 
 void l2_guest_code(void)
 {
-	GUEST_SYNC(6);
-
 	GUEST_SYNC(7);
 
+	GUEST_SYNC(8);
+
 	/* Done, exit to L1 and never come back.  */
 	vmcall();
 }
@@ -50,12 +50,17 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 
 	GUEST_SYNC(5);
 	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	current_evmcs->revision_id = -1u;
+	GUEST_ASSERT(vmlaunch());
+	current_evmcs->revision_id = EVMCS_VERSION;
+	GUEST_SYNC(6);
+
 	GUEST_ASSERT(!vmlaunch());
 	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-	GUEST_SYNC(8);
+	GUEST_SYNC(9);
 	GUEST_ASSERT(!vmresume());
 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-	GUEST_SYNC(9);
+	GUEST_SYNC(10);
 }
 
 void guest_code(struct vmx_pages *vmx_pages)
@@ -67,6 +72,10 @@ void guest_code(struct vmx_pages *vmx_pages)
 		l1_guest_code(vmx_pages);
 
 	GUEST_DONE();
+
+	/* Try enlightened vmptrld with an incorrect GPA */
+	evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
+	GUEST_ASSERT(vmlaunch());
 }
 
 int main(int argc, char *argv[])
@@ -87,7 +96,7 @@ int main(int argc, char *argv[])
 
 	if (!kvm_check_cap(KVM_CAP_NESTED_STATE) ||
 	    !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
-		printf("capabilities not available, skipping test\n");
+		print_skip("capabilities not available");
 		exit(KSFT_SKIP);
 	}
 
@@ -109,20 +118,20 @@ int main(int argc, char *argv[])
 
 		switch (get_ucall(vm, VCPU_ID, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
-				    __FILE__, uc.args[1]);
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+		      		  __FILE__, uc.args[1]);
 			/* NOT REACHED */
 		case UCALL_SYNC:
 			break;
 		case UCALL_DONE:
-			goto done;
+			goto part1_done;
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 
 		/* UCALL_SYNC is handled here.  */
 		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-			    uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
 			    stage, (ulong)uc.args[1]);
 
 		state = vcpu_save_state(vm, VCPU_ID);
@@ -147,6 +156,10 @@ int main(int argc, char *argv[])
 			    (ulong) regs2.rdi, (ulong) regs2.rsi);
 	}
 
-done:
+part1_done:
+	_vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+		    "Unexpected successful VMEnter with invalid eVMCS pointer!");
+
 	kvm_vm_free(vm);
 }
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 443a2b54645b..83323f3d7ca0 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -66,7 +66,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 
 		TEST_ASSERT((entry->function >= 0x40000000) &&
 			    (entry->function <= 0x4000000A),
-			    "function %lx is our of supported range",
+			    "function %x is our of supported range",
 			    entry->function);
 
 		TEST_ASSERT(entry->index == 0,
@@ -141,8 +141,7 @@ int main(int argc, char *argv[])
 
 	rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID);
 	if (!rv) {
-		fprintf(stderr,
-			"KVM_CAP_HYPERV_CPUID not supported, skip test\n");
+		print_skip("KVM_CAP_HYPERV_CPUID not supported");
 		exit(KSFT_SKIP);
 	}
 
@@ -160,8 +159,7 @@ int main(int argc, char *argv[])
 	free(hv_cpuid_entries);
 
 	if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
-		fprintf(stderr,
-			"Enlightened VMCS is unsupported, skip related test\n");
+		print_skip("Enlightened VMCS is unsupported");
 		goto vm_free;
 	}
 
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
index 00bb97d76000..e6480fd5c4bd 100644
--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
@@ -44,7 +44,7 @@ void *thr(void *arg)
 	struct kvm_run *run = tc->run;
 
 	res = ioctl(kvmcpu, KVM_RUN, 0);
-	printf("ret1=%d exit_reason=%d suberror=%d\n",
+	pr_info("ret1=%d exit_reason=%d suberror=%d\n",
 		res, run->exit_reason, run->internal.suberror);
 
 	return 0;
@@ -93,12 +93,12 @@ int main(void)
 	int warnings_before, warnings_after;
 
 	if (!is_intel_cpu()) {
-		printf("Must be run on an Intel CPU, skipping test\n");
+		print_skip("Must be run on an Intel CPU");
 		exit(KSFT_SKIP);
 	}
 
 	if (vm_is_unrestricted_guest(NULL)) {
-		printf("Unrestricted guest must be disabled, skipping test\n");
+		print_skip("Unrestricted guest must be disabled");
 		exit(KSFT_SKIP);
 	}
 
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index f9334bd3cce9..1e89688cbbbf 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -58,8 +58,7 @@ static void test_msr_platform_info_enabled(struct kvm_vm *vm)
 			exit_reason_str(run->exit_reason));
 	get_ucall(vm, VCPU_ID, &uc);
 	TEST_ASSERT(uc.cmd == UCALL_SYNC,
-			"Received ucall other than UCALL_SYNC: %u\n",
-			ucall);
+			"Received ucall other than UCALL_SYNC: %lu\n", uc.cmd);
 	TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
 		MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
 		"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
@@ -89,8 +88,7 @@ int main(int argc, char *argv[])
 
 	rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
 	if (!rv) {
-		fprintf(stderr,
-			"KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n");
+		print_skip("KVM_CAP_MSR_PLATFORM_INFO not supported");
 		exit(KSFT_SKIP);
 	}
 
diff --git a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c b/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c
new file mode 100644
index 000000000000..c6691cff4e19
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/compiler.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+/*
+ * Somewhat arbitrary location and slot, intended to not overlap anything.  The
+ * location and size are specifically 2mb sized/aligned so that the initial
+ * region corresponds to exactly one large page.
+ */
+#define MEM_REGION_GPA		0xc0000000
+#define MEM_REGION_SIZE		0x200000
+#define MEM_REGION_SLOT		10
+
+static void guest_code(void)
+{
+	uint64_t val;
+
+	do {
+		val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
+	} while (!val);
+
+	if (val != 1)
+		ucall(UCALL_ABORT, 1, val);
+
+	GUEST_DONE();
+}
+
+static void *vcpu_worker(void *data)
+{
+	struct kvm_vm *vm = data;
+	struct kvm_run *run;
+	struct ucall uc;
+	uint64_t cmd;
+
+	/*
+	 * Loop until the guest is done.  Re-enter the guest on all MMIO exits,
+	 * which will occur if the guest attempts to access a memslot while it
+	 * is being moved.
+	 */
+	run = vcpu_state(vm, VCPU_ID);
+	do {
+		vcpu_run(vm, VCPU_ID);
+	} while (run->exit_reason == KVM_EXIT_MMIO);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason = %d", run->exit_reason);
+
+	cmd = get_ucall(vm, VCPU_ID, &uc);
+	TEST_ASSERT(cmd == UCALL_DONE, "Unexpected val in guest = %lu", uc.args[0]);
+	return NULL;
+}
+
+static void test_move_memory_region(void)
+{
+	pthread_t vcpu_thread;
+	struct kvm_vm *vm;
+	uint64_t *hva;
+	uint64_t gpa;
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
+				    MEM_REGION_GPA, MEM_REGION_SLOT,
+				    MEM_REGION_SIZE / getpagesize(), 0);
+
+	/*
+	 * Allocate and map two pages so that the GPA accessed by guest_code()
+	 * stays valid across the memslot move.
+	 */
+	gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
+	TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+
+	virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+
+	/* Ditto for the host mapping so that both pages can be zeroed. */
+	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+	memset(hva, 0, 2 * 4096);
+
+	pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
+
+	/* Ensure the guest thread is spun up. */
+	usleep(100000);
+
+	/*
+	 * Shift the region's base GPA.  The guest should not see "2" as the
+	 * hva->gpa translation is misaligned, i.e. the guest is accessing a
+	 * different host pfn.
+	 */
+	vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096);
+	WRITE_ONCE(*hva, 2);
+
+	usleep(100000);
+
+	/*
+	 * Note, value in memory needs to be changed *before* restoring the
+	 * memslot, else the guest could race the update and see "2".
+	 */
+	WRITE_ONCE(*hva, 1);
+
+	/* Restore the original base, the guest should see "1". */
+	vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA);
+
+	pthread_join(vcpu_thread, NULL);
+
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	int i, loops;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	if (argc > 1)
+		loops = atoi(argv[1]);
+	else
+		loops = 10;
+
+	for (i = 0; i < loops; i++)
+		test_move_memory_region();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 8c063646f2a0..8230b6bc6b8f 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -117,7 +117,7 @@ int main(int argc, char *argv[])
 		vcpu_alloc_vmx(vm, &vmx_pages_gva);
 		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 	} else {
-		printf("will skip SMM test with VMX enabled\n");
+		pr_info("will skip SMM test with VMX enabled\n");
 		vcpu_args_set(vm, VCPU_ID, 1, 0);
 	}
 
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 3ab5ec3da9f4..5b1a016edf55 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -139,7 +139,7 @@ int main(int argc, char *argv[])
 		vcpu_alloc_vmx(vm, &vmx_pages_gva);
 		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 	} else {
-		printf("will skip nested state checks\n");
+		pr_info("will skip nested state checks\n");
 		vcpu_args_set(vm, VCPU_ID, 1, 0);
 	}
 
@@ -152,20 +152,20 @@ int main(int argc, char *argv[])
 
 		switch (get_ucall(vm, VCPU_ID, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
-				    __FILE__, uc.args[1]);
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+			       	  __FILE__, uc.args[1]);
 			/* NOT REACHED */
 		case UCALL_SYNC:
 			break;
 		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 
 		/* UCALL_SYNC is handled here.  */
 		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-			    uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
 			    stage, (ulong)uc.args[1]);
 
 		state = vcpu_save_state(vm, VCPU_ID);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
index e280f68f6365..0e1adb4e3199 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
@@ -61,16 +61,14 @@ int main(int argc, char *argv[])
 
 		switch (get_ucall(vm, VCPU_ID, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s",
-				    (const char *)uc.args[0]);
+			TEST_FAIL("%s", (const char *)uc.args[0]);
 			/* NOT REACHED */
 		case UCALL_SYNC:
 			break;
 		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false,
-				    "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
 		}
 	}
 done:
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index 5c8224256294..d672f0a473f8 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -91,11 +91,11 @@ int main(int argc, char *argv[])
 
 	cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
 	if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
-		fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n");
+		print_skip("KVM_CAP_SYNC_REGS not supported");
 		exit(KSFT_SKIP);
 	}
 	if ((cap & INVALID_SYNC_FIELD) != 0) {
-		fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n");
+		print_skip("The \"invalid\" field is not invalid");
 		exit(KSFT_SKIP);
 	}
 
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index 5dfb53546a26..fe40ade06a49 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -78,10 +78,10 @@ int main(int argc, char *argv[])
 
 		switch (get_ucall(vm, VCPU_ID, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s", (const char *)uc.args[0]);
+			TEST_FAIL("%s", (const char *)uc.args[0]);
 			/* NOT REACHED */
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 	}
 }
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index a223a6401258..e894a638a155 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -21,7 +21,7 @@
 
 /* The memory slot index to track dirty pages */
 #define TEST_MEM_SLOT_INDEX		1
-#define TEST_MEM_SIZE			3
+#define TEST_MEM_PAGES			3
 
 /* L1 guest test virtual memory offset */
 #define GUEST_TEST_MEM			0xc0000000
@@ -91,15 +91,14 @@ int main(int argc, char *argv[])
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 				    GUEST_TEST_MEM,
 				    TEST_MEM_SLOT_INDEX,
-				    TEST_MEM_SIZE,
+				    TEST_MEM_PAGES,
 				    KVM_MEM_LOG_DIRTY_PAGES);
 
 	/*
 	 * Add an identity map for GVA range [0xc0000000, 0xc0002000).  This
 	 * affects both L1 and L2.  However...
 	 */
-	virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM,
-		 TEST_MEM_SIZE * 4096, 0);
+	virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0);
 
 	/*
 	 * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
@@ -113,11 +112,11 @@ int main(int argc, char *argv[])
 	nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
 	nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
 
-	bmap = bitmap_alloc(TEST_MEM_SIZE);
+	bmap = bitmap_alloc(TEST_MEM_PAGES);
 	host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
 
 	while (!done) {
-		memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096);
+		memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
 		_vcpu_run(vm, VCPU_ID);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Unexpected exit reason: %u (%s),\n",
@@ -126,8 +125,8 @@ int main(int argc, char *argv[])
 
 		switch (get_ucall(vm, VCPU_ID, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
-				    __FILE__, uc.args[1]);
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+			       	  __FILE__, uc.args[1]);
 			/* NOT REACHED */
 		case UCALL_SYNC:
 			/*
@@ -152,7 +151,7 @@ int main(int argc, char *argv[])
 			done = true;
 			break;
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 	}
 }
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
index 9ef7fab39d48..54cdefdfb49d 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -212,7 +212,7 @@ void test_vmx_nested_state(struct kvm_vm *vm)
 	test_nested_state(vm, state);
 	vcpu_nested_state_get(vm, VCPU_ID, state);
 	TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
-		    "Size must be between %d and %d.  The size returned was %d.",
+		    "Size must be between %ld and %d.  The size returned was %d.",
 		    sizeof(*state), state_sz, state->size);
 	TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
 	TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
@@ -228,7 +228,7 @@ int main(int argc, char *argv[])
 	have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
 
 	if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
-		printf("KVM_CAP_NESTED_STATE not available, skipping test\n");
+		print_skip("KVM_CAP_NESTED_STATE not available");
 		exit(KSFT_SKIP);
 	}
 
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index 69e482a95c47..fbe8417cbc2c 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -121,8 +121,8 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
 
 static void report(int64_t val)
 {
-	printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
-	       val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+	pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+		val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
 }
 
 int main(int argc, char *argv[])
@@ -150,7 +150,7 @@ int main(int argc, char *argv[])
 
 		switch (get_ucall(vm, VCPU_ID, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s", (const char *)uc.args[0]);
+			TEST_FAIL("%s", (const char *)uc.args[0]);
 			/* NOT REACHED */
 		case UCALL_SYNC:
 			report(uc.args[1]);
@@ -158,7 +158,7 @@ int main(int argc, char *argv[])
 		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 	}
 
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
index 851ea81b9d9f..3529376747c2 100644
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -51,7 +51,7 @@ int main(int argc, char *argv[])
 		xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES);
 	}
 	if (!xss_supported) {
-		printf("IA32_XSS is not supported by the vCPU.\n");
+		print_skip("IA32_XSS is not supported by the vCPU");
 		exit(KSFT_SKIP);
 	}
 
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 3ed0134a764d..b0556c752443 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -137,7 +137,8 @@ endif
 # Selftest makefiles can override those targets by setting
 # OVERRIDE_TARGETS = 1.
 ifeq ($(OVERRIDE_TARGETS),)
-$(OUTPUT)/%:%.c
+LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
+$(OUTPUT)/%:%.c $(LOCAL_HDRS)
 	$(LINK.c) $^ $(LDLIBS) -o $@
 
 $(OUTPUT)/%.o:%.S
diff --git a/tools/testing/selftests/lkdtm/.gitignore b/tools/testing/selftests/lkdtm/.gitignore
new file mode 100644
index 000000000000..f26212605b6b
--- /dev/null
+++ b/tools/testing/selftests/lkdtm/.gitignore
@@ -0,0 +1,2 @@
+*.sh
+!run.sh
diff --git a/tools/testing/selftests/media_tests/.gitignore b/tools/testing/selftests/media_tests/.gitignore
index 8745eba39012..da438e780ffe 100644
--- a/tools/testing/selftests/media_tests/.gitignore
+++ b/tools/testing/selftests/media_tests/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 media_device_test
 media_device_open
 video_device_test
diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore
index f2f7ec0a99b4..f2fbba178601 100644
--- a/tools/testing/selftests/membarrier/.gitignore
+++ b/tools/testing/selftests/membarrier/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 membarrier_test_multi_thread
 membarrier_test_single_thread
diff --git a/tools/testing/selftests/memfd/.gitignore b/tools/testing/selftests/memfd/.gitignore
index afe87c40ac80..dd9a051f608e 100644
--- a/tools/testing/selftests/memfd/.gitignore
+++ b/tools/testing/selftests/memfd/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 fuse_mnt
 fuse_test
 memfd_test
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index 53a848109f7b..0a15f9e23431 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -4,9 +4,8 @@ CFLAGS += -I../../../../include/uapi/
 CFLAGS += -I../../../../include/
 CFLAGS += -I../../../../usr/include/
 
-TEST_GEN_PROGS := memfd_test
+TEST_GEN_PROGS := memfd_test fuse_test fuse_mnt
 TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh
-TEST_GEN_FILES := fuse_mnt fuse_test
 
 fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
 
@@ -14,7 +13,7 @@ include ../lib.mk
 
 $(OUTPUT)/fuse_mnt: LDLIBS += $(shell pkg-config fuse --libs)
 
-$(OUTPUT)/memfd_test: memfd_test.c common.o
-$(OUTPUT)/fuse_test: fuse_test.c common.o
+$(OUTPUT)/memfd_test: memfd_test.c common.c
+$(OUTPUT)/fuse_test: fuse_test.c common.c
 
-EXTRA_CLEAN = common.o
+EXTRA_CLEAN = $(OUTPUT)/common.o
diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore
index 856ad4107eb3..0bc64a6d4c18 100644
--- a/tools/testing/selftests/mount/.gitignore
+++ b/tools/testing/selftests/mount/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 unprivileged-remount-test
diff --git a/tools/testing/selftests/mqueue/.gitignore b/tools/testing/selftests/mqueue/.gitignore
index d8d42377205a..72ad8ca691c9 100644
--- a/tools/testing/selftests/mqueue/.gitignore
+++ b/tools/testing/selftests/mqueue/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 mq_open_tests
 mq_perf_tests
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index ecc52d4c034d..742c499328b2 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 msg_zerocopy
 socket
 psock_fanout
@@ -23,3 +24,8 @@ so_txtime
 tcp_fastopen_backup_key
 nettest
 fin_ack_lat
+reuseaddr_ports_exhausted
+hwtstamp_config
+rxtimestamp
+timestamping
+txtimestamp
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 287ae916ec0b..3f386eb9e7d7 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -11,7 +11,11 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
 TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
 TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh
+TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
+TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
+TEST_PROGS += route_localnet.sh
+TEST_PROGS += reuseaddr_ports_exhausted.sh
+TEST_PROGS += txtimestamp.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -20,6 +24,8 @@ TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
 TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
 TEST_GEN_FILES += tcp_fastopen_backup_key
 TEST_GEN_FILES += fin_ack_lat
+TEST_GEN_FILES += reuseaddr_ports_exhausted
+TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index b8503a8119b0..3b42c06b5985 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -12,6 +12,7 @@ CONFIG_IPV6_VTI=y
 CONFIG_DUMMY=y
 CONFIG_BRIDGE=y
 CONFIG_VLAN_8021Q=y
+CONFIG_IFB=y
 CONFIG_NETFILTER=y
 CONFIG_NETFILTER_ADVANCED=y
 CONFIG_NF_CONNTRACK=m
@@ -27,5 +28,6 @@ CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_NET_SCH_FQ=m
 CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_NETEM=y
 CONFIG_TEST_BLACKHOLE_DEV=m
 CONFIG_KALLSYMS=y
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 60273f1bc7d9..b7616704b55e 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1041,6 +1041,27 @@ ipv6_addr_metric_test()
 	fi
 	log_test $rc 0 "Prefix route with metric on link up"
 
+	# verify peer metric added correctly
+	set -e
+	run_cmd "$IP -6 addr flush dev dummy2"
+	run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::1 peer 2001:db8:104::2 metric 260"
+	set +e
+
+	check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260"
+	log_test $? 0 "Set metric with peer route on local side"
+	log_test $? 0 "User specified metric on local address"
+	check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260"
+	log_test $? 0 "Set metric with peer route on peer side"
+
+	set -e
+	run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::1 peer 2001:db8:104::3 metric 261"
+	set +e
+
+	check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 261"
+	log_test $? 0 "Modify metric and peer address on local side"
+	check_route6 "2001:db8:104::3 dev dummy2 proto kernel metric 261"
+	log_test $? 0 "Modify metric and peer address on peer side"
+
 	$IP li del dummy1
 	$IP li del dummy2
 	cleanup
@@ -1457,13 +1478,20 @@ ipv4_addr_metric_test()
 
 	run_cmd "$IP addr flush dev dummy2"
 	run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260"
-	run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261"
 	rc=$?
 	if [ $rc -eq 0 ]; then
-		check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
+		check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 260"
+		rc=$?
+	fi
+	log_test $rc 0 "Set metric of address with peer route"
+
+	run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.3 metric 261"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.3 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
 		rc=$?
 	fi
-	log_test $rc 0 "Modify metric of address with peer route"
+	log_test $rc 0 "Modify metric and peer address for peer route"
 
 	$IP li del dummy1
 	$IP li del dummy2
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
index a793eef5b876..2dea317f12e7 100644
--- a/tools/testing/selftests/net/forwarding/.gitignore
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
new file mode 100644
index 000000000000..250fbb2d1625
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = bridge_igmp.sh \
+	bridge_port_isolation.sh \
+	bridge_sticky_fdb.sh \
+	bridge_vlan_aware.sh \
+	bridge_vlan_unaware.sh \
+	ethtool.sh \
+	gre_inner_v4_multipath.sh \
+	gre_inner_v6_multipath.sh \
+	gre_multipath.sh \
+	ip6gre_inner_v4_multipath.sh \
+	ip6gre_inner_v6_multipath.sh \
+	ipip_flat_gre_key.sh \
+	ipip_flat_gre_keys.sh \
+	ipip_flat_gre.sh \
+	ipip_hier_gre_key.sh \
+	ipip_hier_gre_keys.sh \
+	ipip_hier_gre.sh \
+	loopback.sh \
+	mirror_gre_bound.sh \
+	mirror_gre_bridge_1d.sh \
+	mirror_gre_bridge_1d_vlan.sh \
+	mirror_gre_bridge_1q_lag.sh \
+	mirror_gre_bridge_1q.sh \
+	mirror_gre_changes.sh \
+	mirror_gre_flower.sh \
+	mirror_gre_lag_lacp.sh \
+	mirror_gre_neigh.sh \
+	mirror_gre_nh.sh \
+	mirror_gre.sh \
+	mirror_gre_vlan_bridge_1q.sh \
+	mirror_gre_vlan.sh \
+	mirror_vlan.sh \
+	router_bridge.sh \
+	router_bridge_vlan.sh \
+	router_broadcast.sh \
+	router_mpath_nh.sh \
+	router_multicast.sh \
+	router_multipath.sh \
+	router.sh \
+	router_vid_1.sh \
+	sch_ets.sh \
+	sch_tbf_ets.sh \
+	sch_tbf_prio.sh \
+	sch_tbf_root.sh \
+	tc_actions.sh \
+	tc_chains.sh \
+	tc_flower_router.sh \
+	tc_flower.sh \
+	tc_shblocks.sh \
+	tc_vlan_modify.sh \
+	vxlan_asymmetric.sh \
+	vxlan_bridge_1d_port_8472.sh \
+	vxlan_bridge_1d.sh \
+	vxlan_bridge_1q_port_8472.sh \
+	vxlan_bridge_1q.sh \
+	vxlan_symmetric.sh
+
+TEST_PROGS_EXTENDED := devlink_lib.sh \
+	ethtool_lib.sh \
+	fib_offload_lib.sh \
+	forwarding.config.sample \
+	ipip_lib.sh \
+	lib.sh \
+	mirror_gre_lib.sh \
+	mirror_gre_topo_lib.sh \
+	mirror_lib.sh \
+	mirror_topo_lib.sh \
+	sch_ets_core.sh \
+	sch_ets_tests.sh \
+	sch_tbf_core.sh \
+	sch_tbf_etsprio.sh \
+	tc_common.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 40b076983239..155d48bd4d9e 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -35,6 +35,12 @@ if [ $? -ne 0 ]; then
 	exit 1
 fi
 
+devlink dev help 2>&1 | grep info &> /dev/null
+if [ $? -ne 0 ]; then
+	echo "SKIP: iproute2 too old, missing devlink dev info support"
+	exit 1
+fi
+
 ##############################################################################
 # Devlink helpers
 
@@ -373,6 +379,7 @@ devlink_trap_drop_test()
 	local trap_name=$1; shift
 	local group_name=$1; shift
 	local dev=$1; shift
+	local handle=$1; shift
 
 	# This is the common part of all the tests. It checks that stats are
 	# initially idle, then non-idle after changing the trap action and
@@ -397,7 +404,7 @@ devlink_trap_drop_test()
 	devlink_trap_group_stats_idle_test $group_name
 	check_err $? "Trap group stats not idle after setting action to drop"
 
-	tc_check_packets "dev $dev egress" 101 0
+	tc_check_packets "dev $dev egress" $handle 0
 	check_err $? "Packets were not dropped"
 }
 
@@ -406,7 +413,68 @@ devlink_trap_drop_cleanup()
 	local mz_pid=$1; shift
 	local dev=$1; shift
 	local proto=$1; shift
+	local pref=$1; shift
+	local handle=$1; shift
 
 	kill $mz_pid && wait $mz_pid &> /dev/null
-	tc filter del dev $dev egress protocol $proto pref 1 handle 101 flower
+	tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower
+}
+
+devlink_trap_policers_num_get()
+{
+	devlink -j -p trap policer show | jq '.[]["'$DEVLINK_DEV'"] | length'
+}
+
+devlink_trap_policer_rate_get()
+{
+	local policer_id=$1; shift
+
+	devlink -j -p trap policer show $DEVLINK_DEV policer $policer_id \
+		| jq '.[][][]["rate"]'
+}
+
+devlink_trap_policer_burst_get()
+{
+	local policer_id=$1; shift
+
+	devlink -j -p trap policer show $DEVLINK_DEV policer $policer_id \
+		| jq '.[][][]["burst"]'
+}
+
+devlink_trap_policer_rx_dropped_get()
+{
+	local policer_id=$1; shift
+
+	devlink -j -p -s trap policer show $DEVLINK_DEV policer $policer_id \
+		| jq '.[][][]["stats"]["rx"]["dropped"]'
+}
+
+devlink_trap_group_policer_get()
+{
+	local group_name=$1; shift
+
+	devlink -j -p trap group show $DEVLINK_DEV group $group_name \
+		| jq '.[][][]["policer"]'
+}
+
+devlink_trap_policer_ids_get()
+{
+	devlink -j -p trap policer show \
+		| jq '.[]["'$DEVLINK_DEV'"][]["policer"]'
+}
+
+devlink_port_by_netdev()
+{
+	local if_name=$1
+
+	devlink -j port show $if_name | jq -e '.[] | keys' | jq -r '.[]'
+}
+
+devlink_cpu_port_get()
+{
+	local cpu_dl_port_num=$(devlink port list | grep "$DEVLINK_DEV" |
+				grep cpu | cut -d/ -f3 | cut -d: -f1 |
+				sed -n '1p')
+
+	echo "$DEVLINK_DEV/$cpu_dl_port_num"
 }
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
index 925d229a59d8..925d229a59d8 100755..100644
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 2f5da414aaa7..977fc2b326a2 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -60,6 +60,15 @@ check_tc_chain_support()
 	fi
 }
 
+check_tc_action_hw_stats_support()
+{
+	tc actions help 2>&1 | grep -q hw_stats
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
+		exit 1
+	fi
+}
+
 if [[ "$(id -u)" -ne 0 ]]; then
 	echo "SKIP: need root privileges"
 	exit 0
@@ -248,13 +257,40 @@ busywait()
 	done
 }
 
+not()
+{
+	"$@"
+	[[ $? != 0 ]]
+}
+
+grep_bridge_fdb()
+{
+	local addr=$1; shift
+	local word
+	local flag
+
+	if [ "$1" == "self" ] || [ "$1" == "master" ]; then
+		word=$1; shift
+		if [ "$1" == "-v" ]; then
+			flag=$1; shift
+		fi
+	fi
+
+	$@ | grep $addr | grep $flag "$word"
+}
+
+wait_for_offload()
+{
+	"$@" | grep -q offload
+}
+
 until_counter_is()
 {
-	local value=$1; shift
+	local expr=$1; shift
 	local current=$("$@")
 
 	echo $((current))
-	((current >= value))
+	((current $expr))
 }
 
 busywait_for_counter()
@@ -263,7 +299,7 @@ busywait_for_counter()
 	local delta=$1; shift
 
 	local base=$("$@")
-	busywait "$timeout" until_counter_is $((base + delta)) "$@"
+	busywait "$timeout" until_counter_is ">= $((base + delta))" "$@"
 }
 
 setup_wait_dev()
@@ -599,6 +635,17 @@ tc_rule_stats_get()
 	    | jq ".[1].options.actions[].stats$selector"
 }
 
+tc_rule_handle_stats_get()
+{
+	local id=$1; shift
+	local handle=$1; shift
+	local selector=${1:-.packets}; shift
+
+	tc -j -s filter show $id \
+	    | jq ".[] | select(.options.handle == $handle) | \
+		  .options.actions[0].stats$selector"
+}
+
 ethtool_stats_get()
 {
 	local dev=$1; shift
@@ -607,6 +654,26 @@ ethtool_stats_get()
 	ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
 }
 
+qdisc_stats_get()
+{
+	local dev=$1; shift
+	local handle=$1; shift
+	local selector=$1; shift
+
+	tc -j -s qdisc show dev "$dev" \
+	    | jq '.[] | select(.handle == "'"$handle"'") | '"$selector"
+}
+
+qdisc_parent_stats_get()
+{
+	local dev=$1; shift
+	local parent=$1; shift
+	local selector=$1; shift
+
+	tc -j -s qdisc show dev "$dev" invisible \
+	    | jq '.[] | select(.parent == "'"$parent"'") | '"$selector"
+}
+
 humanize()
 {
 	local speed=$1; shift
@@ -1132,18 +1199,29 @@ flood_test()
 	flood_multicast_test $br_port $host1_if $host2_if
 }
 
-start_traffic()
+__start_traffic()
 {
+	local proto=$1; shift
 	local h_in=$1; shift    # Where the traffic egresses the host
 	local sip=$1; shift
 	local dip=$1; shift
 	local dmac=$1; shift
 
 	$MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
-		-a own -b $dmac -t udp -q &
+		-a own -b $dmac -t "$proto" -q "$@" &
 	sleep 1
 }
 
+start_traffic()
+{
+	__start_traffic udp "$@"
+}
+
+start_tcp_traffic()
+{
+	__start_traffic tcp "$@"
+}
+
 stop_traffic()
 {
 	# Suppress noise from killing mausezahn.
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
new file mode 100755
index 000000000000..b50081855913
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -0,0 +1,238 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
+# egress of $swp2, the traffic is acted upon by a pedit action. An ingress
+# filter installed on $h2 verifies that the packet looks like expected.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |   192.0.2.2/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+   |
+# |  | + $swp1                       BR                           $swp2 + |   |
+# |  +--------------------------------------------------------------------+   |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_ip_dsfield
+	test_ip_dscp
+	test_ip_ecn
+	test_ip_dscp_ecn
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add name br1 up type bridge vlan_filtering 1
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+do_test_pedit_dsfield_common()
+{
+	local pedit_locus=$1; shift
+	local pedit_action=$1; shift
+	local mz_flags=$1; shift
+
+	RET=0
+
+	# TOS 125: DSCP 31, ECN 1. Used for testing that the relevant part is
+	# overwritten when zero is selected.
+	$MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \
+	    -a own -b $h2mac -q -t tcp tos=0x7d,sp=54321,dp=12345
+
+	local pkts
+	pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+			tc_rule_handle_stats_get "dev $h2 ingress" 101)
+	check_err $? "Expected to get 10 packets, but got $pkts."
+	log_test "$pedit_locus pedit $pedit_action"
+}
+
+do_test_pedit_dsfield()
+{
+	local pedit_locus=$1; shift
+	local pedit_action=$1; shift
+	local match_prot=$1; shift
+	local match_flower=$1; shift
+	local mz_flags=$1; shift
+	local saddr=$1; shift
+	local daddr=$1; shift
+
+	tc filter add $pedit_locus handle 101 pref 1 \
+	   flower action pedit ex munge $pedit_action
+	tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+	   flower skip_hw $match_flower action pass
+
+	do_test_pedit_dsfield_common "$pedit_locus" "$pedit_action" "$mz_flags"
+
+	tc filter del dev $h2 ingress pref 1
+	tc filter del $pedit_locus pref 1
+}
+
+do_test_ip_dsfield()
+{
+	local locus=$1; shift
+	local dsfield
+
+	for dsfield in 0 1 2 3 128 252 253 254 255; do
+		do_test_pedit_dsfield "$locus"				\
+				      "ip dsfield set $dsfield"		\
+				      ip "ip_tos $dsfield"		\
+				      "-A 192.0.2.1 -B 192.0.2.2"
+	done
+}
+
+test_ip_dsfield()
+{
+	do_test_ip_dsfield "dev $swp1 ingress"
+	do_test_ip_dsfield "dev $swp2 egress"
+}
+
+do_test_ip_dscp()
+{
+	local locus=$1; shift
+	local dscp
+
+	for dscp in 0 1 2 3 32 61 62 63; do
+		do_test_pedit_dsfield "$locus"				       \
+				  "ip dsfield set $((dscp << 2)) retain 0xfc"  \
+				  ip "ip_tos $(((dscp << 2) | 1))"	       \
+				  "-A 192.0.2.1 -B 192.0.2.2"
+	done
+}
+
+test_ip_dscp()
+{
+	do_test_ip_dscp "dev $swp1 ingress"
+	do_test_ip_dscp "dev $swp2 egress"
+}
+
+do_test_ip_ecn()
+{
+	local locus=$1; shift
+	local ecn
+
+	for ecn in 0 1 2 3; do
+		do_test_pedit_dsfield "$locus"				\
+				      "ip dsfield set $ecn retain 0x03"	\
+				      ip "ip_tos $((124 | $ecn))"	\
+				      "-A 192.0.2.1 -B 192.0.2.2"
+	done
+}
+
+test_ip_ecn()
+{
+	do_test_ip_ecn "dev $swp1 ingress"
+	do_test_ip_ecn "dev $swp2 egress"
+}
+
+do_test_ip_dscp_ecn()
+{
+	local locus=$1; shift
+
+	tc filter add $locus handle 101 pref 1				\
+	   flower action pedit ex munge ip dsfield set 124 retain 0xfc	\
+		  action pedit ex munge ip dsfield set 1 retain 0x03
+	tc filter add dev $h2 ingress handle 101 pref 1 prot ip		\
+	   flower skip_hw ip_tos 125 action pass
+
+	do_test_pedit_dsfield_common "$locus" "set DSCP + set ECN"	\
+				      "-A 192.0.2.1 -B 192.0.2.2"
+
+	tc filter del dev $h2 ingress pref 1
+	tc filter del $locus pref 1
+}
+
+test_ip_dscp_ecn()
+{
+	do_test_ip_dscp_ecn "dev $swp1 ingress"
+	do_test_ip_dscp_ecn "dev $swp2 egress"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh
index 40e0ad1bc4f2..e60c8b4818cc 100755
--- a/tools/testing/selftests/net/forwarding/sch_ets.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets.sh
@@ -34,11 +34,14 @@ switch_destroy()
 }
 
 # Callback from sch_ets_tests.sh
-get_stats()
+collect_stats()
 {
-	local stream=$1; shift
+	local -a streams=("$@")
+	local stream
 
-	link_stats_get $h2.1$stream rx bytes
+	for stream in ${streams[@]}; do
+		qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes
+	done
 }
 
 ets_run
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index 3c3b204d47e8..cdf689e99458 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -2,7 +2,7 @@
 
 # Global interface:
 #  $put -- port under test (e.g. $swp2)
-#  get_stats($band) -- A function to collect stats for band
+#  collect_stats($streams...) -- A function to get stats for individual streams
 #  ets_start_traffic($band) -- Start traffic for this band
 #  ets_change_qdisc($op, $dev, $nstrict, $quanta...) -- Add or change qdisc
 
@@ -94,15 +94,11 @@ __ets_dwrr_test()
 
 	sleep 10
 
-	t0=($(for stream in ${streams[@]}; do
-		  get_stats $stream
-	      done))
+	t0=($(collect_stats "${streams[@]}"))
 
 	sleep 10
 
-	t1=($(for stream in ${streams[@]}; do
-		  get_stats $stream
-	      done))
+	t1=($(collect_stats "${streams[@]}"))
 	d=($(for ((i = 0; i < ${#streams[@]}; i++)); do
 		 echo $((${t1[$i]} - ${t0[$i]}))
 	     done))
diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
new file mode 100755
index 000000000000..e3bd8a6bb8b4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
+# egress of $swp2, the traffic is acted upon by an action skbedit priority. The
+# new priority should be taken into account when classifying traffic on the PRIO
+# qdisc at $swp2. The test verifies that for different priority values, the
+# traffic ends up in expected PRIO band.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |   192.0.2.2/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+   |
+# |  | + $swp1                       BR                           $swp2 + |   |
+# |  |                                                             PRIO   |   |
+# |  +--------------------------------------------------------------------+   |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_ingress
+	test_egress
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+switch_create()
+{
+	ip link add name br1 up type bridge vlan_filtering 1
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+	tc qdisc add dev $swp2 root handle 10: \
+	   prio bands 8 priomap 7 6 5 4 3 2 1 0
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 root
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+test_skbedit_priority_one()
+{
+	local locus=$1; shift
+	local prio=$1; shift
+	local classid=$1; shift
+
+	RET=0
+
+	tc filter add $locus handle 101 pref 1 \
+	   flower action skbedit priority $prio
+
+	local pkt0=$(qdisc_parent_stats_get $swp2 $classid .packets)
+	local pkt2=$(tc_rule_handle_stats_get "$locus" 101)
+	$MZ $h1 -t udp "sp=54321,dp=12345" -c 10 -d 20msec -p 100 \
+	    -a own -b $h2mac -A 192.0.2.1 -B 192.0.2.2 -q
+
+	local pkt1
+	pkt1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((pkt0 + 10))" \
+			qdisc_parent_stats_get $swp2 $classid .packets)
+	check_err $? "Expected to get 10 packets on class $classid, but got $((pkt1 - pkt0))."
+
+	local pkt3=$(tc_rule_handle_stats_get "$locus" 101)
+	((pkt3 >= pkt2 + 10))
+	check_err $? "Expected to get 10 packets on skbedit rule but got $((pkt3 - pkt2))."
+
+	log_test "$locus skbedit priority $prio -> classid $classid"
+
+	tc filter del $locus pref 1
+}
+
+test_ingress()
+{
+	local prio
+
+	for prio in {0..7}; do
+		test_skbedit_priority_one "dev $swp1 ingress" \
+					  $prio 10:$((8 - prio))
+	done
+}
+
+test_egress()
+{
+	local prio
+
+	for prio in {0..7}; do
+		test_skbedit_priority_one "dev $swp2 egress" \
+					  $prio 10:$((8 - prio))
+	done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index 64f652633585..0e18e8be6e2a 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -6,39 +6,14 @@ CHECK_TC="yes"
 # Can be overridden by the configuration file. See lib.sh
 TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms
 
-__tc_check_packets()
-{
-	local id=$1
-	local handle=$2
-	local count=$3
-	local operator=$4
-
-	start_time="$(date -u +%s%3N)"
-	while true
-	do
-		cmd_jq "tc -j -s filter show $id" \
-		       ".[] | select(.options.handle == $handle) | \
-			    select(.options.actions[0].stats.packets $operator $count)" \
-		    &> /dev/null
-		ret=$?
-		if [[ $ret -eq 0 ]]; then
-			return $ret
-		fi
-		current_time="$(date -u +%s%3N)"
-		diff=$(expr $current_time - $start_time)
-		if [ "$diff" -gt "$TC_HIT_TIMEOUT" ]; then
-			return 1
-		fi
-	done
-}
-
 tc_check_packets()
 {
 	local id=$1
 	local handle=$2
 	local count=$3
 
-	__tc_check_packets "$id" "$handle" "$count" "=="
+	busywait "$TC_HIT_TIMEOUT" until_counter_is "== $count" \
+		 tc_rule_handle_stats_get "$id" "$handle" > /dev/null
 }
 
 tc_check_packets_hitting()
@@ -46,5 +21,6 @@ tc_check_packets_hitting()
 	local id=$1
 	local handle=$2
 
-	__tc_check_packets "$id" "$handle" 0 ">"
+	busywait "$TC_HIT_TIMEOUT" until_counter_is "> 0" \
+		 tc_rule_handle_stats_get "$id" "$handle" > /dev/null
 }
diff --git a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c b/tools/testing/selftests/net/hwtstamp_config.c
index e1fdee841021..e1fdee841021 100644
--- a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c
+++ b/tools/testing/selftests/net/hwtstamp_config.c
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index d72f07642738..260336d5f0b1 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1,2 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 mptcp_connect
+pm_nl_ctl
 *.pcap
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index ba450e62dc5b..f50976ee7d44 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -1,12 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0
 
 top_srcdir = ../../../../..
+KSFT_KHDR_INSTALL := 1
 
-CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g  -I$(top_srcdir)/usr/include
 
-TEST_PROGS := mptcp_connect.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh
 
-TEST_GEN_FILES = mptcp_connect
+TEST_GEN_FILES = mptcp_connect pm_nl_ctl
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 99579c0223c1..cedee5b952ba 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -34,8 +34,8 @@ extern int optind;
 #define TCP_ULP 31
 #endif
 
+static int  poll_timeout = 10 * 1000;
 static bool listen_mode;
-static int  poll_timeout;
 
 enum cfg_mode {
 	CFG_MODE_POLL,
@@ -50,11 +50,21 @@ static int cfg_sock_proto	= IPPROTO_MPTCP;
 static bool tcpulp_audit;
 static int pf = AF_INET;
 static int cfg_sndbuf;
+static int cfg_rcvbuf;
+static bool cfg_join;
 
 static void die_usage(void)
 {
-	fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] -m mode]"
-		"[ -l ] [ -t timeout ] connect_address\n");
+	fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
+		"[-l] connect_address\n");
+	fprintf(stderr, "\t-6 use ipv6\n");
+	fprintf(stderr, "\t-t num -- set poll timeout to num\n");
+	fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
+	fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
+	fprintf(stderr, "\t-p num -- use port num\n");
+	fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
+	fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
+	fprintf(stderr, "\t-u -- check mptcp ulp\n");
 	exit(1);
 }
 
@@ -97,6 +107,17 @@ static void xgetaddrinfo(const char *node, const char *service,
 	}
 }
 
+static void set_rcvbuf(int fd, unsigned int size)
+{
+	int err;
+
+	err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size));
+	if (err) {
+		perror("set SO_RCVBUF");
+		exit(1);
+	}
+}
+
 static void set_sndbuf(int fd, unsigned int size)
 {
 	int err;
@@ -230,6 +251,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
 
 static size_t do_rnd_write(const int fd, char *buf, const size_t len)
 {
+	static bool first = true;
 	unsigned int do_w;
 	ssize_t bw;
 
@@ -237,10 +259,19 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
 	if (do_w == 0 || do_w > len)
 		do_w = len;
 
+	if (cfg_join && first && do_w > 100)
+		do_w = 100;
+
 	bw = write(fd, buf, do_w);
 	if (bw < 0)
 		perror("write");
 
+	/* let the join handshake complete, before going on */
+	if (cfg_join && first) {
+		usleep(200000);
+		first = false;
+	}
+
 	return bw;
 }
 
@@ -365,8 +396,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
 					break;
 
 				/* ... but we still receive.
-				 * Close our write side.
+				 * Close our write side, ev. give some time
+				 * for address notification
 				 */
+				if (cfg_join)
+					usleep(400000);
 				shutdown(peerfd, SHUT_WR);
 			} else {
 				if (errno == EINTR)
@@ -383,6 +417,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
 		}
 	}
 
+	/* leave some time for late join/announce */
+	if (cfg_join)
+		usleep(400000);
+
 	close(peerfd);
 	return 0;
 }
@@ -638,7 +676,7 @@ static void maybe_close(int fd)
 {
 	unsigned int r = rand();
 
-	if (r & 1)
+	if (!cfg_join && (r & 1))
 		close(fd);
 }
 
@@ -704,6 +742,8 @@ int main_loop(void)
 
 	check_getpeername_connect(fd);
 
+	if (cfg_rcvbuf)
+		set_rcvbuf(fd, cfg_rcvbuf);
 	if (cfg_sndbuf)
 		set_sndbuf(fd, cfg_sndbuf);
 
@@ -745,7 +785,7 @@ int parse_mode(const char *mode)
 	return 0;
 }
 
-int parse_sndbuf(const char *size)
+static int parse_int(const char *size)
 {
 	unsigned long s;
 
@@ -765,17 +805,19 @@ int parse_sndbuf(const char *size)
 		die_usage();
 	}
 
-	cfg_sndbuf = s;
-
-	return 0;
+	return (int)s;
 }
 
 static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "6lp:s:hut:m:b:")) != -1) {
+	while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) {
 		switch (c) {
+		case 'j':
+			cfg_join = true;
+			cfg_mode = CFG_MODE_POLL;
+			break;
 		case 'l':
 			listen_mode = true;
 			break;
@@ -802,8 +844,11 @@ static void parse_opts(int argc, char **argv)
 		case 'm':
 			cfg_mode = parse_mode(optarg);
 			break;
-		case 'b':
-			cfg_sndbuf = parse_sndbuf(optarg);
+		case 'S':
+			cfg_sndbuf = parse_int(optarg);
+			break;
+		case 'R':
+			cfg_rcvbuf = parse_int(optarg);
 			break;
 		}
 	}
@@ -831,6 +876,8 @@ int main(int argc, char *argv[])
 		if (fd < 0)
 			return 1;
 
+		if (cfg_rcvbuf)
+			set_rcvbuf(fd, cfg_rcvbuf);
 		if (cfg_sndbuf)
 			set_sndbuf(fd, cfg_sndbuf);
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index d573a0feb98d..acf02e156d20 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
 
 time_start=$(date +%s)
 
-optstring="b:d:e:l:r:h4cm:"
+optstring="S:R:d:e:l:r:h4cm:"
 ret=0
 sin=""
 sout=""
@@ -19,6 +19,7 @@ tc_loss=$((RANDOM%101))
 tc_reorder=""
 testmode=""
 sndbuf=0
+rcvbuf=0
 options_log=true
 
 if [ $tc_loss -eq 100 ];then
@@ -39,7 +40,8 @@ usage() {
 	echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
 	echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
 	echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
-	echo -e "\t-b: set sndbuf value (default: use kernel default)"
+	echo -e "\t-S: set sndbuf value (default: use kernel default)"
+	echo -e "\t-R: set rcvbuf value (default: use kernel default)"
 	echo -e "\t-m: test mode (poll, sendfile; default: poll)"
 }
 
@@ -73,11 +75,19 @@ while getopts "$optstring" option;do
 	"c")
 		capture=true
 		;;
-	"b")
+	"S")
 		if [ $OPTARG -ge 0 ];then
 			sndbuf="$OPTARG"
 		else
-			echo "-s requires numeric argument, got \"$OPTARG\"" 1>&2
+			echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2
+			exit 1
+		fi
+		;;
+	"R")
+		if [ $OPTARG -ge 0 ];then
+			rcvbuf="$OPTARG"
+		else
+			echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2
 			exit 1
 		fi
 		;;
@@ -342,8 +352,12 @@ do_transfer()
 	port=$((10000+$TEST_COUNT))
 	TEST_COUNT=$((TEST_COUNT+1))
 
+	if [ "$rcvbuf" -gt 0 ]; then
+		extra_args="$extra_args -R $rcvbuf"
+	fi
+
 	if [ "$sndbuf" -gt 0 ]; then
-		extra_args="$extra_args -b $sndbuf"
+		extra_args="$extra_args -S $sndbuf"
 	fi
 
 	if [ -n "$testmode" ]; then
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
new file mode 100755
index 000000000000..dd42c2f692d0
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -0,0 +1,357 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ret=0
+sin=""
+sout=""
+cin=""
+cout=""
+ksft_skip=4
+timeout=30
+capture=0
+
+TEST_COUNT=0
+
+init()
+{
+	capout=$(mktemp)
+
+	rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+
+	ns1="ns1-$rndh"
+	ns2="ns2-$rndh"
+
+	for netns in "$ns1" "$ns2";do
+		ip netns add $netns || exit $ksft_skip
+		ip -net $netns link set lo up
+		ip netns exec $netns sysctl -q net.mptcp.enabled=1
+		ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
+		ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+	done
+
+	#  ns1              ns2
+	# ns1eth1    ns2eth1
+	# ns1eth2    ns2eth2
+	# ns1eth3    ns2eth3
+	# ns1eth4    ns2eth4
+
+	for i in `seq 1 4`; do
+		ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
+		ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
+		ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
+		ip -net "$ns1" link set ns1eth$i up
+
+		ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i
+		ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad
+		ip -net "$ns2" link set ns2eth$i up
+
+		# let $ns2 reach any $ns1 address from any interface
+		ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+	done
+}
+
+cleanup_partial()
+{
+	rm -f "$capout"
+
+	for netns in "$ns1" "$ns2"; do
+		ip netns del $netns
+	done
+}
+
+cleanup()
+{
+	rm -f "$cin" "$cout"
+	rm -f "$sin" "$sout"
+	cleanup_partial
+}
+
+reset()
+{
+	cleanup_partial
+	init
+}
+
+for arg in "$@"; do
+	if [ "$arg" = "-c" ]; then
+		capture=1
+	fi
+done
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+
+check_transfer()
+{
+	in=$1
+	out=$2
+	what=$3
+
+	cmp "$in" "$out" > /dev/null 2>&1
+	if [ $? -ne 0 ] ;then
+		echo "[ FAIL ] $what does not match (in, out):"
+		print_file_err "$in"
+		print_file_err "$out"
+
+		return 1
+	fi
+
+	return 0
+}
+
+do_ping()
+{
+	listener_ns="$1"
+	connector_ns="$2"
+	connect_addr="$3"
+
+	ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null
+	if [ $? -ne 0 ] ; then
+		echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
+		ret=1
+	fi
+}
+
+do_transfer()
+{
+	listener_ns="$1"
+	connector_ns="$2"
+	cl_proto="$3"
+	srv_proto="$4"
+	connect_addr="$5"
+
+	port=$((10000+$TEST_COUNT))
+	TEST_COUNT=$((TEST_COUNT+1))
+
+	:> "$cout"
+	:> "$sout"
+	:> "$capout"
+
+	if [ $capture -eq 1 ]; then
+		if [ -z $SUDO_USER ] ; then
+			capuser=""
+		else
+			capuser="-Z $SUDO_USER"
+		fi
+
+		capfile="mp_join-${listener_ns}.pcap"
+
+		echo "Capturing traffic for test $TEST_COUNT into $capfile"
+		ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
+		cappid=$!
+
+		sleep 1
+	fi
+
+	ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
+	spid=$!
+
+	sleep 1
+
+	ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+	cpid=$!
+
+	wait $cpid
+	retc=$?
+	wait $spid
+	rets=$?
+
+	if [ $capture -eq 1 ]; then
+	    sleep 1
+	    kill $cappid
+	fi
+
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+		echo " client exit code $retc, server $rets" 1>&2
+		echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+		ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
+		echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+		ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
+
+		cat "$capout"
+		return 1
+	fi
+
+	check_transfer $sin $cout "file received by client"
+	retc=$?
+	check_transfer $cin $sout "file received by server"
+	rets=$?
+
+	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
+		cat "$capout"
+		return 0
+	fi
+
+	cat "$capout"
+	return 1
+}
+
+make_file()
+{
+	name=$1
+	who=$2
+
+	SIZE=1
+
+	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+
+	echo "Created $name (size $SIZE KB) containing data sent by $who"
+}
+
+run_tests()
+{
+	listener_ns="$1"
+	connector_ns="$2"
+	connect_addr="$3"
+	lret=0
+
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
+	lret=$?
+	if [ $lret -ne 0 ]; then
+		ret=$lret
+		return
+	fi
+}
+
+chk_join_nr()
+{
+	local msg="$1"
+	local syn_nr=$2
+	local syn_ack_nr=$3
+	local ack_nr=$4
+	local count
+	local dump_stats
+
+	printf "%-36s %s" "$msg" "syn"
+	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$syn_nr" ]; then
+		echo "[fail] got $count JOIN[s] syn expected $syn_nr"
+		ret=1
+		dump_stats=1
+	else
+		echo -n "[ ok ]"
+	fi
+
+	echo -n " - synack"
+	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$syn_ack_nr" ]; then
+		echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
+		ret=1
+		dump_stats=1
+	else
+		echo -n "[ ok ]"
+	fi
+
+	echo -n " - ack"
+	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$ack_nr" ]; then
+		echo "[fail] got $count JOIN[s] ack expected $ack_nr"
+		ret=1
+		dump_stats=1
+	else
+		echo "[ ok ]"
+	fi
+	if [ "${dump_stats}" = 1 ]; then
+		echo Server ns stats
+		ip netns exec $ns1 nstat -as | grep MPTcp
+		echo Client ns stats
+		ip netns exec $ns2 nstat -as | grep MPTcp
+	fi
+}
+
+sin=$(mktemp)
+sout=$(mktemp)
+cin=$(mktemp)
+cout=$(mktemp)
+init
+make_file "$cin" "client"
+make_file "$sin" "server"
+trap cleanup EXIT
+
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "no JOIN" "0" "0" "0"
+
+# subflow limted by client
+reset
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow, limited by client" 0 0 0
+
+# subflow limted by server
+reset
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow, limited by server" 1 1 0
+
+# subflow
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow" 1 1 1
+
+# multiple subflows
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows" 2 2 2
+
+# multiple subflows limited by serverf
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows, limited by server" 2 2 1
+
+# add_address, unused
+reset
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "unused signal address" 0 0 0
+
+# accept and use add_addr
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "signal address" 1 1 1
+
+# accept and use add_addr with an additional subflow
+# note: signal address in server ns and local addresses in client ns must
+# belong to different subnets or one of the listed local address could be
+# used for 'add_addr' subflow
+reset
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflow and signal" 2 2 2
+
+# accept and use add_addr with additional subflows
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows and signal" 3 3 3
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
new file mode 100755
index 000000000000..9172746b6cf0
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ksft_skip=4
+ret=0
+
+usage() {
+	echo "Usage: $0 [ -h ]"
+}
+
+
+while getopts "$optstring" option;do
+	case "$option" in
+	"h")
+		usage $0
+		exit 0
+		;;
+	"?")
+		usage $0
+		exit 1
+		;;
+	esac
+done
+
+sec=$(date +%s)
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+err=$(mktemp)
+ret=0
+
+cleanup()
+{
+	rm -f $out
+	ip netns del $ns1
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+ip netns add $ns1 || exit $ksft_skip
+ip -net $ns1 link set lo up
+ip netns exec $ns1 sysctl -q net.mptcp.enabled=1
+
+check()
+{
+	local cmd="$1"
+	local expected="$2"
+	local msg="$3"
+	local out=`$cmd 2>$err`
+	local cmd_ret=$?
+
+	printf "%-50s %s" "$msg"
+	if [ $cmd_ret -ne 0 ]; then
+		echo "[FAIL] command execution '$cmd' stderr "
+		cat $err
+		ret=1
+	elif [ "$out" = "$expected" ]; then
+		echo "[ OK ]"
+	else
+		echo -n "[FAIL] "
+		echo "expected '$expected' got '$out'"
+		ret=1
+	fi
+}
+
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+subflows 0" "defaults limits"
+
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup
+check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags  10.0.1.1" "simple add/get addr"
+
+check "ip netns exec $ns1 ./pm_nl_ctl dump" \
+"id 1 flags  10.0.1.1
+id 2 flags subflow dev lo 10.0.1.2
+id 3 flags signal,backup 10.0.1.3" "dump addrs"
+
+ip netns exec $ns1 ./pm_nl_ctl del 2
+check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr"
+check "ip netns exec $ns1 ./pm_nl_ctl dump" \
+"id 1 flags  10.0.1.1
+id 3 flags signal,backup 10.0.1.3" "dump addrs after del"
+
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3
+check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
+
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal
+check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment"
+
+for i in `seq 5 9`; do
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
+done
+check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
+check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
+
+for i in `seq 9 256`; do
+	ip netns exec $ns1 ./pm_nl_ctl del $i
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9
+done
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags  10.0.1.1
+id 3 flags signal,backup 10.0.1.3
+id 4 flags signal 10.0.1.4
+id 5 flags signal 10.0.1.5
+id 6 flags signal 10.0.1.6
+id 7 flags signal 10.0.1.7
+id 8 flags signal 10.0.1.8" "id limit"
+
+ip netns exec $ns1 ./pm_nl_ctl flush
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
+
+ip netns exec $ns1 ./pm_nl_ctl limits 9 1
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+subflows 0" "rcv addrs above hard limit"
+
+ip netns exec $ns1 ./pm_nl_ctl limits 1 9
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+subflows 0" "subflows above hard limit"
+
+ip netns exec $ns1 ./pm_nl_ctl limits 8 8
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
+subflows 8" "set limits"
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
new file mode 100644
index 000000000000..b24a2f17d415
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -0,0 +1,616 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <linux/rtnetlink.h>
+#include <linux/genetlink.h>
+
+#include "linux/mptcp.h"
+
+#ifndef MPTCP_PM_NAME
+#define MPTCP_PM_NAME		"mptcp_pm"
+#endif
+
+static void syntax(char *argv[])
+{
+	fprintf(stderr, "%s add|get|del|flush|dump|accept [<args>]\n", argv[0]);
+	fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
+	fprintf(stderr, "\tdel <id>\n");
+	fprintf(stderr, "\tget <id>\n");
+	fprintf(stderr, "\tflush\n");
+	fprintf(stderr, "\tdump\n");
+	fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
+	exit(0);
+}
+
+static int init_genl_req(char *data, int family, int cmd, int version)
+{
+	struct nlmsghdr *nh = (void *)data;
+	struct genlmsghdr *gh;
+	int off = 0;
+
+	nh->nlmsg_type = family;
+	nh->nlmsg_flags = NLM_F_REQUEST;
+	nh->nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+	off += NLMSG_ALIGN(sizeof(*nh));
+
+	gh = (void *)(data + off);
+	gh->cmd = cmd;
+	gh->version = version;
+	off += NLMSG_ALIGN(sizeof(*gh));
+	return off;
+}
+
+static void nl_error(struct nlmsghdr *nh)
+{
+	struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh);
+	int len = nh->nlmsg_len - sizeof(*nh);
+	uint32_t off;
+
+	if (len < sizeof(struct nlmsgerr))
+		error(1, 0, "netlink error message truncated %d min %ld", len,
+		      sizeof(struct nlmsgerr));
+
+	if (!err->error) {
+		/* check messages from kernel */
+		struct rtattr *attrs = (struct rtattr *)NLMSG_DATA(nh);
+
+		while (RTA_OK(attrs, len)) {
+			if (attrs->rta_type == NLMSGERR_ATTR_MSG)
+				fprintf(stderr, "netlink ext ack msg: %s\n",
+					(char *)RTA_DATA(attrs));
+			if (attrs->rta_type == NLMSGERR_ATTR_OFFS) {
+				memcpy(&off, RTA_DATA(attrs), 4);
+				fprintf(stderr, "netlink err off %d\n",
+					(int)off);
+			}
+			attrs = RTA_NEXT(attrs, len);
+		}
+	} else {
+		fprintf(stderr, "netlink error %d", err->error);
+	}
+}
+
+/* do a netlink command and, if max > 0, fetch the reply  */
+static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
+{
+	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+	socklen_t addr_len;
+	void *data = nh;
+	int rem, ret;
+	int err = 0;
+
+	nh->nlmsg_len = len;
+	ret = sendto(fd, data, len, 0, (void *)&nladdr, sizeof(nladdr));
+	if (ret != len)
+		error(1, errno, "send netlink: %uB != %uB\n", ret, len);
+	if (max == 0)
+		return 0;
+
+	addr_len = sizeof(nladdr);
+	rem = ret = recvfrom(fd, data, max, 0, (void *)&nladdr, &addr_len);
+	if (ret < 0)
+		error(1, errno, "recv netlink: %uB\n", ret);
+
+	/* Beware: the NLMSG_NEXT macro updates the 'rem' argument */
+	for (; NLMSG_OK(nh, rem); nh = NLMSG_NEXT(nh, rem)) {
+		if (nh->nlmsg_type == NLMSG_ERROR) {
+			nl_error(nh);
+			err = 1;
+		}
+	}
+	if (err)
+		error(1, 0, "bailing out due to netlink error[s]");
+	return ret;
+}
+
+static int genl_parse_getfamily(struct nlmsghdr *nlh)
+{
+	struct genlmsghdr *ghdr = NLMSG_DATA(nlh);
+	int len = nlh->nlmsg_len;
+	struct rtattr *attrs;
+
+	if (nlh->nlmsg_type != GENL_ID_CTRL)
+		error(1, errno, "Not a controller message, len=%d type=0x%x\n",
+		      nlh->nlmsg_len, nlh->nlmsg_type);
+
+	len -= NLMSG_LENGTH(GENL_HDRLEN);
+
+	if (len < 0)
+		error(1, errno, "wrong controller message len %d\n", len);
+
+	if (ghdr->cmd != CTRL_CMD_NEWFAMILY)
+		error(1, errno, "Unknown controller command %d\n", ghdr->cmd);
+
+	attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+	while (RTA_OK(attrs, len)) {
+		if (attrs->rta_type == CTRL_ATTR_FAMILY_ID)
+			return *(__u16 *)RTA_DATA(attrs);
+		attrs = RTA_NEXT(attrs, len);
+	}
+
+	error(1, errno, "can't find CTRL_ATTR_FAMILY_ID attr");
+	return -1;
+}
+
+static int resolve_mptcp_pm_netlink(int fd)
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	int namelen;
+	int off = 0;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 0);
+
+	rta = (void *)(data + off);
+	namelen = strlen(MPTCP_PM_NAME) + 1;
+	rta->rta_type = CTRL_ATTR_FAMILY_NAME;
+	rta->rta_len = RTA_LENGTH(namelen);
+	memcpy(RTA_DATA(rta), MPTCP_PM_NAME, namelen);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	do_nl_req(fd, nh, off, sizeof(data));
+	return genl_parse_getfamily((void *)data);
+}
+
+int add_addr(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	struct rtattr *rta, *nest;
+	struct nlmsghdr *nh;
+	u_int16_t family;
+	u_int32_t flags;
+	int nest_start;
+	u_int8_t id;
+	int off = 0;
+	int arg;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ADD_ADDR,
+			    MPTCP_PM_VER);
+
+	if (argc < 3)
+		syntax(argv);
+
+	nest_start = off;
+	nest = (void *)(data + off);
+	nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+	nest->rta_len = RTA_LENGTH(0);
+	off += NLMSG_ALIGN(nest->rta_len);
+
+	/* addr data */
+	rta = (void *)(data + off);
+	if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
+		family = AF_INET;
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+		rta->rta_len = RTA_LENGTH(4);
+	} else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
+		family = AF_INET6;
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+		rta->rta_len = RTA_LENGTH(16);
+	} else
+		error(1, errno, "can't parse ip %s", argv[2]);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	/* family */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+	rta->rta_len = RTA_LENGTH(2);
+	memcpy(RTA_DATA(rta), &family, 2);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	for (arg = 3; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "flags")) {
+			char *tok, *str;
+
+			/* flags */
+			flags = 0;
+			if (++arg >= argc)
+				error(1, 0, " missing flags value");
+
+			/* do not support flag list yet */
+			for (str = argv[arg]; (tok = strtok(str, ","));
+			     str = NULL) {
+				if (!strcmp(tok, "subflow"))
+					flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
+				else if (!strcmp(tok, "signal"))
+					flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
+				else if (!strcmp(tok, "backup"))
+					flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+				else
+					error(1, errno,
+					      "unknown flag %s", argv[arg]);
+			}
+
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+			rta->rta_len = RTA_LENGTH(4);
+			memcpy(RTA_DATA(rta), &flags, 4);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "id")) {
+			if (++arg >= argc)
+				error(1, 0, " missing id value");
+
+			id = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+			rta->rta_len = RTA_LENGTH(1);
+			memcpy(RTA_DATA(rta), &id, 1);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "dev")) {
+			int32_t ifindex;
+
+			if (++arg >= argc)
+				error(1, 0, " missing dev name");
+
+			ifindex = if_nametoindex(argv[arg]);
+			if (!ifindex)
+				error(1, errno, "unknown device %s", argv[arg]);
+
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX;
+			rta->rta_len = RTA_LENGTH(4);
+			memcpy(RTA_DATA(rta), &ifindex, 4);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else
+			error(1, 0, "unknown keyword %s", argv[arg]);
+	}
+	nest->rta_len = off - nest_start;
+
+	do_nl_req(fd, nh, off, 0);
+	return 0;
+}
+
+int del_addr(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	struct rtattr *rta, *nest;
+	struct nlmsghdr *nh;
+	int nest_start;
+	u_int8_t id;
+	int off = 0;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR,
+			    MPTCP_PM_VER);
+
+	/* the only argument is the address id */
+	if (argc != 3)
+		syntax(argv);
+
+	id = atoi(argv[2]);
+
+	nest_start = off;
+	nest = (void *)(data + off);
+	nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+	nest->rta_len =  RTA_LENGTH(0);
+	off += NLMSG_ALIGN(nest->rta_len);
+
+	/* build a dummy addr with only the ID set */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+	rta->rta_len = RTA_LENGTH(1);
+	memcpy(RTA_DATA(rta), &id, 1);
+	off += NLMSG_ALIGN(rta->rta_len);
+	nest->rta_len = off - nest_start;
+
+	do_nl_req(fd, nh, off, 0);
+	return 0;
+}
+
+static void print_addr(struct rtattr *attrs, int len)
+{
+	uint16_t family = 0;
+	char str[1024];
+	uint32_t flags;
+	uint8_t id;
+
+	while (RTA_OK(attrs, len)) {
+		if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FAMILY)
+			memcpy(&family, RTA_DATA(attrs), 2);
+		if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) {
+			if (family != AF_INET)
+				error(1, errno, "wrong IP (v4) for family %d",
+				      family);
+			inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str));
+			printf("%s", str);
+		}
+		if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) {
+			if (family != AF_INET6)
+				error(1, errno, "wrong IP (v6) for family %d",
+				      family);
+			inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str));
+			printf("%s", str);
+		}
+		if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) {
+			memcpy(&id, RTA_DATA(attrs), 1);
+			printf("id %d ", id);
+		}
+		if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FLAGS) {
+			memcpy(&flags, RTA_DATA(attrs), 4);
+
+			printf("flags ");
+			if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+				printf("signal");
+				flags &= ~MPTCP_PM_ADDR_FLAG_SIGNAL;
+				if (flags)
+					printf(",");
+			}
+
+			if (flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
+				printf("subflow");
+				flags &= ~MPTCP_PM_ADDR_FLAG_SUBFLOW;
+				if (flags)
+					printf(",");
+			}
+
+			if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) {
+				printf("backup");
+				flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+				if (flags)
+					printf(",");
+			}
+
+			/* bump unknown flags, if any */
+			if (flags)
+				printf("0x%x", flags);
+			printf(" ");
+		}
+		if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_IF_IDX) {
+			char name[IF_NAMESIZE], *ret;
+			int32_t ifindex;
+
+			memcpy(&ifindex, RTA_DATA(attrs), 4);
+			ret = if_indextoname(ifindex, name);
+			if (ret)
+				printf("dev %s ", ret);
+			else
+				printf("dev unknown/%d", ifindex);
+		}
+
+		attrs = RTA_NEXT(attrs, len);
+	}
+	printf("\n");
+}
+
+static void print_addrs(struct nlmsghdr *nh, int pm_family, int total_len)
+{
+	struct rtattr *attrs;
+
+	for (; NLMSG_OK(nh, total_len); nh = NLMSG_NEXT(nh, total_len)) {
+		int len = nh->nlmsg_len;
+
+		if (nh->nlmsg_type == NLMSG_DONE)
+			break;
+		if (nh->nlmsg_type == NLMSG_ERROR)
+			nl_error(nh);
+		if (nh->nlmsg_type != pm_family)
+			continue;
+
+		len -= NLMSG_LENGTH(GENL_HDRLEN);
+		attrs = (struct rtattr *) ((char *) NLMSG_DATA(nh) +
+					   GENL_HDRLEN);
+		while (RTA_OK(attrs, len)) {
+			if (attrs->rta_type ==
+			    (MPTCP_PM_ATTR_ADDR | NLA_F_NESTED))
+				print_addr((void *)RTA_DATA(attrs),
+					   attrs->rta_len);
+			attrs = RTA_NEXT(attrs, len);
+		}
+	}
+}
+
+int get_addr(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	struct rtattr *rta, *nest;
+	struct nlmsghdr *nh;
+	int nest_start;
+	u_int8_t id;
+	int off = 0;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR,
+			    MPTCP_PM_VER);
+
+	/* the only argument is the address id */
+	if (argc != 3)
+		syntax(argv);
+
+	id = atoi(argv[2]);
+
+	nest_start = off;
+	nest = (void *)(data + off);
+	nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+	nest->rta_len =  RTA_LENGTH(0);
+	off += NLMSG_ALIGN(nest->rta_len);
+
+	/* build a dummy addr with only the ID set */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+	rta->rta_len = RTA_LENGTH(1);
+	memcpy(RTA_DATA(rta), &id, 1);
+	off += NLMSG_ALIGN(rta->rta_len);
+	nest->rta_len = off - nest_start;
+
+	print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
+	return 0;
+}
+
+int dump_addrs(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	pid_t pid = getpid();
+	struct nlmsghdr *nh;
+	int off = 0;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR,
+			    MPTCP_PM_VER);
+	nh->nlmsg_flags |= NLM_F_DUMP;
+	nh->nlmsg_seq = 1;
+	nh->nlmsg_pid = pid;
+	nh->nlmsg_len = off;
+
+	print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
+	return 0;
+}
+
+int flush_addrs(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	struct nlmsghdr *nh;
+	int off = 0;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_FLUSH_ADDRS,
+			    MPTCP_PM_VER);
+
+	do_nl_req(fd, nh, off, 0);
+	return 0;
+}
+
+static void print_limits(struct nlmsghdr *nh, int pm_family, int total_len)
+{
+	struct rtattr *attrs;
+	uint32_t max;
+
+	for (; NLMSG_OK(nh, total_len); nh = NLMSG_NEXT(nh, total_len)) {
+		int len = nh->nlmsg_len;
+
+		if (nh->nlmsg_type == NLMSG_DONE)
+			break;
+		if (nh->nlmsg_type == NLMSG_ERROR)
+			nl_error(nh);
+		if (nh->nlmsg_type != pm_family)
+			continue;
+
+		len -= NLMSG_LENGTH(GENL_HDRLEN);
+		attrs = (struct rtattr *) ((char *) NLMSG_DATA(nh) +
+					   GENL_HDRLEN);
+		while (RTA_OK(attrs, len)) {
+			int type = attrs->rta_type;
+
+			if (type != MPTCP_PM_ATTR_RCV_ADD_ADDRS &&
+			    type != MPTCP_PM_ATTR_SUBFLOWS)
+				goto next;
+
+			memcpy(&max, RTA_DATA(attrs), 4);
+			printf("%s %u\n", type == MPTCP_PM_ATTR_SUBFLOWS ?
+					  "subflows" : "accept", max);
+
+next:
+			attrs = RTA_NEXT(attrs, len);
+		}
+	}
+}
+
+int get_set_limits(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	uint32_t rcv_addr = 0, subflows = 0;
+	int cmd, len = sizeof(data);
+	struct nlmsghdr *nh;
+	int off = 0;
+
+	/* limit */
+	if (argc == 4) {
+		rcv_addr = atoi(argv[2]);
+		subflows = atoi(argv[3]);
+		cmd = MPTCP_PM_CMD_SET_LIMITS;
+	} else {
+		cmd = MPTCP_PM_CMD_GET_LIMITS;
+	}
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, cmd, MPTCP_PM_VER);
+
+	/* limit */
+	if (cmd == MPTCP_PM_CMD_SET_LIMITS) {
+		struct rtattr *rta = (void *)(data + off);
+
+		rta->rta_type = MPTCP_PM_ATTR_RCV_ADD_ADDRS;
+		rta->rta_len = RTA_LENGTH(4);
+		memcpy(RTA_DATA(rta), &rcv_addr, 4);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ATTR_SUBFLOWS;
+		rta->rta_len = RTA_LENGTH(4);
+		memcpy(RTA_DATA(rta), &subflows, 4);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* do not expect a reply */
+		len = 0;
+	}
+
+	len = do_nl_req(fd, nh, off, len);
+	if (cmd == MPTCP_PM_CMD_GET_LIMITS)
+		print_limits(nh, pm_family, len);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int fd, pm_family;
+
+	if (argc < 2)
+		syntax(argv);
+
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+	if (fd == -1)
+		error(1, errno, "socket netlink");
+
+	pm_family = resolve_mptcp_pm_netlink(fd);
+
+	if (!strcmp(argv[1], "add"))
+		return add_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "del"))
+		return del_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "flush"))
+		return flush_addrs(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "get"))
+		return get_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "dump"))
+		return dump_addrs(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "limits"))
+		return get_set_limits(fd, pm_family, argc, argv);
+
+	fprintf(stderr, "unknown sub-command: %s", argv[1]);
+	syntax(argv);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
new file mode 100644
index 000000000000..7b01b7c2ec10
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Check if we can fully utilize 4-tuples for connect().
+ *
+ * Rules to bind sockets to the same port when all ephemeral ports are
+ * exhausted.
+ *
+ *   1. if there are TCP_LISTEN sockets on the port, fail to bind.
+ *   2. if there are sockets without SO_REUSEADDR, fail to bind.
+ *   3. if SO_REUSEADDR is disabled, fail to bind.
+ *   4. if SO_REUSEADDR is enabled and SO_REUSEPORT is disabled,
+ *        succeed to bind.
+ *   5. if SO_REUSEADDR and SO_REUSEPORT are enabled and
+ *        there is no socket having the both options and the same EUID,
+ *        succeed to bind.
+ *   6. fail to bind.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+
+struct reuse_opts {
+	int reuseaddr[2];
+	int reuseport[2];
+};
+
+struct reuse_opts unreusable_opts[12] = {
+	{0, 0, 0, 0},
+	{0, 0, 0, 1},
+	{0, 0, 1, 0},
+	{0, 0, 1, 1},
+	{0, 1, 0, 0},
+	{0, 1, 0, 1},
+	{0, 1, 1, 0},
+	{0, 1, 1, 1},
+	{1, 0, 0, 0},
+	{1, 0, 0, 1},
+	{1, 0, 1, 0},
+	{1, 0, 1, 1},
+};
+
+struct reuse_opts reusable_opts[4] = {
+	{1, 1, 0, 0},
+	{1, 1, 0, 1},
+	{1, 1, 1, 0},
+	{1, 1, 1, 1},
+};
+
+int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport)
+{
+	struct sockaddr_in local_addr;
+	int len = sizeof(local_addr);
+	int fd, ret;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	ASSERT_NE(-1, fd) TH_LOG("failed to open socket.");
+
+	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(int));
+	ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEADDR.");
+
+	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &reuseport, sizeof(int));
+	ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEPORT.");
+
+	local_addr.sin_family = AF_INET;
+	local_addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+	local_addr.sin_port = 0;
+
+	if (bind(fd, (struct sockaddr *)&local_addr, len) == -1) {
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+TEST(reuseaddr_ports_exhausted_unreusable)
+{
+	struct reuse_opts *opts;
+	int i, j, fd[2];
+
+	for (i = 0; i < 12; i++) {
+		opts = &unreusable_opts[i];
+
+		for (j = 0; j < 2; j++)
+			fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+		ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+		EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind.");
+
+		for (j = 0; j < 2; j++)
+			if (fd[j] != -1)
+				close(fd[j]);
+	}
+}
+
+TEST(reuseaddr_ports_exhausted_reusable_same_euid)
+{
+	struct reuse_opts *opts;
+	int i, j, fd[2];
+
+	for (i = 0; i < 4; i++) {
+		opts = &reusable_opts[i];
+
+		for (j = 0; j < 2; j++)
+			fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+		ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+
+		if (opts->reuseport[0] && opts->reuseport[1]) {
+			EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened.");
+		} else {
+			EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations.");
+		}
+
+		for (j = 0; j < 2; j++)
+			if (fd[j] != -1)
+				close(fd[j]);
+	}
+}
+
+TEST(reuseaddr_ports_exhausted_reusable_different_euid)
+{
+	struct reuse_opts *opts;
+	int i, j, ret, fd[2];
+	uid_t euid[2] = {10, 20};
+
+	for (i = 0; i < 4; i++) {
+		opts = &reusable_opts[i];
+
+		for (j = 0; j < 2; j++) {
+			ret = seteuid(euid[j]);
+			ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: %d.", euid[j]);
+
+			fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+			ret = seteuid(0);
+			ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: 0.");
+		}
+
+		ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+		EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind because one socket can be bound in each euid.");
+
+		if (fd[1] != -1) {
+			ret = listen(fd[0], 5);
+			ASSERT_EQ(0, ret) TH_LOG("failed to listen.");
+
+			ret = listen(fd[1], 5);
+			EXPECT_EQ(-1, ret) TH_LOG("should fail to listen because only one uid reserves the port in TCP_LISTEN.");
+		}
+
+		for (j = 0; j < 2; j++)
+			if (fd[j] != -1)
+				close(fd[j]);
+	}
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh
new file mode 100755
index 000000000000..20e3a2913d06
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run tests when all ephemeral ports are exhausted.
+#
+# Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+	ip netns add "${NETNS}"
+	ip -netns "${NETNS}" link set lo up
+	ip netns exec "${NETNS}" \
+		sysctl -w net.ipv4.ip_local_port_range="32768 32768" \
+		> /dev/null 2>&1
+	ip netns exec "${NETNS}" \
+		sysctl -w net.ipv4.ip_autobind_reuse=1 > /dev/null 2>&1
+}
+
+cleanup() {
+	ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+do_test() {
+	ip netns exec "${NETNS}" ./reuseaddr_ports_exhausted
+}
+
+do_test
+echo "tests done"
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
index c6233935fed1..b8475cb29be7 100644
--- a/tools/testing/selftests/net/reuseport_addr_any.c
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -21,6 +21,10 @@
 #include <sys/socket.h>
 #include <unistd.h>
 
+#ifndef SOL_DCCP
+#define SOL_DCCP 269
+#endif
+
 static const char *IP4_ADDR = "127.0.0.1";
 static const char *IP6_ADDR = "::1";
 static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 6dee9e636a95..6dee9e636a95 100644
--- a/tools/testing/selftests/networking/timestamping/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
diff --git a/tools/testing/selftests/networking/timestamping/timestamping.c b/tools/testing/selftests/net/timestamping.c
index aca3491174a1..aca3491174a1 100644
--- a/tools/testing/selftests/networking/timestamping/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index 7e386be47120..011b0da6b033 100644
--- a/tools/testing/selftests/networking/timestamping/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -41,6 +41,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/epoll.h>
 #include <sys/ioctl.h>
 #include <sys/select.h>
 #include <sys/socket.h>
@@ -49,6 +50,10 @@
 #include <time.h>
 #include <unistd.h>
 
+#define NSEC_PER_USEC	1000L
+#define USEC_PER_SEC	1000000L
+#define NSEC_PER_SEC	1000000000LL
+
 /* command line parameters */
 static int cfg_proto = SOCK_STREAM;
 static int cfg_ipproto = IPPROTO_TCP;
@@ -61,12 +66,16 @@ static int cfg_delay_snd;
 static int cfg_delay_ack;
 static bool cfg_show_payload;
 static bool cfg_do_pktinfo;
+static bool cfg_busy_poll;
+static int cfg_sleep_usec = 50 * 1000;
 static bool cfg_loop_nodata;
-static bool cfg_no_delay;
 static bool cfg_use_cmsg;
 static bool cfg_use_pf_packet;
+static bool cfg_use_epoll;
+static bool cfg_epollet;
 static bool cfg_do_listen;
 static uint16_t dest_port = 9000;
+static bool cfg_print_nsec;
 
 static struct sockaddr_in daddr;
 static struct sockaddr_in6 daddr6;
@@ -75,11 +84,48 @@ static struct timespec ts_usr;
 static int saved_tskey = -1;
 static int saved_tskey_type = -1;
 
+struct timing_event {
+	int64_t min;
+	int64_t max;
+	int64_t total;
+	int count;
+};
+
+static struct timing_event usr_enq;
+static struct timing_event usr_snd;
+static struct timing_event usr_ack;
+
 static bool test_failed;
 
+static int64_t timespec_to_ns64(struct timespec *ts)
+{
+	return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
 static int64_t timespec_to_us64(struct timespec *ts)
 {
-	return ts->tv_sec * 1000 * 1000 + ts->tv_nsec / 1000;
+	return ts->tv_sec * USEC_PER_SEC + ts->tv_nsec / NSEC_PER_USEC;
+}
+
+static void init_timing_event(struct timing_event *te)
+{
+	te->min = INT64_MAX;
+	te->max = 0;
+	te->total = 0;
+	te->count = 0;
+}
+
+static void add_timing_event(struct timing_event *te,
+		struct timespec *t_start, struct timespec *t_end)
+{
+	int64_t ts_delta = timespec_to_ns64(t_end) - timespec_to_ns64(t_start);
+
+	te->count++;
+	if (ts_delta < te->min)
+		te->min = ts_delta;
+	if (ts_delta > te->max)
+		te->max = ts_delta;
+	te->total += ts_delta;
 }
 
 static void validate_key(int tskey, int tstype)
@@ -113,25 +159,43 @@ static void validate_timestamp(struct timespec *cur, int min_delay)
 	start64 = timespec_to_us64(&ts_usr);
 
 	if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
-		fprintf(stderr, "ERROR: delay %lu expected between %d and %d\n",
+		fprintf(stderr, "ERROR: %lu us expected between %d and %d\n",
 				cur64 - start64, min_delay, max_delay);
 		test_failed = true;
 	}
 }
 
+static void __print_ts_delta_formatted(int64_t ts_delta)
+{
+	if (cfg_print_nsec)
+		fprintf(stderr, "%lu ns", ts_delta);
+	else
+		fprintf(stderr, "%lu us", ts_delta / NSEC_PER_USEC);
+}
+
 static void __print_timestamp(const char *name, struct timespec *cur,
 			      uint32_t key, int payload_len)
 {
+	int64_t ts_delta;
+
 	if (!(cur->tv_sec | cur->tv_nsec))
 		return;
 
-	fprintf(stderr, "  %s: %lu s %lu us (seq=%u, len=%u)",
-			name, cur->tv_sec, cur->tv_nsec / 1000,
-			key, payload_len);
-
-	if (cur != &ts_usr)
-		fprintf(stderr, "  (USR %+" PRId64 " us)",
-			timespec_to_us64(cur) - timespec_to_us64(&ts_usr));
+	if (cfg_print_nsec)
+		fprintf(stderr, "  %s: %lu s %lu ns (seq=%u, len=%u)",
+				name, cur->tv_sec, cur->tv_nsec,
+				key, payload_len);
+	else
+		fprintf(stderr, "  %s: %lu s %lu us (seq=%u, len=%u)",
+				name, cur->tv_sec, cur->tv_nsec / NSEC_PER_USEC,
+				key, payload_len);
+
+	if (cur != &ts_usr) {
+		ts_delta = timespec_to_ns64(cur) - timespec_to_ns64(&ts_usr);
+		fprintf(stderr, "  (USR +");
+		__print_ts_delta_formatted(ts_delta);
+		fprintf(stderr, ")");
+	}
 
 	fprintf(stderr, "\n");
 }
@@ -155,14 +219,17 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype,
 	case SCM_TSTAMP_SCHED:
 		tsname = "  ENQ";
 		validate_timestamp(&tss->ts[0], 0);
+		add_timing_event(&usr_enq, &ts_usr, &tss->ts[0]);
 		break;
 	case SCM_TSTAMP_SND:
 		tsname = "  SND";
 		validate_timestamp(&tss->ts[0], cfg_delay_snd);
+		add_timing_event(&usr_snd, &ts_usr, &tss->ts[0]);
 		break;
 	case SCM_TSTAMP_ACK:
 		tsname = "  ACK";
 		validate_timestamp(&tss->ts[0], cfg_delay_ack);
+		add_timing_event(&usr_ack, &ts_usr, &tss->ts[0]);
 		break;
 	default:
 		error(1, 0, "unknown timestamp type: %u",
@@ -171,6 +238,21 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype,
 	__print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
 }
 
+static void print_timing_event(char *name, struct timing_event *te)
+{
+	if (!te->count)
+		return;
+
+	fprintf(stderr, "    %s: count=%d", name, te->count);
+	fprintf(stderr, ", avg=");
+	__print_ts_delta_formatted((int64_t)(te->total / te->count));
+	fprintf(stderr, ", min=");
+	__print_ts_delta_formatted(te->min);
+	fprintf(stderr, ", max=");
+	__print_ts_delta_formatted(te->max);
+	fprintf(stderr, "\n");
+}
+
 /* TODO: convert to check_and_print payload once API is stable */
 static void print_payload(char *data, int len)
 {
@@ -198,6 +280,17 @@ static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
 		daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown");
 }
 
+static void __epoll(int epfd)
+{
+	struct epoll_event events;
+	int ret;
+
+	memset(&events, 0, sizeof(events));
+	ret = epoll_wait(epfd, &events, 1, cfg_poll_timeout);
+	if (ret != 1)
+		error(1, errno, "epoll_wait");
+}
+
 static void __poll(int fd)
 {
 	struct pollfd pollfd;
@@ -391,7 +484,11 @@ static void do_test(int family, unsigned int report_opt)
 	struct msghdr msg;
 	struct iovec iov;
 	char *buf;
-	int fd, i, val = 1, total_len;
+	int fd, i, val = 1, total_len, epfd = 0;
+
+	init_timing_event(&usr_enq);
+	init_timing_event(&usr_snd);
+	init_timing_event(&usr_ack);
 
 	total_len = cfg_payload_len;
 	if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) {
@@ -418,6 +515,20 @@ static void do_test(int family, unsigned int report_opt)
 	if (fd < 0)
 		error(1, errno, "socket");
 
+	if (cfg_use_epoll) {
+		struct epoll_event ev;
+
+		memset(&ev, 0, sizeof(ev));
+		ev.data.fd = fd;
+		if (cfg_epollet)
+			ev.events |= EPOLLET;
+		epfd = epoll_create(1);
+		if (epfd <= 0)
+			error(1, errno, "epoll_create");
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev))
+			error(1, errno, "epoll_ctl");
+	}
+
 	/* reset expected key on each new socket */
 	saved_tskey = -1;
 
@@ -525,19 +636,28 @@ static void do_test(int family, unsigned int report_opt)
 			error(1, errno, "send");
 
 		/* wait for all errors to be queued, else ACKs arrive OOO */
-		if (!cfg_no_delay)
-			usleep(50 * 1000);
+		if (cfg_sleep_usec)
+			usleep(cfg_sleep_usec);
 
-		__poll(fd);
+		if (!cfg_busy_poll) {
+			if (cfg_use_epoll)
+				__epoll(epfd);
+			else
+				__poll(fd);
+		}
 
 		while (!recv_errmsg(fd)) {}
 	}
 
+	print_timing_event("USR-ENQ", &usr_enq);
+	print_timing_event("USR-SND", &usr_snd);
+	print_timing_event("USR-ACK", &usr_ack);
+
 	if (close(fd))
 		error(1, errno, "close");
 
 	free(buf);
-	usleep(100 * 1000);
+	usleep(100 * NSEC_PER_USEC);
 }
 
 static void __attribute__((noreturn)) usage(const char *filepath)
@@ -547,18 +667,22 @@ static void __attribute__((noreturn)) usage(const char *filepath)
 			"  -4:   only IPv4\n"
 			"  -6:   only IPv6\n"
 			"  -h:   show this message\n"
+			"  -b:   busy poll to read from error queue\n"
 			"  -c N: number of packets for each test\n"
 			"  -C:   use cmsg to set tstamp recording options\n"
-			"  -D:   no delay between packets\n"
-			"  -F:   poll() waits forever for an event\n"
+			"  -e:   use level-triggered epoll() instead of poll()\n"
+			"  -E:   use event-triggered epoll() instead of poll()\n"
+			"  -F:   poll()/epoll() waits forever for an event\n"
 			"  -I:   request PKTINFO\n"
 			"  -l N: send N bytes at a time\n"
 			"  -L    listen on hostname and port\n"
 			"  -n:   set no-payload option\n"
+			"  -N:   print timestamps and durations in nsec (instead of usec)\n"
 			"  -p N: connect to port N\n"
 			"  -P:   use PF_PACKET\n"
 			"  -r:   use raw\n"
 			"  -R:   use raw (IP_HDRINCL)\n"
+			"  -S N: usec to sleep before reading error queue\n"
 			"  -u:   use udp\n"
 			"  -v:   validate SND delay (usec)\n"
 			"  -V:   validate ACK delay (usec)\n"
@@ -572,7 +696,8 @@ static void parse_opt(int argc, char **argv)
 	int proto_count = 0;
 	int c;
 
-	while ((c = getopt(argc, argv, "46c:CDFhIl:Lnp:PrRuv:V:x")) != -1) {
+	while ((c = getopt(argc, argv,
+				"46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) {
 		switch (c) {
 		case '4':
 			do_ipv6 = 0;
@@ -580,15 +705,21 @@ static void parse_opt(int argc, char **argv)
 		case '6':
 			do_ipv4 = 0;
 			break;
+		case 'b':
+			cfg_busy_poll = true;
+			break;
 		case 'c':
 			cfg_num_pkts = strtoul(optarg, NULL, 10);
 			break;
 		case 'C':
 			cfg_use_cmsg = true;
 			break;
-		case 'D':
-			cfg_no_delay = true;
+		case 'e':
+			cfg_use_epoll = true;
 			break;
+		case 'E':
+			cfg_use_epoll = true;
+			cfg_epollet = true;
 		case 'F':
 			cfg_poll_timeout = -1;
 			break;
@@ -604,6 +735,9 @@ static void parse_opt(int argc, char **argv)
 		case 'n':
 			cfg_loop_nodata = true;
 			break;
+		case 'N':
+			cfg_print_nsec = true;
+			break;
 		case 'p':
 			dest_port = strtoul(optarg, NULL, 10);
 			break;
@@ -623,6 +757,9 @@ static void parse_opt(int argc, char **argv)
 			cfg_proto = SOCK_RAW;
 			cfg_ipproto = IPPROTO_RAW;
 			break;
+		case 'S':
+			cfg_sleep_usec = strtoul(optarg, NULL, 10);
+			break;
 		case 'u':
 			proto_count++;
 			cfg_proto = SOCK_DGRAM;
@@ -653,6 +790,8 @@ static void parse_opt(int argc, char **argv)
 		error(1, 0, "pass -P, -r, -R or -u, not multiple");
 	if (cfg_do_pktinfo && cfg_use_pf_packet)
 		error(1, 0, "cannot ask for pktinfo over pf_packet");
+	if (cfg_busy_poll && cfg_use_epoll)
+		error(1, 0, "pass epoll or busy_poll, not both");
 
 	if (optind != argc - 1)
 		error(1, 0, "missing required hostname argument");
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh
index df0d86ca72b7..eea6f5193693 100755
--- a/tools/testing/selftests/networking/timestamping/txtimestamp.sh
+++ b/tools/testing/selftests/net/txtimestamp.sh
@@ -43,15 +43,40 @@ run_test_tcpudpraw() {
 }
 
 run_test_all() {
+	setup
 	run_test_tcpudpraw		# setsockopt
 	run_test_tcpudpraw -C		# cmsg
 	run_test_tcpudpraw -n		# timestamp w/o data
+	echo "OK. All tests passed"
+}
+
+run_test_one() {
+	setup
+	./txtimestamp $@
+}
+
+usage() {
+	echo "Usage: $0 [ -r | --run ] <txtimestamp args> | [ -h | --help ]"
+	echo "  (no args)  Run all tests"
+	echo "  -r|--run  Run an individual test with arguments"
+	echo "  -h|--help Help"
+}
+
+main() {
+	if [[ $# -eq 0 ]]; then
+		run_test_all
+	else
+		if [[ "$1" = "-r" || "$1" == "--run" ]]; then
+			shift
+			run_test_one $@
+		else
+			usage
+		fi
+	fi
 }
 
 if [[ "$(ip netns identify)" == "root" ]]; then
-	../../net/in_netns.sh $0 $@
+	./in_netns.sh $0 $@
 else
-	setup
-	run_test_all
-	echo "OK. All tests passed"
+	main $@
 fi
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 08194aa44006..9c0f758310fe 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -3,6 +3,10 @@
 
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
 	conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
-	nft_concat_range.sh
+	nft_concat_range.sh \
+	nft_queue.sh
+
+LDLIBS = -lmnl
+TEST_GEN_FILES =  nf-queue
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
index 59caa8f71cd8..4faf2ce021d9 100644
--- a/tools/testing/selftests/netfilter/config
+++ b/tools/testing/selftests/netfilter/config
@@ -1,2 +1,8 @@
 CONFIG_NET_NS=y
 CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_REDIR=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NF_CT_NETLINK=m
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c
new file mode 100644
index 000000000000..29c73bce38fa
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nf-queue.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <arpa/inet.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_queue.h>
+
+struct options {
+	bool count_packets;
+	int verbose;
+	unsigned int queue_num;
+	unsigned int timeout;
+};
+
+static unsigned int queue_stats[5];
+static struct options opts;
+
+static void help(const char *p)
+{
+	printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
+}
+
+static int parse_attr_cb(const struct nlattr *attr, void *data)
+{
+	const struct nlattr **tb = data;
+	int type = mnl_attr_get_type(attr);
+
+	/* skip unsupported attribute in user-space */
+	if (mnl_attr_type_valid(attr, NFQA_MAX) < 0)
+		return MNL_CB_OK;
+
+	switch (type) {
+	case NFQA_MARK:
+	case NFQA_IFINDEX_INDEV:
+	case NFQA_IFINDEX_OUTDEV:
+	case NFQA_IFINDEX_PHYSINDEV:
+	case NFQA_IFINDEX_PHYSOUTDEV:
+		if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) {
+			perror("mnl_attr_validate");
+			return MNL_CB_ERROR;
+		}
+		break;
+	case NFQA_TIMESTAMP:
+		if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+		    sizeof(struct nfqnl_msg_packet_timestamp)) < 0) {
+			perror("mnl_attr_validate2");
+			return MNL_CB_ERROR;
+		}
+		break;
+	case NFQA_HWADDR:
+		if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+		    sizeof(struct nfqnl_msg_packet_hw)) < 0) {
+			perror("mnl_attr_validate2");
+			return MNL_CB_ERROR;
+		}
+		break;
+	case NFQA_PAYLOAD:
+		break;
+	}
+	tb[type] = attr;
+	return MNL_CB_OK;
+}
+
+static int queue_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *tb[NFQA_MAX+1] = { 0 };
+	struct nfqnl_msg_packet_hdr *ph = NULL;
+	uint32_t id = 0;
+
+	(void)data;
+
+	mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, tb);
+	if (tb[NFQA_PACKET_HDR]) {
+		ph = mnl_attr_get_payload(tb[NFQA_PACKET_HDR]);
+		id = ntohl(ph->packet_id);
+
+		if (opts.verbose > 0)
+			printf("packet hook=%u, hwproto 0x%x",
+				ntohs(ph->hw_protocol), ph->hook);
+
+		if (ph->hook >= 5) {
+			fprintf(stderr, "Unknown hook %d\n", ph->hook);
+			return MNL_CB_ERROR;
+		}
+
+		if (opts.verbose > 0) {
+			uint32_t skbinfo = 0;
+
+			if (tb[NFQA_SKB_INFO])
+				skbinfo = ntohl(mnl_attr_get_u32(tb[NFQA_SKB_INFO]));
+			if (skbinfo & NFQA_SKB_CSUMNOTREADY)
+				printf(" csumnotready");
+			if (skbinfo & NFQA_SKB_GSO)
+				printf(" gso");
+			if (skbinfo & NFQA_SKB_CSUM_NOTVERIFIED)
+				printf(" csumnotverified");
+			puts("");
+		}
+
+		if (opts.count_packets)
+			queue_stats[ph->hook]++;
+	}
+
+	return MNL_CB_OK + id;
+}
+
+static struct nlmsghdr *
+nfq_build_cfg_request(char *buf, uint8_t command, int queue_num)
+{
+	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+	struct nfqnl_msg_config_cmd cmd = {
+		.command = command,
+		.pf = htons(AF_INET),
+	};
+	struct nfgenmsg *nfg;
+
+	nlh->nlmsg_type	= (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+
+	nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+
+	nfg->nfgen_family = AF_UNSPEC;
+	nfg->version = NFNETLINK_V0;
+	nfg->res_id = htons(queue_num);
+
+	mnl_attr_put(nlh, NFQA_CFG_CMD, sizeof(cmd), &cmd);
+
+	return nlh;
+}
+
+static struct nlmsghdr *
+nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
+{
+	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+	struct nfqnl_msg_config_params params = {
+		.copy_range = htonl(range),
+		.copy_mode = mode,
+	};
+	struct nfgenmsg *nfg;
+
+	nlh->nlmsg_type	= (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+
+	nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+	nfg->nfgen_family = AF_UNSPEC;
+	nfg->version = NFNETLINK_V0;
+	nfg->res_id = htons(queue_num);
+
+	mnl_attr_put(nlh, NFQA_CFG_PARAMS, sizeof(params), &params);
+
+	return nlh;
+}
+
+static struct nlmsghdr *
+nfq_build_verdict(char *buf, int id, int queue_num, int verd)
+{
+	struct nfqnl_msg_verdict_hdr vh = {
+		.verdict = htonl(verd),
+		.id = htonl(id),
+	};
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfg;
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+	nfg->nfgen_family = AF_UNSPEC;
+	nfg->version = NFNETLINK_V0;
+	nfg->res_id = htons(queue_num);
+
+	mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh);
+
+	return nlh;
+}
+
+static void print_stats(void)
+{
+	unsigned int last, total;
+	int i;
+
+	if (!opts.count_packets)
+		return;
+
+	total = 0;
+	last = queue_stats[0];
+
+	for (i = 0; i < 5; i++) {
+		printf("hook %d packets %08u\n", i, queue_stats[i]);
+		last = queue_stats[i];
+		total += last;
+	}
+
+	printf("%u packets total\n", total);
+}
+
+struct mnl_socket *open_queue(void)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	unsigned int queue_num;
+	struct mnl_socket *nl;
+	struct nlmsghdr *nlh;
+	struct timeval tv;
+	uint32_t flags;
+
+	nl = mnl_socket_open(NETLINK_NETFILTER);
+	if (nl == NULL) {
+		perror("mnl_socket_open");
+		exit(EXIT_FAILURE);
+	}
+
+	if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
+		perror("mnl_socket_bind");
+		exit(EXIT_FAILURE);
+	}
+
+	queue_num = opts.queue_num;
+	nlh = nfq_build_cfg_request(buf, NFQNL_CFG_CMD_BIND, queue_num);
+
+	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+		perror("mnl_socket_sendto");
+		exit(EXIT_FAILURE);
+	}
+
+	nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
+
+	flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
+	mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
+	mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
+
+	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+		perror("mnl_socket_sendto");
+		exit(EXIT_FAILURE);
+	}
+
+	memset(&tv, 0, sizeof(tv));
+	tv.tv_sec = opts.timeout;
+	if (opts.timeout && setsockopt(mnl_socket_get_fd(nl),
+				       SOL_SOCKET, SO_RCVTIMEO,
+				       &tv, sizeof(tv))) {
+		perror("setsockopt(SO_RCVTIMEO)");
+		exit(EXIT_FAILURE);
+	}
+
+	return nl;
+}
+
+static int mainloop(void)
+{
+	unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
+	struct mnl_socket *nl;
+	struct nlmsghdr *nlh;
+	unsigned int portid;
+	char *buf;
+	int ret;
+
+	buf = malloc(buflen);
+	if (!buf) {
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+
+	nl = open_queue();
+	portid = mnl_socket_get_portid(nl);
+
+	for (;;) {
+		uint32_t id;
+
+		ret = mnl_socket_recvfrom(nl, buf, buflen);
+		if (ret == -1) {
+			if (errno == ENOBUFS)
+				continue;
+
+			if (errno == EAGAIN) {
+				errno = 0;
+				ret = 0;
+				break;
+			}
+
+			perror("mnl_socket_recvfrom");
+			exit(EXIT_FAILURE);
+		}
+
+		ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL);
+		if (ret < 0) {
+			perror("mnl_cb_run");
+			exit(EXIT_FAILURE);
+		}
+
+		id = ret - MNL_CB_OK;
+		nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
+		if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+			perror("mnl_socket_sendto");
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	mnl_socket_close(nl);
+
+	return ret;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
+		switch (c) {
+		case 'c':
+			opts.count_packets = true;
+			break;
+		case 'h':
+			help(argv[0]);
+			exit(0);
+			break;
+		case 'q':
+			opts.queue_num = atoi(optarg);
+			if (opts.queue_num > 0xffff)
+				opts.queue_num = 0;
+			break;
+		case 't':
+			opts.timeout = atoi(optarg);
+			break;
+		case 'v':
+			opts.verbose++;
+			break;
+		}
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	parse_opts(argc, argv);
+
+	ret = mainloop();
+	if (opts.count_packets)
+		print_stats();
+
+	return ret;
+}
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
new file mode 100755
index 000000000000..6898448b4266
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+#
+# This tests nf_queue:
+# 1. can process packets from all hooks
+# 2. support running nfqueue from more than one base chain
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsrouter="nsrouter-$sfx"
+
+cleanup()
+{
+	ip netns del ${ns1}
+	ip netns del ${ns2}
+	ip netns del ${nsrouter}
+	rm -f "$TMPFILE0"
+	rm -f "$TMPFILE1"
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip netns add ${nsrouter}
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create net namespace"
+	exit $ksft_skip
+fi
+
+TMPFILE0=$(mktemp)
+TMPFILE1=$(mktemp)
+trap cleanup EXIT
+
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
+ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
+
+ip -net ${nsrouter} link set lo up
+ip -net ${nsrouter} link set veth0 up
+ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
+ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
+
+ip -net ${nsrouter} link set veth1 up
+ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set eth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set eth0 up
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+ip -net ${ns1} route add default via 10.0.1.1
+ip -net ${ns1} route add default via dead:1::1
+
+ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns2} addr add dead:2::99/64 dev eth0
+ip -net ${ns2} route add default via 10.0.2.1
+ip -net ${ns2} route add default via dead:2::1
+
+load_ruleset() {
+	local name=$1
+	local prio=$2
+
+ip netns exec ${nsrouter} nft -f - <<EOF
+table inet $name {
+	chain nfq {
+		ip protocol icmp queue bypass
+		icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
+	}
+	chain pre {
+		type filter hook prerouting priority $prio; policy accept;
+		jump nfq
+	}
+	chain input {
+		type filter hook input priority $prio; policy accept;
+		jump nfq
+	}
+	chain forward {
+		type filter hook forward priority $prio; policy accept;
+		tcp dport 12345 queue num 2
+		jump nfq
+	}
+	chain output {
+		type filter hook output priority $prio; policy accept;
+		tcp dport 12345 queue num 3
+		jump nfq
+	}
+	chain post {
+		type filter hook postrouting priority $prio; policy accept;
+		jump nfq
+	}
+}
+EOF
+}
+
+load_counter_ruleset() {
+	local prio=$1
+
+ip netns exec ${nsrouter} nft -f - <<EOF
+table inet countrules {
+	chain pre {
+		type filter hook prerouting priority $prio; policy accept;
+		counter
+	}
+	chain input {
+		type filter hook input priority $prio; policy accept;
+		counter
+	}
+	chain forward {
+		type filter hook forward priority $prio; policy accept;
+		counter
+	}
+	chain output {
+		type filter hook output priority $prio; policy accept;
+		counter
+	}
+	chain post {
+		type filter hook postrouting priority $prio; policy accept;
+		counter
+	}
+}
+EOF
+}
+
+test_ping() {
+  ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+  if [ $? -ne 0 ];then
+	return 1
+  fi
+
+  ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+  if [ $? -ne 0 ];then
+	return 1
+  fi
+
+  return 0
+}
+
+test_ping_router() {
+  ip netns exec ${ns1} ping -c 1 -q 10.0.2.1 > /dev/null
+  if [ $? -ne 0 ];then
+	return 1
+  fi
+
+  ip netns exec ${ns1} ping -c 1 -q dead:2::1 > /dev/null
+  if [ $? -ne 0 ];then
+	return 1
+  fi
+
+  return 0
+}
+
+test_queue_blackhole() {
+	local proto=$1
+
+ip netns exec ${nsrouter} nft -f - <<EOF
+table $proto blackh {
+	chain forward {
+	type filter hook forward priority 0; policy accept;
+		queue num 600
+	}
+}
+EOF
+	if [ $proto = "ip" ] ;then
+		ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+		lret=$?
+	elif [ $proto = "ip6" ]; then
+		ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+		lret=$?
+	else
+		lret=111
+	fi
+
+	# queue without bypass keyword should drop traffic if no listener exists.
+	if [ $lret -eq 0 ];then
+		echo "FAIL: $proto expected failure, got $lret" 1>&2
+		exit 1
+	fi
+
+	ip netns exec ${nsrouter} nft delete table $proto blackh
+	if [ $? -ne 0 ] ;then
+	        echo "FAIL: $proto: Could not delete blackh table"
+	        exit 1
+	fi
+
+        echo "PASS: $proto: statement with no listener results in packet drop"
+}
+
+test_queue()
+{
+	local expected=$1
+	local last=""
+
+	# spawn nf-queue listeners
+	ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
+	ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
+	sleep 1
+	test_ping
+	ret=$?
+	if [ $ret -ne 0 ];then
+		echo "FAIL: netns routing/connectivity with active listener on queue $queue: $ret" 1>&2
+		exit $ret
+	fi
+
+	test_ping_router
+	ret=$?
+	if [ $ret -ne 0 ];then
+		echo "FAIL: netns router unreachable listener on queue $queue: $ret" 1>&2
+		exit $ret
+	fi
+
+	wait
+	ret=$?
+
+	for file in $TMPFILE0 $TMPFILE1; do
+		last=$(tail -n1 "$file")
+		if [ x"$last" != x"$expected packets total" ]; then
+			echo "FAIL: Expected $expected packets total, but got $last" 1>&2
+			cat "$file" 1>&2
+
+			ip netns exec ${nsrouter} nft list ruleset
+			exit 1
+		fi
+	done
+
+	echo "PASS: Expected and received $last"
+}
+
+test_tcp_forward()
+{
+	ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
+	local nfqpid=$!
+
+	tmpfile=$(mktemp) || exit 1
+	dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
+	ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+	local rpid=$!
+
+	sleep 1
+	ip netns exec ${ns1} nc -w 5 10.0.2.99 12345 <"$tmpfile" >/dev/null &
+
+	rm -f "$tmpfile"
+
+	wait $rpid
+	wait $lpid
+	[ $? -eq 0 ] && echo "PASS: tcp and nfqueue in forward chain"
+}
+
+test_tcp_localhost()
+{
+	tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
+
+	tmpfile=$(mktemp) || exit 1
+
+	dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
+	ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+	local rpid=$!
+
+	ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
+	local nfqpid=$!
+
+	sleep 1
+	ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
+	rm -f "$tmpfile"
+
+	wait $rpid
+	[ $? -eq 0 ] && echo "PASS: tcp via loopback"
+}
+
+ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+load_ruleset "filter" 0
+
+sleep 3
+
+test_ping
+ret=$?
+if [ $ret -eq 0 ];then
+	# queue bypass works (rules were skipped, no listener)
+	echo "PASS: ${ns1} can reach ${ns2}"
+else
+	echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
+	exit $ret
+fi
+
+test_queue_blackhole ip
+test_queue_blackhole ip6
+
+# dummy ruleset to add base chains between the
+# queueing rules.  We don't want the second reinject
+# to re-execute the old hooks.
+load_counter_ruleset 10
+
+# we are hooking all: prerouting/input/forward/output/postrouting.
+# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
+# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
+# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
+# so we expect that userspace program receives 10 packets.
+test_queue 10
+
+# same.  We queue to a second program as well.
+load_ruleset "filter2" 20
+test_queue 20
+
+test_tcp_forward
+test_tcp_localhost
+
+exit $ret
diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore
deleted file mode 100644
index d9355035e746..000000000000
--- a/tools/testing/selftests/networking/timestamping/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-timestamping
-rxtimestamp
-txtimestamp
-hwtstamp_config
diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile
deleted file mode 100644
index 1de8bd8ccf5d..000000000000
--- a/tools/testing/selftests/networking/timestamping/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include
-
-TEST_GEN_FILES := hwtstamp_config rxtimestamp timestamping txtimestamp
-TEST_PROGS := txtimestamp.sh
-
-all: $(TEST_PROGS)
-
-top_srcdir = ../../../../..
-KSFT_KHDR_INSTALL := 1
-include ../../lib.mk
diff --git a/tools/testing/selftests/networking/timestamping/config b/tools/testing/selftests/networking/timestamping/config
deleted file mode 100644
index a13e3169b0a4..000000000000
--- a/tools/testing/selftests/networking/timestamping/config
+++ /dev/null
@@ -1,2 +0,0 @@
-CONFIG_IFB=y
-CONFIG_NET_SCH_NETEM=y
diff --git a/tools/testing/selftests/nsfs/.gitignore b/tools/testing/selftests/nsfs/.gitignore
index 2ab2c824ce86..ed79ebdf286e 100644
--- a/tools/testing/selftests/nsfs/.gitignore
+++ b/tools/testing/selftests/nsfs/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 owner
 pidns
diff --git a/tools/testing/selftests/openat2/.gitignore b/tools/testing/selftests/openat2/.gitignore
index bd68f6c3fd07..82a4846cbc4b 100644
--- a/tools/testing/selftests/openat2/.gitignore
+++ b/tools/testing/selftests/openat2/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 /*_test
diff --git a/tools/testing/selftests/pid_namespace/.gitignore b/tools/testing/selftests/pid_namespace/.gitignore
new file mode 100644
index 000000000000..93ab9d7e5b7e
--- /dev/null
+++ b/tools/testing/selftests/pid_namespace/.gitignore
@@ -0,0 +1 @@
+regression_enomem
diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile
new file mode 100644
index 000000000000..dcaefa224ca0
--- /dev/null
+++ b/tools/testing/selftests/pid_namespace/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -g -I../../../../usr/include/
+
+TEST_GEN_PROGS := regression_enomem
+
+include ../lib.mk
+
+$(OUTPUT)/regression_enomem: regression_enomem.c ../pidfd/pidfd.h
diff --git a/tools/testing/selftests/pid_namespace/config b/tools/testing/selftests/pid_namespace/config
new file mode 100644
index 000000000000..26cdb27e7dbb
--- /dev/null
+++ b/tools/testing/selftests/pid_namespace/config
@@ -0,0 +1,2 @@
+CONFIG_PID_NS=y
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/pid_namespace/regression_enomem.c b/tools/testing/selftests/pid_namespace/regression_enomem.c
new file mode 100644
index 000000000000..73d532556d17
--- /dev/null
+++ b/tools/testing/selftests/pid_namespace/regression_enomem.c
@@ -0,0 +1,45 @@
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+#include "../pidfd/pidfd.h"
+
+/*
+ * Regression test for:
+ * 35f71bc0a09a ("fork: report pid reservation failure properly")
+ * b26ebfe12f34 ("pid: Fix error return value in some cases")
+ */
+TEST(regression_enomem)
+{
+	pid_t pid;
+
+	if (geteuid())
+		EXPECT_EQ(0, unshare(CLONE_NEWUSER));
+
+	EXPECT_EQ(0, unshare(CLONE_NEWPID));
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0)
+		exit(EXIT_SUCCESS);
+
+	EXPECT_EQ(0, wait_for_pid(pid));
+
+	pid = fork();
+	ASSERT_LT(pid, 0);
+	ASSERT_EQ(errno, ENOMEM);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 3a779c084d96..2d4db5afb142 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -1,5 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
 pidfd_open_test
 pidfd_poll_test
 pidfd_test
 pidfd_wait
+pidfd_fdinfo_test
 pidfd_getfd_test
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index d482515604db..c1921a53dbed 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -13,6 +13,8 @@
 #include <string.h>
 #include <syscall.h>
 #include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/wait.h>
 
 #include "../kselftest.h"
 
diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore
index 6d4fd014511c..28bc6ca13cc6 100644
--- a/tools/testing/selftests/powerpc/alignment/.gitignore
+++ b/tools/testing/selftests/powerpc/alignment/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 copy_first_unaligned
 alignment_handler
diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore
index 9161679b1e1a..c9ce13983c99 100644
--- a/tools/testing/selftests/powerpc/benchmarks/.gitignore
+++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 gettimeofday
 context_switch
 fork
diff --git a/tools/testing/selftests/powerpc/cache_shape/.gitignore b/tools/testing/selftests/powerpc/cache_shape/.gitignore
index ec1848434be5..b385eee3012c 100644
--- a/tools/testing/selftests/powerpc/cache_shape/.gitignore
+++ b/tools/testing/selftests/powerpc/cache_shape/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 cache_shape
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index 12ef5b031974..ddaf140b8255 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 copyuser_64_t0
 copyuser_64_t1
 copyuser_64_t2
diff --git a/tools/testing/selftests/powerpc/dscr/.gitignore b/tools/testing/selftests/powerpc/dscr/.gitignore
index b585c6c1564a..1d08b15af697 100644
--- a/tools/testing/selftests/powerpc/dscr/.gitignore
+++ b/tools/testing/selftests/powerpc/dscr/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 dscr_default_test
 dscr_explicit_test
 dscr_inherit_exec_test
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
index 50ded63e25b7..e31ca6f453ed 100644
--- a/tools/testing/selftests/powerpc/math/.gitignore
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 fpu_syscall
 vmx_syscall
 fpu_preempt
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 0ebeaea22641..7cf7ad261d02 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 hugetlb_vs_thp_test
 subpage_prot
 tempfile
diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore
index e748f336eed3..ff7896903d7b 100644
--- a/tools/testing/selftests/powerpc/pmu/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 count_instructions
 l3_bank_test
 per_event_excludes
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
index 42bddbed8b64..2920fb39439b 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 reg_access_test
 event_attributes_test
 cycles_test
diff --git a/tools/testing/selftests/powerpc/primitives/.gitignore b/tools/testing/selftests/powerpc/primitives/.gitignore
index 4cc4e31bed1d..1e5c04e24254 100644
--- a/tools/testing/selftests/powerpc/primitives/.gitignore
+++ b/tools/testing/selftests/powerpc/primitives/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 load_unaligned_zeropad
diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
index dce19f221c46..0e96150b7c7e 100644
--- a/tools/testing/selftests/powerpc/ptrace/.gitignore
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 ptrace-gpr
 ptrace-tm-gpr
 ptrace-tm-spd-gpr
diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore
index 0b969fba3beb..f795e06f5ae3 100644
--- a/tools/testing/selftests/powerpc/security/.gitignore
+++ b/tools/testing/selftests/powerpc/security/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 rfi_flush
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
index dca5852a1546..f897b55a44dd 100644
--- a/tools/testing/selftests/powerpc/signal/.gitignore
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 signal
 signal_tm
 sigfuz
diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore
index 31a17e0ba884..b0dfc74aa57e 100644
--- a/tools/testing/selftests/powerpc/stringloops/.gitignore
+++ b/tools/testing/selftests/powerpc/stringloops/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 memcmp_64
 memcmp_32
 strlen
diff --git a/tools/testing/selftests/powerpc/switch_endian/.gitignore b/tools/testing/selftests/powerpc/switch_endian/.gitignore
index 89e762eab676..30e962cf84d1 100644
--- a/tools/testing/selftests/powerpc/switch_endian/.gitignore
+++ b/tools/testing/selftests/powerpc/switch_endian/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 switch_endian_test
 check-reversed.S
diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore
index f0f3fcc9d802..b00cab225476 100644
--- a/tools/testing/selftests/powerpc/syscalls/.gitignore
+++ b/tools/testing/selftests/powerpc/syscalls/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 ipc_unmuxed
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 98f2708d86cc..7baf2a46002f 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 tm-resched-dscr
 tm-syscall
 tm-signal-msr-resv
diff --git a/tools/testing/selftests/powerpc/vphn/.gitignore b/tools/testing/selftests/powerpc/vphn/.gitignore
index 7c04395010cb..b744aedfd1f2 100644
--- a/tools/testing/selftests/powerpc/vphn/.gitignore
+++ b/tools/testing/selftests/powerpc/vphn/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 test-vphn
diff --git a/tools/testing/selftests/prctl/.gitignore b/tools/testing/selftests/prctl/.gitignore
index 0b5c27447bf6..91af2b631bc9 100644
--- a/tools/testing/selftests/prctl/.gitignore
+++ b/tools/testing/selftests/prctl/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 disable-tsc-ctxt-sw-stress-test
 disable-tsc-on-off-stress-test
 disable-tsc-test
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 66fab4c58ed4..4bca5a9327a4 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 /fd-001-lookup
 /fd-002-posix-eq
 /fd-003-kthread
diff --git a/tools/testing/selftests/pstore/.gitignore b/tools/testing/selftests/pstore/.gitignore
index 5a4a26e5464b..9938fb406389 100644
--- a/tools/testing/selftests/pstore/.gitignore
+++ b/tools/testing/selftests/pstore/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 logs
 *uuid
diff --git a/tools/testing/selftests/ptp/.gitignore b/tools/testing/selftests/ptp/.gitignore
index f562e49d6917..534ca26eee48 100644
--- a/tools/testing/selftests/ptp/.gitignore
+++ b/tools/testing/selftests/ptp/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 testptp
diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore
index cfcc49a7def7..7bebf9534a86 100644
--- a/tools/testing/selftests/ptrace/.gitignore
+++ b/tools/testing/selftests/ptrace/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 get_syscall_info
 peeksiginfo
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
index c0b7f89f0930..2f1f532c39db 100644
--- a/tools/testing/selftests/ptrace/Makefile
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -iquote../../../../include/uapi -Wall
+CFLAGS += -std=c99 -pthread -iquote../../../../include/uapi -Wall
 
-TEST_GEN_PROGS := get_syscall_info peeksiginfo
+TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess
 
 include ../lib.mk
diff --git a/tools/testing/selftests/ptrace/vmaccess.c b/tools/testing/selftests/ptrace/vmaccess.c
new file mode 100644
index 000000000000..4db327b44586
--- /dev/null
+++ b/tools/testing/selftests/ptrace/vmaccess.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2020 Bernd Edlinger <bernd.edlinger@hotmail.de>
+ * All rights reserved.
+ *
+ * Check whether /proc/$pid/mem can be accessed without causing deadlocks
+ * when de_thread is blocked with ->cred_guard_mutex held.
+ */
+
+#include "../kselftest_harness.h"
+#include <stdio.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+
+static void *thread(void *arg)
+{
+	ptrace(PTRACE_TRACEME, 0, 0L, 0L);
+	return NULL;
+}
+
+TEST(vmaccess)
+{
+	int f, pid = fork();
+	char mm[64];
+
+	if (!pid) {
+		pthread_t pt;
+
+		pthread_create(&pt, NULL, thread, NULL);
+		pthread_join(pt, NULL);
+		execlp("true", "true", NULL);
+	}
+
+	sleep(1);
+	sprintf(mm, "/proc/%d/mem", pid);
+	f = open(mm, O_RDONLY);
+	ASSERT_GE(f, 0);
+	close(f);
+	f = kill(pid, SIGCONT);
+	ASSERT_EQ(f, 0);
+}
+
+TEST(attach)
+{
+	int s, k, pid = fork();
+
+	if (!pid) {
+		pthread_t pt;
+
+		pthread_create(&pt, NULL, thread, NULL);
+		pthread_join(pt, NULL);
+		execlp("sleep", "sleep", "2", NULL);
+	}
+
+	sleep(1);
+	k = ptrace(PTRACE_ATTACH, pid, 0L, 0L);
+	ASSERT_EQ(errno, EAGAIN);
+	ASSERT_EQ(k, -1);
+	k = waitpid(-1, &s, WNOHANG);
+	ASSERT_NE(k, -1);
+	ASSERT_NE(k, 0);
+	ASSERT_NE(k, pid);
+	ASSERT_EQ(WIFEXITED(s), 1);
+	ASSERT_EQ(WEXITSTATUS(s), 0);
+	sleep(1);
+	k = ptrace(PTRACE_ATTACH, pid, 0L, 0L);
+	ASSERT_EQ(k, 0);
+	k = waitpid(-1, &s, 0);
+	ASSERT_EQ(k, pid);
+	ASSERT_EQ(WIFSTOPPED(s), 1);
+	ASSERT_EQ(WSTOPSIG(s), SIGSTOP);
+	k = ptrace(PTRACE_DETACH, pid, 0L, 0L);
+	ASSERT_EQ(k, 0);
+	k = waitpid(-1, &s, 0);
+	ASSERT_EQ(k, pid);
+	ASSERT_EQ(WIFEXITED(s), 1);
+	ASSERT_EQ(WEXITSTATUS(s), 0);
+	k = waitpid(-1, NULL, 0);
+	ASSERT_EQ(k, -1);
+	ASSERT_EQ(errno, ECHILD);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/rcutorture/.gitignore b/tools/testing/selftests/rcutorture/.gitignore
index ccc240275d1c..f6cbce77460b 100644
--- a/tools/testing/selftests/rcutorture/.gitignore
+++ b/tools/testing/selftests/rcutorture/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 initrd
 b[0-9]*
 res
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index c3a49fb4d6f6..12810229fddc 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -12,7 +12,7 @@
 # Returns 1 if the specified boot-parameter string tells rcutorture to
 # test CPU-hotplug operations.
 bootparam_hotplug_cpu () {
-	echo "$1" | grep -q "rcutorture\.onoff_"
+	echo "$1" | grep -q "torture\.onoff_"
 }
 
 # checkarg --argname argtype $# arg mustmatch cannotmatch
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 1871d00bccd7..6f50722f251f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -20,7 +20,9 @@
 rundir="${1}"
 if test -z "$rundir" -o ! -d "$rundir"
 then
+	echo Directory "$rundir" not found.
 	echo Usage: $0 directory
+	exit 1
 fi
 editor=${EDITOR-vi}
 
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index e5edd5198725..0326f4a5ff9c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -13,6 +13,9 @@
 #
 # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 
+T=/tmp/kvm-recheck.sh.$$
+trap 'rm -f $T' 0 2
+
 PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
 . functions.sh
 for rd in "$@"
@@ -68,4 +71,16 @@ do
 		fi
 	done
 done
-EDITOR=echo kvm-find-errors.sh "${@: -1}" > /dev/null 2>&1
+EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1
+ret=$?
+builderrors="`tr ' ' '\012' < $T | grep -c '/Make.out.diags'`"
+if test "$builderrors" -gt 0
+then
+	echo $builderrors runs with build errors.
+fi
+runerrors="`tr ' ' '\012' < $T | grep -c '/console.log.diags'`"
+if test "$runerrors" -gt 0
+then
+	echo $runerrors runs with runtime errors.
+fi
+exit $ret
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 78d18ab8e954..2315e2ec12d6 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -39,7 +39,7 @@ TORTURE_TRUST_MAKE=""
 resdir=""
 configs=""
 cpus=0
-ds=`date +%Y.%m.%d-%H:%M:%S`
+ds=`date +%Y.%m.%d-%H.%M.%S`
 jitter="-1"
 
 usage () {
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
index e19a444a0684..0e92d85313aa 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -3,3 +3,5 @@ CONFIG_PRINTK_TIME=y
 CONFIG_HYPERVISOR_GUEST=y
 CONFIG_PARAVIRT=y
 CONFIG_KVM_GUEST=y
+CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n
+CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
new file mode 100644
index 000000000000..2debe7891aeb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=100
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_DEBUG_OBJECTS=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
index 712a3d41a325..24e27957efcc 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 srcu.c
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
index 1d016e66980a..57d296341304 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 srcu.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
index f47cb2045f13..d65462d64816 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 *.out
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
new file mode 100644
index 000000000000..d585cc1948cc
--- /dev/null
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -0,0 +1,17 @@
+CC = $(CROSS_COMPILE)gcc
+CFLAGS = -g -Wall
+SRCS=$(wildcard *.c)
+OBJS=$(SRCS:.c=.o)
+
+all: resctrl_tests
+
+$(OBJS): $(SRCS)
+	$(CC) $(CFLAGS) -c $(SRCS)
+
+resctrl_tests: $(OBJS)
+	$(CC) $(CFLAGS) -o $@ $^
+
+.PHONY: clean
+
+clean:
+	$(RM) $(OBJS) resctrl_tests
diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README
new file mode 100644
index 000000000000..6e5a0ffa18e8
--- /dev/null
+++ b/tools/testing/selftests/resctrl/README
@@ -0,0 +1,53 @@
+resctrl_tests - resctrl file system test suit
+
+Authors:
+	Fenghua Yu <fenghua.yu@intel.com>
+	Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+
+resctrl_tests tests various resctrl functionalities and interfaces including
+both software and hardware.
+
+Currently it supports Memory Bandwidth Monitoring test and Memory Bandwidth
+Allocation test on Intel RDT hardware. More tests will be added in the future.
+And the test suit can be extended to cover AMD QoS and ARM MPAM hardware
+as well.
+
+BUILD
+-----
+
+Run "make" to build executable file "resctrl_tests".
+
+RUN
+---
+
+To use resctrl_tests, root or sudoer privileges are required. This is because
+the test needs to mount resctrl file system and change contents in the file
+system.
+
+Executing the test without any parameter will run all supported tests:
+
+	sudo ./resctrl_tests
+
+OVERVIEW OF EXECUTION
+---------------------
+
+A test case has four stages:
+
+  - setup: mount resctrl file system, create group, setup schemata, move test
+    process pids to tasks, start benchmark.
+  - execute: let benchmark run
+  - verify: get resctrl data and verify the data with another source, e.g.
+    perf event.
+  - teardown: umount resctrl and clear temporary files.
+
+ARGUMENTS
+---------
+
+Parameter '-h' shows usage information.
+
+usage: resctrl_tests [-h] [-b "benchmark_cmd [options]"] [-t test list] [-n no_of_bits]
+        -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM default benchmark is builtin fill_buf
+        -t test list: run tests specified in the test list, e.g. -t mbm, mba, cqm, cat
+        -n no_of_bits: run cache tests using specified no of bits in cache bit mask
+        -p cpu_no: specify CPU number to run the test. 1 is default
+        -h: help
diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
new file mode 100644
index 000000000000..38dbf4962e33
--- /dev/null
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdint.h>
+#include "resctrl.h"
+
+struct read_format {
+	__u64 nr;			/* The number of events */
+	struct {
+		__u64 value;		/* The value of the event */
+	} values[2];
+};
+
+static struct perf_event_attr pea_llc_miss;
+static struct read_format rf_cqm;
+static int fd_lm;
+char llc_occup_path[1024];
+
+static void initialize_perf_event_attr(void)
+{
+	pea_llc_miss.type = PERF_TYPE_HARDWARE;
+	pea_llc_miss.size = sizeof(struct perf_event_attr);
+	pea_llc_miss.read_format = PERF_FORMAT_GROUP;
+	pea_llc_miss.exclude_kernel = 1;
+	pea_llc_miss.exclude_hv = 1;
+	pea_llc_miss.exclude_idle = 1;
+	pea_llc_miss.exclude_callchain_kernel = 1;
+	pea_llc_miss.inherit = 1;
+	pea_llc_miss.exclude_guest = 1;
+	pea_llc_miss.disabled = 1;
+}
+
+static void ioctl_perf_event_ioc_reset_enable(void)
+{
+	ioctl(fd_lm, PERF_EVENT_IOC_RESET, 0);
+	ioctl(fd_lm, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+static int perf_event_open_llc_miss(pid_t pid, int cpu_no)
+{
+	fd_lm = perf_event_open(&pea_llc_miss, pid, cpu_no, -1,
+				PERF_FLAG_FD_CLOEXEC);
+	if (fd_lm == -1) {
+		perror("Error opening leader");
+		ctrlc_handler(0, NULL, NULL);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int initialize_llc_perf(void)
+{
+	memset(&pea_llc_miss, 0, sizeof(struct perf_event_attr));
+	memset(&rf_cqm, 0, sizeof(struct read_format));
+
+	/* Initialize perf_event_attr structures for HW_CACHE_MISSES */
+	initialize_perf_event_attr();
+
+	pea_llc_miss.config = PERF_COUNT_HW_CACHE_MISSES;
+
+	rf_cqm.nr = 1;
+
+	return 0;
+}
+
+static int reset_enable_llc_perf(pid_t pid, int cpu_no)
+{
+	int ret = 0;
+
+	ret = perf_event_open_llc_miss(pid, cpu_no);
+	if (ret < 0)
+		return ret;
+
+	/* Start counters to log values */
+	ioctl_perf_event_ioc_reset_enable();
+
+	return 0;
+}
+
+/*
+ * get_llc_perf:	llc cache miss through perf events
+ * @cpu_no:		CPU number that the benchmark PID is binded to
+ *
+ * Perf events like HW_CACHE_MISSES could be used to validate number of
+ * cache lines allocated.
+ *
+ * Return: =0 on success.  <0 on failure.
+ */
+static int get_llc_perf(unsigned long *llc_perf_miss)
+{
+	__u64 total_misses;
+
+	/* Stop counters after one span to get miss rate */
+
+	ioctl(fd_lm, PERF_EVENT_IOC_DISABLE, 0);
+
+	if (read(fd_lm, &rf_cqm, sizeof(struct read_format)) == -1) {
+		perror("Could not get llc misses through perf");
+
+		return -1;
+	}
+
+	total_misses = rf_cqm.values[0].value;
+
+	close(fd_lm);
+
+	*llc_perf_miss = total_misses;
+
+	return 0;
+}
+
+/*
+ * Get LLC Occupancy as reported by RESCTRL FS
+ * For CQM,
+ * 1. If con_mon grp and mon grp given, then read from mon grp in
+ * con_mon grp
+ * 2. If only con_mon grp given, then read from con_mon grp
+ * 3. If both not given, then read from root con_mon grp
+ * For CAT,
+ * 1. If con_mon grp given, then read from it
+ * 2. If con_mon grp not given, then read from root con_mon grp
+ *
+ * Return: =0 on success.  <0 on failure.
+ */
+static int get_llc_occu_resctrl(unsigned long *llc_occupancy)
+{
+	FILE *fp;
+
+	fp = fopen(llc_occup_path, "r");
+	if (!fp) {
+		perror("Failed to open results file");
+
+		return errno;
+	}
+	if (fscanf(fp, "%lu", llc_occupancy) <= 0) {
+		perror("Could not get llc occupancy");
+		fclose(fp);
+
+		return -1;
+	}
+	fclose(fp);
+
+	return 0;
+}
+
+/*
+ * print_results_cache:	the cache results are stored in a file
+ * @filename:		file that stores the results
+ * @bm_pid:		child pid that runs benchmark
+ * @llc_value:		perf miss value /
+ *			llc occupancy value reported by resctrl FS
+ *
+ * Return:		0 on success. non-zero on failure.
+ */
+static int print_results_cache(char *filename, int bm_pid,
+			       unsigned long llc_value)
+{
+	FILE *fp;
+
+	if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
+		printf("Pid: %d \t LLC_value: %lu\n", bm_pid,
+		       llc_value);
+	} else {
+		fp = fopen(filename, "a");
+		if (!fp) {
+			perror("Cannot open results file");
+
+			return errno;
+		}
+		fprintf(fp, "Pid: %d \t llc_value: %lu\n", bm_pid, llc_value);
+		fclose(fp);
+	}
+
+	return 0;
+}
+
+int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
+{
+	unsigned long llc_perf_miss = 0, llc_occu_resc = 0, llc_value = 0;
+	int ret;
+
+	/*
+	 * Measure cache miss from perf.
+	 */
+	if (!strcmp(param->resctrl_val, "cat")) {
+		ret = get_llc_perf(&llc_perf_miss);
+		if (ret < 0)
+			return ret;
+		llc_value = llc_perf_miss;
+	}
+
+	/*
+	 * Measure llc occupancy from resctrl.
+	 */
+	if (!strcmp(param->resctrl_val, "cqm")) {
+		ret = get_llc_occu_resctrl(&llc_occu_resc);
+		if (ret < 0)
+			return ret;
+		llc_value = llc_occu_resc;
+	}
+	ret = print_results_cache(param->filename, bm_pid, llc_value);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * cache_val:		execute benchmark and measure LLC occupancy resctrl
+ * and perf cache miss for the benchmark
+ * @param:		parameters passed to cache_val()
+ *
+ * Return:		0 on success. non-zero on failure.
+ */
+int cat_val(struct resctrl_val_param *param)
+{
+	int malloc_and_init_memory = 1, memflush = 1, operation = 0, ret = 0;
+	char *resctrl_val = param->resctrl_val;
+	pid_t bm_pid;
+
+	if (strcmp(param->filename, "") == 0)
+		sprintf(param->filename, "stdio");
+
+	bm_pid = getpid();
+
+	/* Taskset benchmark to specified cpu */
+	ret = taskset_benchmark(bm_pid, param->cpu_no);
+	if (ret)
+		return ret;
+
+	/* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/
+	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
+				      resctrl_val);
+	if (ret)
+		return ret;
+
+	if ((strcmp(resctrl_val, "cat") == 0)) {
+		ret = initialize_llc_perf();
+		if (ret)
+			return ret;
+	}
+
+	/* Test runs until the callback setup() tells the test to stop. */
+	while (1) {
+		if (strcmp(resctrl_val, "cat") == 0) {
+			ret = param->setup(1, param);
+			if (ret) {
+				ret = 0;
+				break;
+			}
+			ret = reset_enable_llc_perf(bm_pid, param->cpu_no);
+			if (ret)
+				break;
+
+			if (run_fill_buf(param->span, malloc_and_init_memory,
+					 memflush, operation, resctrl_val)) {
+				fprintf(stderr, "Error-running fill buffer\n");
+				ret = -1;
+				break;
+			}
+
+			sleep(1);
+			ret = measure_cache_vals(param, bm_pid);
+			if (ret)
+				break;
+		} else {
+			break;
+		}
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
new file mode 100644
index 000000000000..5da43767b973
--- /dev/null
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cache Allocation Technology (CAT) test
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ *    Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+#include <unistd.h>
+
+#define RESULT_FILE_NAME1	"result_cat1"
+#define RESULT_FILE_NAME2	"result_cat2"
+#define NUM_OF_RUNS		5
+#define MAX_DIFF_PERCENT	4
+#define MAX_DIFF		1000000
+
+int count_of_bits;
+char cbm_mask[256];
+unsigned long long_mask;
+unsigned long cache_size;
+
+/*
+ * Change schemata. Write schemata to specified
+ * con_mon grp, mon_grp in resctrl FS.
+ * Run 5 times in order to get average values.
+ */
+static int cat_setup(int num, ...)
+{
+	struct resctrl_val_param *p;
+	char schemata[64];
+	va_list param;
+	int ret = 0;
+
+	va_start(param, num);
+	p = va_arg(param, struct resctrl_val_param *);
+	va_end(param);
+
+	/* Run NUM_OF_RUNS times */
+	if (p->num_of_runs >= NUM_OF_RUNS)
+		return -1;
+
+	if (p->num_of_runs == 0) {
+		sprintf(schemata, "%lx", p->mask);
+		ret = write_schemata(p->ctrlgrp, schemata, p->cpu_no,
+				     p->resctrl_val);
+	}
+	p->num_of_runs++;
+
+	return ret;
+}
+
+static void show_cache_info(unsigned long sum_llc_perf_miss, int no_of_bits,
+			    unsigned long span)
+{
+	unsigned long allocated_cache_lines = span / 64;
+	unsigned long avg_llc_perf_miss = 0;
+	float diff_percent;
+
+	avg_llc_perf_miss = sum_llc_perf_miss / (NUM_OF_RUNS - 1);
+	diff_percent = ((float)allocated_cache_lines - avg_llc_perf_miss) /
+				allocated_cache_lines * 100;
+
+	printf("%sok CAT: cache miss rate within %d%%\n",
+	       !is_amd && abs((int)diff_percent) > MAX_DIFF_PERCENT ?
+	       "not " : "", MAX_DIFF_PERCENT);
+	tests_run++;
+	printf("# Percent diff=%d\n", abs((int)diff_percent));
+	printf("# Number of bits: %d\n", no_of_bits);
+	printf("# Avg_llc_perf_miss: %lu\n", avg_llc_perf_miss);
+	printf("# Allocated cache lines: %lu\n", allocated_cache_lines);
+}
+
+static int check_results(struct resctrl_val_param *param)
+{
+	char *token_array[8], temp[512];
+	unsigned long sum_llc_perf_miss = 0;
+	int runs = 0, no_of_bits = 0;
+	FILE *fp;
+
+	printf("# Checking for pass/fail\n");
+	fp = fopen(param->filename, "r");
+	if (!fp) {
+		perror("# Cannot open file");
+
+		return errno;
+	}
+
+	while (fgets(temp, sizeof(temp), fp)) {
+		char *token = strtok(temp, ":\t");
+		int fields = 0;
+
+		while (token) {
+			token_array[fields++] = token;
+			token = strtok(NULL, ":\t");
+		}
+		/*
+		 * Discard the first value which is inaccurate due to monitoring
+		 * setup transition phase.
+		 */
+		if (runs > 0)
+			sum_llc_perf_miss += strtoul(token_array[3], NULL, 0);
+		runs++;
+	}
+
+	fclose(fp);
+	no_of_bits = count_bits(param->mask);
+
+	show_cache_info(sum_llc_perf_miss, no_of_bits, param->span);
+
+	return 0;
+}
+
+void cat_test_cleanup(void)
+{
+	remove(RESULT_FILE_NAME1);
+	remove(RESULT_FILE_NAME2);
+}
+
+int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
+{
+	unsigned long l_mask, l_mask_1;
+	int ret, pipefd[2], sibling_cpu_no;
+	char pipe_message;
+	pid_t bm_pid;
+
+	cache_size = 0;
+
+	ret = remount_resctrlfs(true);
+	if (ret)
+		return ret;
+
+	if (!validate_resctrl_feature_request("cat"))
+		return -1;
+
+	/* Get default cbm mask for L3/L2 cache */
+	ret = get_cbm_mask(cache_type);
+	if (ret)
+		return ret;
+
+	long_mask = strtoul(cbm_mask, NULL, 16);
+
+	/* Get L3/L2 cache size */
+	ret = get_cache_size(cpu_no, cache_type, &cache_size);
+	if (ret)
+		return ret;
+	printf("cache size :%lu\n", cache_size);
+
+	/* Get max number of bits from default-cabm mask */
+	count_of_bits = count_bits(long_mask);
+
+	if (n < 1 || n > count_of_bits - 1) {
+		printf("Invalid input value for no_of_bits n!\n");
+		printf("Please Enter value in range 1 to %d\n",
+		       count_of_bits - 1);
+		return -1;
+	}
+
+	/* Get core id from same socket for running another thread */
+	sibling_cpu_no = get_core_sibling(cpu_no);
+	if (sibling_cpu_no < 0)
+		return -1;
+
+	struct resctrl_val_param param = {
+		.resctrl_val	= "cat",
+		.cpu_no		= cpu_no,
+		.mum_resctrlfs	= 0,
+		.setup		= cat_setup,
+	};
+
+	l_mask = long_mask >> n;
+	l_mask_1 = ~l_mask & long_mask;
+
+	/* Set param values for parent thread which will be allocated bitmask
+	 * with (max_bits - n) bits
+	 */
+	param.span = cache_size * (count_of_bits - n) / count_of_bits;
+	strcpy(param.ctrlgrp, "c2");
+	strcpy(param.mongrp, "m2");
+	strcpy(param.filename, RESULT_FILE_NAME2);
+	param.mask = l_mask;
+	param.num_of_runs = 0;
+
+	if (pipe(pipefd)) {
+		perror("# Unable to create pipe");
+		return errno;
+	}
+
+	bm_pid = fork();
+
+	/* Set param values for child thread which will be allocated bitmask
+	 * with n bits
+	 */
+	if (bm_pid == 0) {
+		param.mask = l_mask_1;
+		strcpy(param.ctrlgrp, "c1");
+		strcpy(param.mongrp, "m1");
+		param.span = cache_size * n / count_of_bits;
+		strcpy(param.filename, RESULT_FILE_NAME1);
+		param.num_of_runs = 0;
+		param.cpu_no = sibling_cpu_no;
+	}
+
+	remove(param.filename);
+
+	ret = cat_val(&param);
+	if (ret)
+		return ret;
+
+	ret = check_results(&param);
+	if (ret)
+		return ret;
+
+	if (bm_pid == 0) {
+		/* Tell parent that child is ready */
+		close(pipefd[0]);
+		pipe_message = 1;
+		if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
+		    sizeof(pipe_message)) {
+			close(pipefd[1]);
+			perror("# failed signaling parent process");
+			return errno;
+		}
+
+		close(pipefd[1]);
+		while (1)
+			;
+	} else {
+		/* Parent waits for child to be ready. */
+		close(pipefd[1]);
+		pipe_message = 0;
+		while (pipe_message != 1) {
+			if (read(pipefd[0], &pipe_message,
+				 sizeof(pipe_message)) < sizeof(pipe_message)) {
+				perror("# failed reading from child process");
+				break;
+			}
+		}
+		close(pipefd[0]);
+		kill(bm_pid, SIGKILL);
+	}
+
+	cat_test_cleanup();
+	if (bm_pid)
+		umount_resctrlfs();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/resctrl/cqm_test.c b/tools/testing/selftests/resctrl/cqm_test.c
new file mode 100644
index 000000000000..c8756152bd61
--- /dev/null
+++ b/tools/testing/selftests/resctrl/cqm_test.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cache Monitoring Technology (CQM) test
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ *    Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+#include <unistd.h>
+
+#define RESULT_FILE_NAME	"result_cqm"
+#define NUM_OF_RUNS		5
+#define MAX_DIFF		2000000
+#define MAX_DIFF_PERCENT	15
+
+int count_of_bits;
+char cbm_mask[256];
+unsigned long long_mask;
+unsigned long cache_size;
+
+static int cqm_setup(int num, ...)
+{
+	struct resctrl_val_param *p;
+	va_list param;
+
+	va_start(param, num);
+	p = va_arg(param, struct resctrl_val_param *);
+	va_end(param);
+
+	/* Run NUM_OF_RUNS times */
+	if (p->num_of_runs >= NUM_OF_RUNS)
+		return -1;
+
+	p->num_of_runs++;
+
+	return 0;
+}
+
+static void show_cache_info(unsigned long sum_llc_occu_resc, int no_of_bits,
+			    unsigned long span)
+{
+	unsigned long avg_llc_occu_resc = 0;
+	float diff_percent;
+	long avg_diff = 0;
+	bool res;
+
+	avg_llc_occu_resc = sum_llc_occu_resc / (NUM_OF_RUNS - 1);
+	avg_diff = (long)abs(span - avg_llc_occu_resc);
+
+	diff_percent = (((float)span - avg_llc_occu_resc) / span) * 100;
+
+	if ((abs((int)diff_percent) <= MAX_DIFF_PERCENT) ||
+	    (abs(avg_diff) <= MAX_DIFF))
+		res = true;
+	else
+		res = false;
+
+	printf("%sok CQM: diff within %d, %d\%%\n", res ? "" : "not",
+	       MAX_DIFF, (int)MAX_DIFF_PERCENT);
+
+	printf("# diff: %ld\n", avg_diff);
+	printf("# percent diff=%d\n", abs((int)diff_percent));
+	printf("# Results are displayed in (Bytes)\n");
+	printf("# Number of bits: %d\n", no_of_bits);
+	printf("# Avg_llc_occu_resc: %lu\n", avg_llc_occu_resc);
+	printf("# llc_occu_exp (span): %lu\n", span);
+
+	tests_run++;
+}
+
+static int check_results(struct resctrl_val_param *param, int no_of_bits)
+{
+	char *token_array[8], temp[512];
+	unsigned long sum_llc_occu_resc = 0;
+	int runs = 0;
+	FILE *fp;
+
+	printf("# checking for pass/fail\n");
+	fp = fopen(param->filename, "r");
+	if (!fp) {
+		perror("# Error in opening file\n");
+
+		return errno;
+	}
+
+	while (fgets(temp, 1024, fp)) {
+		char *token = strtok(temp, ":\t");
+		int fields = 0;
+
+		while (token) {
+			token_array[fields++] = token;
+			token = strtok(NULL, ":\t");
+		}
+
+		/* Field 3 is llc occ resc value */
+		if (runs > 0)
+			sum_llc_occu_resc += strtoul(token_array[3], NULL, 0);
+		runs++;
+	}
+	fclose(fp);
+	show_cache_info(sum_llc_occu_resc, no_of_bits, param->span);
+
+	return 0;
+}
+
+void cqm_test_cleanup(void)
+{
+	remove(RESULT_FILE_NAME);
+}
+
+int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
+{
+	int ret, mum_resctrlfs;
+
+	cache_size = 0;
+	mum_resctrlfs = 1;
+
+	ret = remount_resctrlfs(mum_resctrlfs);
+	if (ret)
+		return ret;
+
+	if (!validate_resctrl_feature_request("cqm"))
+		return -1;
+
+	ret = get_cbm_mask("L3");
+	if (ret)
+		return ret;
+
+	long_mask = strtoul(cbm_mask, NULL, 16);
+
+	ret = get_cache_size(cpu_no, "L3", &cache_size);
+	if (ret)
+		return ret;
+	printf("cache size :%lu\n", cache_size);
+
+	count_of_bits = count_bits(long_mask);
+
+	if (n < 1 || n > count_of_bits) {
+		printf("Invalid input value for numbr_of_bits n!\n");
+		printf("Please Enter value in range 1 to %d\n", count_of_bits);
+		return -1;
+	}
+
+	struct resctrl_val_param param = {
+		.resctrl_val	= "cqm",
+		.ctrlgrp	= "c1",
+		.mongrp		= "m1",
+		.cpu_no		= cpu_no,
+		.mum_resctrlfs	= 0,
+		.filename	= RESULT_FILE_NAME,
+		.mask		= ~(long_mask << n) & long_mask,
+		.span		= cache_size * n / count_of_bits,
+		.num_of_runs	= 0,
+		.setup		= cqm_setup,
+	};
+
+	if (strcmp(benchmark_cmd[0], "fill_buf") == 0)
+		sprintf(benchmark_cmd[1], "%lu", param.span);
+
+	remove(RESULT_FILE_NAME);
+
+	ret = resctrl_val(benchmark_cmd, &param);
+	if (ret)
+		return ret;
+
+	ret = check_results(&param, n);
+	if (ret)
+		return ret;
+
+	cqm_test_cleanup();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
new file mode 100644
index 000000000000..79c611c99a3d
--- /dev/null
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fill_buf benchmark
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ *    Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <inttypes.h>
+#include <malloc.h>
+#include <string.h>
+
+#include "resctrl.h"
+
+#define CL_SIZE			(64)
+#define PAGE_SIZE		(4 * 1024)
+#define MB			(1024 * 1024)
+
+static unsigned char *startptr;
+
+static void sb(void)
+{
+#if defined(__i386) || defined(__x86_64)
+	asm volatile("sfence\n\t"
+		     : : : "memory");
+#endif
+}
+
+static void ctrl_handler(int signo)
+{
+	free(startptr);
+	printf("\nEnding\n");
+	sb();
+	exit(EXIT_SUCCESS);
+}
+
+static void cl_flush(void *p)
+{
+#if defined(__i386) || defined(__x86_64)
+	asm volatile("clflush (%0)\n\t"
+		     : : "r"(p) : "memory");
+#endif
+}
+
+static void mem_flush(void *p, size_t s)
+{
+	char *cp = (char *)p;
+	size_t i = 0;
+
+	s = s / CL_SIZE; /* mem size in cache llines */
+
+	for (i = 0; i < s; i++)
+		cl_flush(&cp[i * CL_SIZE]);
+
+	sb();
+}
+
+static void *malloc_and_init_memory(size_t s)
+{
+	uint64_t *p64;
+	size_t s64;
+
+	void *p = memalign(PAGE_SIZE, s);
+
+	p64 = (uint64_t *)p;
+	s64 = s / sizeof(uint64_t);
+
+	while (s64 > 0) {
+		*p64 = (uint64_t)rand();
+		p64 += (CL_SIZE / sizeof(uint64_t));
+		s64 -= (CL_SIZE / sizeof(uint64_t));
+	}
+
+	return p;
+}
+
+static int fill_one_span_read(unsigned char *start_ptr, unsigned char *end_ptr)
+{
+	unsigned char sum, *p;
+
+	sum = 0;
+	p = start_ptr;
+	while (p < end_ptr) {
+		sum += *p;
+		p += (CL_SIZE / 2);
+	}
+
+	return sum;
+}
+
+static
+void fill_one_span_write(unsigned char *start_ptr, unsigned char *end_ptr)
+{
+	unsigned char *p;
+
+	p = start_ptr;
+	while (p < end_ptr) {
+		*p = '1';
+		p += (CL_SIZE / 2);
+	}
+}
+
+static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr,
+			   char *resctrl_val)
+{
+	int ret = 0;
+	FILE *fp;
+
+	while (1) {
+		ret = fill_one_span_read(start_ptr, end_ptr);
+		if (!strcmp(resctrl_val, "cat"))
+			break;
+	}
+
+	/* Consume read result so that reading memory is not optimized out. */
+	fp = fopen("/dev/null", "w");
+	if (!fp)
+		perror("Unable to write to /dev/null");
+	fprintf(fp, "Sum: %d ", ret);
+	fclose(fp);
+
+	return 0;
+}
+
+static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr,
+			    char *resctrl_val)
+{
+	while (1) {
+		fill_one_span_write(start_ptr, end_ptr);
+		if (!strcmp(resctrl_val, "cat"))
+			break;
+	}
+
+	return 0;
+}
+
+static int
+fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush,
+	   int op, char *resctrl_val)
+{
+	unsigned char *start_ptr, *end_ptr;
+	unsigned long long i;
+	int ret;
+
+	if (malloc_and_init)
+		start_ptr = malloc_and_init_memory(buf_size);
+	else
+		start_ptr = malloc(buf_size);
+
+	if (!start_ptr)
+		return -1;
+
+	startptr = start_ptr;
+	end_ptr = start_ptr + buf_size;
+
+	/*
+	 * It's better to touch the memory once to avoid any compiler
+	 * optimizations
+	 */
+	if (!malloc_and_init) {
+		for (i = 0; i < buf_size; i++)
+			*start_ptr++ = (unsigned char)rand();
+	}
+
+	start_ptr = startptr;
+
+	/* Flush the memory before using to avoid "cache hot pages" effect */
+	if (memflush)
+		mem_flush(start_ptr, buf_size);
+
+	if (op == 0)
+		ret = fill_cache_read(start_ptr, end_ptr, resctrl_val);
+	else
+		ret = fill_cache_write(start_ptr, end_ptr, resctrl_val);
+
+	if (ret) {
+		printf("\n Error in fill cache read/write...\n");
+		return -1;
+	}
+
+	free(startptr);
+
+	return 0;
+}
+
+int run_fill_buf(unsigned long span, int malloc_and_init_memory,
+		 int memflush, int op, char *resctrl_val)
+{
+	unsigned long long cache_size = span;
+	int ret;
+
+	/* set up ctrl-c handler */
+	if (signal(SIGINT, ctrl_handler) == SIG_ERR)
+		printf("Failed to catch SIGINT!\n");
+	if (signal(SIGHUP, ctrl_handler) == SIG_ERR)
+		printf("Failed to catch SIGHUP!\n");
+
+	ret = fill_cache(cache_size, malloc_and_init_memory, memflush, op,
+			 resctrl_val);
+	if (ret) {
+		printf("\n Error in fill cache\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
new file mode 100644
index 000000000000..7bf8eaa6204b
--- /dev/null
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Memory Bandwidth Allocation (MBA) test
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ *    Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+
+#define RESULT_FILE_NAME	"result_mba"
+#define NUM_OF_RUNS		5
+#define MAX_DIFF		300
+#define ALLOCATION_MAX		100
+#define ALLOCATION_MIN		10
+#define ALLOCATION_STEP		10
+
+/*
+ * Change schemata percentage from 100 to 10%. Write schemata to specified
+ * con_mon grp, mon_grp in resctrl FS.
+ * For each allocation, run 5 times in order to get average values.
+ */
+static int mba_setup(int num, ...)
+{
+	static int runs_per_allocation, allocation = 100;
+	struct resctrl_val_param *p;
+	char allocation_str[64];
+	va_list param;
+
+	va_start(param, num);
+	p = va_arg(param, struct resctrl_val_param *);
+	va_end(param);
+
+	if (runs_per_allocation >= NUM_OF_RUNS)
+		runs_per_allocation = 0;
+
+	/* Only set up schemata once every NUM_OF_RUNS of allocations */
+	if (runs_per_allocation++ != 0)
+		return 0;
+
+	if (allocation < ALLOCATION_MIN || allocation > ALLOCATION_MAX)
+		return -1;
+
+	sprintf(allocation_str, "%d", allocation);
+
+	write_schemata(p->ctrlgrp, allocation_str, p->cpu_no, p->resctrl_val);
+	allocation -= ALLOCATION_STEP;
+
+	return 0;
+}
+
+static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
+{
+	int allocation, runs;
+	bool failed = false;
+
+	printf("# Results are displayed in (MB)\n");
+	/* Memory bandwidth from 100% down to 10% */
+	for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP;
+	     allocation++) {
+		unsigned long avg_bw_imc, avg_bw_resc;
+		unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
+		unsigned long avg_diff;
+
+		/*
+		 * The first run is discarded due to inaccurate value from
+		 * phase transition.
+		 */
+		for (runs = NUM_OF_RUNS * allocation + 1;
+		     runs < NUM_OF_RUNS * allocation + NUM_OF_RUNS ; runs++) {
+			sum_bw_imc += bw_imc[runs];
+			sum_bw_resc += bw_resc[runs];
+		}
+
+		avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1);
+		avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1);
+		avg_diff = labs((long)(avg_bw_resc - avg_bw_imc));
+
+		printf("%sok MBA schemata percentage %u smaller than %d %%\n",
+		       avg_diff > MAX_DIFF ? "not " : "",
+		       ALLOCATION_MAX - ALLOCATION_STEP * allocation,
+		       MAX_DIFF);
+		tests_run++;
+		printf("# avg_diff: %lu\n", avg_diff);
+		printf("# avg_bw_imc: %lu\n", avg_bw_imc);
+		printf("# avg_bw_resc: %lu\n", avg_bw_resc);
+		if (avg_diff > MAX_DIFF)
+			failed = true;
+	}
+
+	printf("%sok schemata change using MBA%s\n", failed ? "not " : "",
+	       failed ? " # at least one test failed" : "");
+	tests_run++;
+}
+
+static int check_results(void)
+{
+	char *token_array[8], output[] = RESULT_FILE_NAME, temp[512];
+	unsigned long bw_imc[1024], bw_resc[1024];
+	int runs;
+	FILE *fp;
+
+	fp = fopen(output, "r");
+	if (!fp) {
+		perror(output);
+
+		return errno;
+	}
+
+	runs = 0;
+	while (fgets(temp, sizeof(temp), fp)) {
+		char *token = strtok(temp, ":\t");
+		int fields = 0;
+
+		while (token) {
+			token_array[fields++] = token;
+			token = strtok(NULL, ":\t");
+		}
+
+		/* Field 3 is perf imc value */
+		bw_imc[runs] = strtoul(token_array[3], NULL, 0);
+		/* Field 5 is resctrl value */
+		bw_resc[runs] = strtoul(token_array[5], NULL, 0);
+		runs++;
+	}
+
+	fclose(fp);
+
+	show_mba_info(bw_imc, bw_resc);
+
+	return 0;
+}
+
+void mba_test_cleanup(void)
+{
+	remove(RESULT_FILE_NAME);
+}
+
+int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
+{
+	struct resctrl_val_param param = {
+		.resctrl_val	= "mba",
+		.ctrlgrp	= "c1",
+		.mongrp		= "m1",
+		.cpu_no		= cpu_no,
+		.mum_resctrlfs	= 1,
+		.filename	= RESULT_FILE_NAME,
+		.bw_report	= bw_report,
+		.setup		= mba_setup
+	};
+	int ret;
+
+	remove(RESULT_FILE_NAME);
+
+	if (!validate_resctrl_feature_request("mba"))
+		return -1;
+
+	ret = resctrl_val(benchmark_cmd, &param);
+	if (ret)
+		return ret;
+
+	ret = check_results();
+	if (ret)
+		return ret;
+
+	mba_test_cleanup();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
new file mode 100644
index 000000000000..4700f7453f81
--- /dev/null
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Memory Bandwidth Monitoring (MBM) test
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ *    Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+
+#define RESULT_FILE_NAME	"result_mbm"
+#define MAX_DIFF		300
+#define NUM_OF_RUNS		5
+
+static void
+show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span)
+{
+	unsigned long avg_bw_imc = 0, avg_bw_resc = 0;
+	unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
+	long avg_diff = 0;
+	int runs;
+
+	/*
+	 * Discard the first value which is inaccurate due to monitoring setup
+	 * transition phase.
+	 */
+	for (runs = 1; runs < NUM_OF_RUNS ; runs++) {
+		sum_bw_imc += bw_imc[runs];
+		sum_bw_resc += bw_resc[runs];
+	}
+
+	avg_bw_imc = sum_bw_imc / 4;
+	avg_bw_resc = sum_bw_resc / 4;
+	avg_diff = avg_bw_resc - avg_bw_imc;
+
+	printf("%sok MBM: diff within %d%%\n",
+	       labs(avg_diff) > MAX_DIFF ? "not " : "", MAX_DIFF);
+	tests_run++;
+	printf("# avg_diff: %lu\n", labs(avg_diff));
+	printf("# Span (MB): %d\n", span);
+	printf("# avg_bw_imc: %lu\n", avg_bw_imc);
+	printf("# avg_bw_resc: %lu\n", avg_bw_resc);
+}
+
+static int check_results(int span)
+{
+	unsigned long bw_imc[NUM_OF_RUNS], bw_resc[NUM_OF_RUNS];
+	char temp[1024], *token_array[8];
+	char output[] = RESULT_FILE_NAME;
+	int runs;
+	FILE *fp;
+
+	printf("# Checking for pass/fail\n");
+
+	fp = fopen(output, "r");
+	if (!fp) {
+		perror(output);
+
+		return errno;
+	}
+
+	runs = 0;
+	while (fgets(temp, sizeof(temp), fp)) {
+		char *token = strtok(temp, ":\t");
+		int i = 0;
+
+		while (token) {
+			token_array[i++] = token;
+			token = strtok(NULL, ":\t");
+		}
+
+		bw_resc[runs] = strtoul(token_array[5], NULL, 0);
+		bw_imc[runs] = strtoul(token_array[3], NULL, 0);
+		runs++;
+	}
+
+	show_bw_info(bw_imc, bw_resc, span);
+
+	fclose(fp);
+
+	return 0;
+}
+
+static int mbm_setup(int num, ...)
+{
+	struct resctrl_val_param *p;
+	static int num_of_runs;
+	va_list param;
+	int ret = 0;
+
+	/* Run NUM_OF_RUNS times */
+	if (num_of_runs++ >= NUM_OF_RUNS)
+		return -1;
+
+	va_start(param, num);
+	p = va_arg(param, struct resctrl_val_param *);
+	va_end(param);
+
+	/* Set up shemata with 100% allocation on the first run. */
+	if (num_of_runs == 0)
+		ret = write_schemata(p->ctrlgrp, "100", p->cpu_no,
+				     p->resctrl_val);
+
+	return ret;
+}
+
+void mbm_test_cleanup(void)
+{
+	remove(RESULT_FILE_NAME);
+}
+
+int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
+{
+	struct resctrl_val_param param = {
+		.resctrl_val	= "mbm",
+		.ctrlgrp	= "c1",
+		.mongrp		= "m1",
+		.span		= span,
+		.cpu_no		= cpu_no,
+		.mum_resctrlfs	= 1,
+		.filename	= RESULT_FILE_NAME,
+		.bw_report	=  bw_report,
+		.setup		= mbm_setup
+	};
+	int ret;
+
+	remove(RESULT_FILE_NAME);
+
+	if (!validate_resctrl_feature_request("mbm"))
+		return -1;
+
+	ret = resctrl_val(benchmark_cmd, &param);
+	if (ret)
+		return ret;
+
+	ret = check_results(span);
+	if (ret)
+		return ret;
+
+	mbm_test_cleanup();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
new file mode 100644
index 000000000000..39bf59c6b9c5
--- /dev/null
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+#ifndef RESCTRL_H
+#define RESCTRL_H
+#include <stdio.h>
+#include <stdarg.h>
+#include <math.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <signal.h>
+#include <dirent.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/eventfd.h>
+#include <asm/unistd.h>
+#include <linux/perf_event.h>
+
+#define MB			(1024 * 1024)
+#define RESCTRL_PATH		"/sys/fs/resctrl"
+#define PHYS_ID_PATH		"/sys/devices/system/cpu/cpu"
+#define CBM_MASK_PATH		"/sys/fs/resctrl/info"
+
+#define PARENT_EXIT(err_msg)			\
+	do {					\
+		perror(err_msg);		\
+		kill(ppid, SIGKILL);		\
+		exit(EXIT_FAILURE);		\
+	} while (0)
+
+/*
+ * resctrl_val_param:	resctrl test parameters
+ * @resctrl_val:	Resctrl feature (Eg: mbm, mba.. etc)
+ * @ctrlgrp:		Name of the control monitor group (con_mon grp)
+ * @mongrp:		Name of the monitor group (mon grp)
+ * @cpu_no:		CPU number to which the benchmark would be binded
+ * @span:		Memory bytes accessed in each benchmark iteration
+ * @mum_resctrlfs:	Should the resctrl FS be remounted?
+ * @filename:		Name of file to which the o/p should be written
+ * @bw_report:		Bandwidth report type (reads vs writes)
+ * @setup:		Call back function to setup test environment
+ */
+struct resctrl_val_param {
+	char		*resctrl_val;
+	char		ctrlgrp[64];
+	char		mongrp[64];
+	int		cpu_no;
+	unsigned long	span;
+	int		mum_resctrlfs;
+	char		filename[64];
+	char		*bw_report;
+	unsigned long	mask;
+	int		num_of_runs;
+	int		(*setup)(int num, ...);
+};
+
+pid_t bm_pid, ppid;
+int tests_run;
+
+char llc_occup_path[1024];
+bool is_amd;
+
+bool check_resctrlfs_support(void);
+int filter_dmesg(void);
+int remount_resctrlfs(bool mum_resctrlfs);
+int get_resource_id(int cpu_no, int *resource_id);
+int umount_resctrlfs(void);
+int validate_bw_report_request(char *bw_report);
+bool validate_resctrl_feature_request(char *resctrl_val);
+char *fgrep(FILE *inf, const char *str);
+int taskset_benchmark(pid_t bm_pid, int cpu_no);
+void run_benchmark(int signum, siginfo_t *info, void *ucontext);
+int write_schemata(char *ctrlgrp, char *schemata, int cpu_no,
+		   char *resctrl_val);
+int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
+			    char *resctrl_val);
+int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
+		    int group_fd, unsigned long flags);
+int run_fill_buf(unsigned long span, int malloc_and_init_memory, int memflush,
+		 int op, char *resctrl_va);
+int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param);
+int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd);
+void tests_cleanup(void);
+void mbm_test_cleanup(void);
+int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd);
+void mba_test_cleanup(void);
+int get_cbm_mask(char *cache_type);
+int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size);
+void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
+int cat_val(struct resctrl_val_param *param);
+void cat_test_cleanup(void);
+int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type);
+int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd);
+unsigned int count_bits(unsigned long n);
+void cqm_test_cleanup(void);
+int get_core_sibling(int cpu_no);
+int measure_cache_vals(struct resctrl_val_param *param, int bm_pid);
+
+#endif /* RESCTRL_H */
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
new file mode 100644
index 000000000000..425cc85ac883
--- /dev/null
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Resctrl tests
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ *    Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+
+#define BENCHMARK_ARGS		64
+#define BENCHMARK_ARG_SIZE	64
+
+bool is_amd;
+
+void detect_amd(void)
+{
+	FILE *inf = fopen("/proc/cpuinfo", "r");
+	char *res;
+
+	if (!inf)
+		return;
+
+	res = fgrep(inf, "vendor_id");
+
+	if (res) {
+		char *s = strchr(res, ':');
+
+		is_amd = s && !strcmp(s, ": AuthenticAMD\n");
+		free(res);
+	}
+	fclose(inf);
+}
+
+static void cmd_help(void)
+{
+	printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n");
+	printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM");
+	printf("\t default benchmark is builtin fill_buf\n");
+	printf("\t-t test list: run tests specified in the test list, ");
+	printf("e.g. -t mbm, mba, cqm, cat\n");
+	printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n");
+	printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n");
+	printf("\t-h: help\n");
+}
+
+void tests_cleanup(void)
+{
+	mbm_test_cleanup();
+	mba_test_cleanup();
+	cqm_test_cleanup();
+	cat_test_cleanup();
+}
+
+int main(int argc, char **argv)
+{
+	bool has_ben = false, mbm_test = true, mba_test = true, cqm_test = true;
+	int res, c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 5;
+	char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64];
+	char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE];
+	int ben_ind, ben_count;
+	bool cat_test = true;
+
+	for (i = 0; i < argc; i++) {
+		if (strcmp(argv[i], "-b") == 0) {
+			ben_ind = i + 1;
+			ben_count = argc - ben_ind;
+			argc_new = ben_ind - 1;
+			has_ben = true;
+			break;
+		}
+	}
+
+	while ((c = getopt(argc_new, argv, "ht:b:")) != -1) {
+		char *token;
+
+		switch (c) {
+		case 't':
+			token = strtok(optarg, ",");
+
+			mbm_test = false;
+			mba_test = false;
+			cqm_test = false;
+			cat_test = false;
+			while (token) {
+				if (!strcmp(token, "mbm")) {
+					mbm_test = true;
+				} else if (!strcmp(token, "mba")) {
+					mba_test = true;
+				} else if (!strcmp(token, "cqm")) {
+					cqm_test = true;
+				} else if (!strcmp(token, "cat")) {
+					cat_test = true;
+				} else {
+					printf("invalid argument\n");
+
+					return -1;
+				}
+				token = strtok(NULL, ":\t");
+			}
+			break;
+		case 'p':
+			cpu_no = atoi(optarg);
+			break;
+		case 'n':
+			no_of_bits = atoi(optarg);
+			break;
+		case 'h':
+			cmd_help();
+
+			return 0;
+		default:
+			printf("invalid argument\n");
+
+			return -1;
+		}
+	}
+
+	printf("TAP version 13\n");
+
+	/*
+	 * Typically we need root privileges, because:
+	 * 1. We write to resctrl FS
+	 * 2. We execute perf commands
+	 */
+	if (geteuid() != 0)
+		printf("# WARNING: not running as root, tests may fail.\n");
+
+	/* Detect AMD vendor */
+	detect_amd();
+
+	if (has_ben) {
+		/* Extract benchmark command from command line. */
+		for (i = ben_ind; i < argc; i++) {
+			benchmark_cmd[i - ben_ind] = benchmark_cmd_area[i];
+			sprintf(benchmark_cmd[i - ben_ind], "%s", argv[i]);
+		}
+		benchmark_cmd[ben_count] = NULL;
+	} else {
+		/* If no benchmark is given by "-b" argument, use fill_buf. */
+		for (i = 0; i < 6; i++)
+			benchmark_cmd[i] = benchmark_cmd_area[i];
+
+		strcpy(benchmark_cmd[0], "fill_buf");
+		sprintf(benchmark_cmd[1], "%d", span);
+		strcpy(benchmark_cmd[2], "1");
+		strcpy(benchmark_cmd[3], "1");
+		strcpy(benchmark_cmd[4], "0");
+		strcpy(benchmark_cmd[5], "");
+		benchmark_cmd[6] = NULL;
+	}
+
+	sprintf(bw_report, "reads");
+	sprintf(bm_type, "fill_buf");
+
+	check_resctrlfs_support();
+	filter_dmesg();
+
+	if (!is_amd && mbm_test) {
+		printf("# Starting MBM BW change ...\n");
+		if (!has_ben)
+			sprintf(benchmark_cmd[5], "%s", "mba");
+		res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
+		printf("%sok MBM: bw change\n", res ? "not " : "");
+		mbm_test_cleanup();
+		tests_run++;
+	}
+
+	if (!is_amd && mba_test) {
+		printf("# Starting MBA Schemata change ...\n");
+		if (!has_ben)
+			sprintf(benchmark_cmd[1], "%d", span);
+		res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
+		printf("%sok MBA: schemata change\n", res ? "not " : "");
+		mba_test_cleanup();
+		tests_run++;
+	}
+
+	if (cqm_test) {
+		printf("# Starting CQM test ...\n");
+		if (!has_ben)
+			sprintf(benchmark_cmd[5], "%s", "cqm");
+		res = cqm_resctrl_val(cpu_no, no_of_bits, benchmark_cmd);
+		printf("%sok CQM: test\n", res ? "not " : "");
+		cqm_test_cleanup();
+		tests_run++;
+	}
+
+	if (cat_test) {
+		printf("# Starting CAT test ...\n");
+		res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
+		printf("%sok CAT: test\n", res ? "not " : "");
+		tests_run++;
+		cat_test_cleanup();
+	}
+
+	printf("1..%d\n", tests_run);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
new file mode 100644
index 000000000000..520fea3606d1
--- /dev/null
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -0,0 +1,744 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Memory bandwidth monitoring and allocation library
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ *    Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+
+#define UNCORE_IMC		"uncore_imc"
+#define READ_FILE_NAME		"events/cas_count_read"
+#define WRITE_FILE_NAME		"events/cas_count_write"
+#define DYN_PMU_PATH		"/sys/bus/event_source/devices"
+#define SCALE			0.00006103515625
+#define MAX_IMCS		20
+#define MAX_TOKENS		5
+#define READ			0
+#define WRITE			1
+#define CON_MON_MBM_LOCAL_BYTES_PATH				\
+	"%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
+
+#define CON_MBM_LOCAL_BYTES_PATH		\
+	"%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
+
+#define MON_MBM_LOCAL_BYTES_PATH		\
+	"%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
+
+#define MBM_LOCAL_BYTES_PATH			\
+	"%s/mon_data/mon_L3_%02d/mbm_local_bytes"
+
+#define CON_MON_LCC_OCCUP_PATH		\
+	"%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"
+
+#define CON_LCC_OCCUP_PATH		\
+	"%s/%s/mon_data/mon_L3_%02d/llc_occupancy"
+
+#define MON_LCC_OCCUP_PATH		\
+	"%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"
+
+#define LCC_OCCUP_PATH			\
+	"%s/mon_data/mon_L3_%02d/llc_occupancy"
+
+struct membw_read_format {
+	__u64 value;         /* The value of the event */
+	__u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
+	__u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
+	__u64 id;            /* if PERF_FORMAT_ID */
+};
+
+struct imc_counter_config {
+	__u32 type;
+	__u64 event;
+	__u64 umask;
+	struct perf_event_attr pe;
+	struct membw_read_format return_value;
+	int fd;
+};
+
+static char mbm_total_path[1024];
+static int imcs;
+static struct imc_counter_config imc_counters_config[MAX_IMCS][2];
+
+void membw_initialize_perf_event_attr(int i, int j)
+{
+	memset(&imc_counters_config[i][j].pe, 0,
+	       sizeof(struct perf_event_attr));
+	imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type;
+	imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr);
+	imc_counters_config[i][j].pe.disabled = 1;
+	imc_counters_config[i][j].pe.inherit = 1;
+	imc_counters_config[i][j].pe.exclude_guest = 0;
+	imc_counters_config[i][j].pe.config =
+		imc_counters_config[i][j].umask << 8 |
+		imc_counters_config[i][j].event;
+	imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER;
+	imc_counters_config[i][j].pe.read_format =
+		PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
+}
+
+void membw_ioctl_perf_event_ioc_reset_enable(int i, int j)
+{
+	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0);
+	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+void membw_ioctl_perf_event_ioc_disable(int i, int j)
+{
+	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0);
+}
+
+/*
+ * get_event_and_umask:	Parse config into event and umask
+ * @cas_count_cfg:	Config
+ * @count:		iMC number
+ * @op:			Operation (read/write)
+ */
+void get_event_and_umask(char *cas_count_cfg, int count, bool op)
+{
+	char *token[MAX_TOKENS];
+	int i = 0;
+
+	strcat(cas_count_cfg, ",");
+	token[0] = strtok(cas_count_cfg, "=,");
+
+	for (i = 1; i < MAX_TOKENS; i++)
+		token[i] = strtok(NULL, "=,");
+
+	for (i = 0; i < MAX_TOKENS; i++) {
+		if (!token[i])
+			break;
+		if (strcmp(token[i], "event") == 0) {
+			if (op == READ)
+				imc_counters_config[count][READ].event =
+				strtol(token[i + 1], NULL, 16);
+			else
+				imc_counters_config[count][WRITE].event =
+				strtol(token[i + 1], NULL, 16);
+		}
+		if (strcmp(token[i], "umask") == 0) {
+			if (op == READ)
+				imc_counters_config[count][READ].umask =
+				strtol(token[i + 1], NULL, 16);
+			else
+				imc_counters_config[count][WRITE].umask =
+				strtol(token[i + 1], NULL, 16);
+		}
+	}
+}
+
+static int open_perf_event(int i, int cpu_no, int j)
+{
+	imc_counters_config[i][j].fd =
+		perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1,
+				PERF_FLAG_FD_CLOEXEC);
+
+	if (imc_counters_config[i][j].fd == -1) {
+		fprintf(stderr, "Error opening leader %llx\n",
+			imc_counters_config[i][j].pe.config);
+
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Get type and config (read and write) of an iMC counter */
+static int read_from_imc_dir(char *imc_dir, int count)
+{
+	char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024];
+	FILE *fp;
+
+	/* Get type of iMC counter */
+	sprintf(imc_counter_type, "%s%s", imc_dir, "type");
+	fp = fopen(imc_counter_type, "r");
+	if (!fp) {
+		perror("Failed to open imc counter type file");
+
+		return -1;
+	}
+	if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) {
+		perror("Could not get imc type");
+		fclose(fp);
+
+		return -1;
+	}
+	fclose(fp);
+
+	imc_counters_config[count][WRITE].type =
+				imc_counters_config[count][READ].type;
+
+	/* Get read config */
+	sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
+	fp = fopen(imc_counter_cfg, "r");
+	if (!fp) {
+		perror("Failed to open imc config file");
+
+		return -1;
+	}
+	if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
+		perror("Could not get imc cas count read");
+		fclose(fp);
+
+		return -1;
+	}
+	fclose(fp);
+
+	get_event_and_umask(cas_count_cfg, count, READ);
+
+	/* Get write config */
+	sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME);
+	fp = fopen(imc_counter_cfg, "r");
+	if (!fp) {
+		perror("Failed to open imc config file");
+
+		return -1;
+	}
+	if  (fscanf(fp, "%s", cas_count_cfg) <= 0) {
+		perror("Could not get imc cas count write");
+		fclose(fp);
+
+		return -1;
+	}
+	fclose(fp);
+
+	get_event_and_umask(cas_count_cfg, count, WRITE);
+
+	return 0;
+}
+
+/*
+ * A system can have 'n' number of iMC (Integrated Memory Controller)
+ * counters, get that 'n'. For each iMC counter get it's type and config.
+ * Also, each counter has two configs, one for read and the other for write.
+ * A config again has two parts, event and umask.
+ * Enumerate all these details into an array of structures.
+ *
+ * Return: >= 0 on success. < 0 on failure.
+ */
+static int num_of_imcs(void)
+{
+	unsigned int count = 0;
+	char imc_dir[512];
+	struct dirent *ep;
+	int ret;
+	DIR *dp;
+
+	dp = opendir(DYN_PMU_PATH);
+	if (dp) {
+		while ((ep = readdir(dp))) {
+			if (strstr(ep->d_name, UNCORE_IMC)) {
+				sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
+					ep->d_name);
+				ret = read_from_imc_dir(imc_dir, count);
+				if (ret) {
+					closedir(dp);
+
+					return ret;
+				}
+				count++;
+			}
+		}
+		closedir(dp);
+		if (count == 0) {
+			perror("Unable find iMC counters!\n");
+
+			return -1;
+		}
+	} else {
+		perror("Unable to open PMU directory!\n");
+
+		return -1;
+	}
+
+	return count;
+}
+
+static int initialize_mem_bw_imc(void)
+{
+	int imc, j;
+
+	imcs = num_of_imcs();
+	if (imcs <= 0)
+		return imcs;
+
+	/* Initialize perf_event_attr structures for all iMC's */
+	for (imc = 0; imc < imcs; imc++) {
+		for (j = 0; j < 2; j++)
+			membw_initialize_perf_event_attr(imc, j);
+	}
+
+	return 0;
+}
+
+/*
+ * get_mem_bw_imc:	Memory band width as reported by iMC counters
+ * @cpu_no:		CPU number that the benchmark PID is binded to
+ * @bw_report:		Bandwidth report type (reads, writes)
+ *
+ * Memory B/W utilized by a process on a socket can be calculated using
+ * iMC counters. Perf events are used to read these counters.
+ *
+ * Return: >= 0 on success. < 0 on failure.
+ */
+static float get_mem_bw_imc(int cpu_no, char *bw_report)
+{
+	float reads, writes, of_mul_read, of_mul_write;
+	int imc, j, ret;
+
+	/* Start all iMC counters to log values (both read and write) */
+	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
+	for (imc = 0; imc < imcs; imc++) {
+		for (j = 0; j < 2; j++) {
+			ret = open_perf_event(imc, cpu_no, j);
+			if (ret)
+				return -1;
+		}
+		for (j = 0; j < 2; j++)
+			membw_ioctl_perf_event_ioc_reset_enable(imc, j);
+	}
+
+	sleep(1);
+
+	/* Stop counters after a second to get results (both read and write) */
+	for (imc = 0; imc < imcs; imc++) {
+		for (j = 0; j < 2; j++)
+			membw_ioctl_perf_event_ioc_disable(imc, j);
+	}
+
+	/*
+	 * Get results which are stored in struct type imc_counter_config
+	 * Take over flow into consideration before calculating total b/w
+	 */
+	for (imc = 0; imc < imcs; imc++) {
+		struct imc_counter_config *r =
+			&imc_counters_config[imc][READ];
+		struct imc_counter_config *w =
+			&imc_counters_config[imc][WRITE];
+
+		if (read(r->fd, &r->return_value,
+			 sizeof(struct membw_read_format)) == -1) {
+			perror("Couldn't get read b/w through iMC");
+
+			return -1;
+		}
+
+		if (read(w->fd, &w->return_value,
+			 sizeof(struct membw_read_format)) == -1) {
+			perror("Couldn't get write bw through iMC");
+
+			return -1;
+		}
+
+		__u64 r_time_enabled = r->return_value.time_enabled;
+		__u64 r_time_running = r->return_value.time_running;
+
+		if (r_time_enabled != r_time_running)
+			of_mul_read = (float)r_time_enabled /
+					(float)r_time_running;
+
+		__u64 w_time_enabled = w->return_value.time_enabled;
+		__u64 w_time_running = w->return_value.time_running;
+
+		if (w_time_enabled != w_time_running)
+			of_mul_write = (float)w_time_enabled /
+					(float)w_time_running;
+		reads += r->return_value.value * of_mul_read * SCALE;
+		writes += w->return_value.value * of_mul_write * SCALE;
+	}
+
+	for (imc = 0; imc < imcs; imc++) {
+		close(imc_counters_config[imc][READ].fd);
+		close(imc_counters_config[imc][WRITE].fd);
+	}
+
+	if (strcmp(bw_report, "reads") == 0)
+		return reads;
+
+	if (strcmp(bw_report, "writes") == 0)
+		return writes;
+
+	return (reads + writes);
+}
+
+void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
+{
+	if (ctrlgrp && mongrp)
+		sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH,
+			RESCTRL_PATH, ctrlgrp, mongrp, resource_id);
+	else if (!ctrlgrp && mongrp)
+		sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
+			mongrp, resource_id);
+	else if (ctrlgrp && !mongrp)
+		sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
+			ctrlgrp, resource_id);
+	else if (!ctrlgrp && !mongrp)
+		sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
+			resource_id);
+}
+
+/*
+ * initialize_mem_bw_resctrl:	Appropriately populate "mbm_total_path"
+ * @ctrlgrp:			Name of the control monitor group (con_mon grp)
+ * @mongrp:			Name of the monitor group (mon grp)
+ * @cpu_no:			CPU number that the benchmark PID is binded to
+ * @resctrl_val:		Resctrl feature (Eg: mbm, mba.. etc)
+ */
+static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
+				      int cpu_no, char *resctrl_val)
+{
+	int resource_id;
+
+	if (get_resource_id(cpu_no, &resource_id) < 0) {
+		perror("Could not get resource_id");
+		return;
+	}
+
+	if (strcmp(resctrl_val, "mbm") == 0)
+		set_mbm_path(ctrlgrp, mongrp, resource_id);
+
+	if ((strcmp(resctrl_val, "mba") == 0)) {
+		if (ctrlgrp)
+			sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
+				RESCTRL_PATH, ctrlgrp, resource_id);
+		else
+			sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH,
+				RESCTRL_PATH, resource_id);
+	}
+}
+
+/*
+ * Get MBM Local bytes as reported by resctrl FS
+ * For MBM,
+ * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp
+ * 2. If only con_mon grp is given, then read from con_mon grp
+ * 3. If both are not given, then read from root con_mon grp
+ * For MBA,
+ * 1. If con_mon grp is given, then read from it
+ * 2. If con_mon grp is not given, then read from root con_mon grp
+ */
+static unsigned long get_mem_bw_resctrl(void)
+{
+	unsigned long mbm_total = 0;
+	FILE *fp;
+
+	fp = fopen(mbm_total_path, "r");
+	if (!fp) {
+		perror("Failed to open total bw file");
+
+		return -1;
+	}
+	if (fscanf(fp, "%lu", &mbm_total) <= 0) {
+		perror("Could not get mbm local bytes");
+		fclose(fp);
+
+		return -1;
+	}
+	fclose(fp);
+
+	return mbm_total;
+}
+
+pid_t bm_pid, ppid;
+
+void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
+{
+	kill(bm_pid, SIGKILL);
+	umount_resctrlfs();
+	tests_cleanup();
+	printf("Ending\n\n");
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * print_results_bw:	the memory bandwidth results are stored in a file
+ * @filename:		file that stores the results
+ * @bm_pid:		child pid that runs benchmark
+ * @bw_imc:		perf imc counter value
+ * @bw_resc:		memory bandwidth value
+ *
+ * Return:		0 on success. non-zero on failure.
+ */
+static int print_results_bw(char *filename,  int bm_pid, float bw_imc,
+			    unsigned long bw_resc)
+{
+	unsigned long diff = fabs(bw_imc - bw_resc);
+	FILE *fp;
+
+	if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
+		printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc);
+		printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);
+	} else {
+		fp = fopen(filename, "a");
+		if (!fp) {
+			perror("Cannot open results file");
+
+			return errno;
+		}
+		if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
+			    bm_pid, bw_imc, bw_resc, diff) <= 0) {
+			fclose(fp);
+			perror("Could not log results.");
+
+			return errno;
+		}
+		fclose(fp);
+	}
+
+	return 0;
+}
+
+static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num)
+{
+	if (strlen(ctrlgrp) && strlen(mongrp))
+		sprintf(llc_occup_path,	CON_MON_LCC_OCCUP_PATH,	RESCTRL_PATH,
+			ctrlgrp, mongrp, sock_num);
+	else if (!strlen(ctrlgrp) && strlen(mongrp))
+		sprintf(llc_occup_path,	MON_LCC_OCCUP_PATH, RESCTRL_PATH,
+			mongrp, sock_num);
+	else if (strlen(ctrlgrp) && !strlen(mongrp))
+		sprintf(llc_occup_path,	CON_LCC_OCCUP_PATH, RESCTRL_PATH,
+			ctrlgrp, sock_num);
+	else if (!strlen(ctrlgrp) && !strlen(mongrp))
+		sprintf(llc_occup_path, LCC_OCCUP_PATH,	RESCTRL_PATH, sock_num);
+}
+
+/*
+ * initialize_llc_occu_resctrl:	Appropriately populate "llc_occup_path"
+ * @ctrlgrp:			Name of the control monitor group (con_mon grp)
+ * @mongrp:			Name of the monitor group (mon grp)
+ * @cpu_no:			CPU number that the benchmark PID is binded to
+ * @resctrl_val:		Resctrl feature (Eg: cat, cqm.. etc)
+ */
+static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
+					int cpu_no, char *resctrl_val)
+{
+	int resource_id;
+
+	if (get_resource_id(cpu_no, &resource_id) < 0) {
+		perror("# Unable to resource_id");
+		return;
+	}
+
+	if (strcmp(resctrl_val, "cqm") == 0)
+		set_cqm_path(ctrlgrp, mongrp, resource_id);
+}
+
+static int
+measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
+{
+	unsigned long bw_imc, bw_resc, bw_resc_end;
+	int ret;
+
+	/*
+	 * Measure memory bandwidth from resctrl and from
+	 * another source which is perf imc value or could
+	 * be something else if perf imc event is not available.
+	 * Compare the two values to validate resctrl value.
+	 * It takes 1sec to measure the data.
+	 */
+	bw_imc = get_mem_bw_imc(param->cpu_no, param->bw_report);
+	if (bw_imc <= 0)
+		return bw_imc;
+
+	bw_resc_end = get_mem_bw_resctrl();
+	if (bw_resc_end <= 0)
+		return bw_resc_end;
+
+	bw_resc = (bw_resc_end - *bw_resc_start) / MB;
+	ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
+	if (ret)
+		return ret;
+
+	*bw_resc_start = bw_resc_end;
+
+	return 0;
+}
+
+/*
+ * resctrl_val:	execute benchmark and measure memory bandwidth on
+ *			the benchmark
+ * @benchmark_cmd:	benchmark command and its arguments
+ * @param:		parameters passed to resctrl_val()
+ *
+ * Return:		0 on success. non-zero on failure.
+ */
+int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
+{
+	char *resctrl_val = param->resctrl_val;
+	unsigned long bw_resc_start = 0;
+	struct sigaction sigact;
+	int ret = 0, pipefd[2];
+	char pipe_message = 0;
+	union sigval value;
+
+	if (strcmp(param->filename, "") == 0)
+		sprintf(param->filename, "stdio");
+
+	if ((strcmp(resctrl_val, "mba")) == 0 ||
+	    (strcmp(resctrl_val, "mbm")) == 0) {
+		ret = validate_bw_report_request(param->bw_report);
+		if (ret)
+			return ret;
+	}
+
+	ret = remount_resctrlfs(param->mum_resctrlfs);
+	if (ret)
+		return ret;
+
+	/*
+	 * If benchmark wasn't successfully started by child, then child should
+	 * kill parent, so save parent's pid
+	 */
+	ppid = getpid();
+
+	if (pipe(pipefd)) {
+		perror("# Unable to create pipe");
+
+		return -1;
+	}
+
+	/*
+	 * Fork to start benchmark, save child's pid so that it can be killed
+	 * when needed
+	 */
+	bm_pid = fork();
+	if (bm_pid == -1) {
+		perror("# Unable to fork");
+
+		return -1;
+	}
+
+	if (bm_pid == 0) {
+		/*
+		 * Mask all signals except SIGUSR1, parent uses SIGUSR1 to
+		 * start benchmark
+		 */
+		sigfillset(&sigact.sa_mask);
+		sigdelset(&sigact.sa_mask, SIGUSR1);
+
+		sigact.sa_sigaction = run_benchmark;
+		sigact.sa_flags = SA_SIGINFO;
+
+		/* Register for "SIGUSR1" signal from parent */
+		if (sigaction(SIGUSR1, &sigact, NULL))
+			PARENT_EXIT("Can't register child for signal");
+
+		/* Tell parent that child is ready */
+		close(pipefd[0]);
+		pipe_message = 1;
+		if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
+		    sizeof(pipe_message)) {
+			perror("# failed signaling parent process");
+			close(pipefd[1]);
+			return -1;
+		}
+		close(pipefd[1]);
+
+		/* Suspend child until delivery of "SIGUSR1" from parent */
+		sigsuspend(&sigact.sa_mask);
+
+		PARENT_EXIT("Child is done");
+	}
+
+	printf("# benchmark PID: %d\n", bm_pid);
+
+	/*
+	 * Register CTRL-C handler for parent, as it has to kill benchmark
+	 * before exiting
+	 */
+	sigact.sa_sigaction = ctrlc_handler;
+	sigemptyset(&sigact.sa_mask);
+	sigact.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGINT, &sigact, NULL) ||
+	    sigaction(SIGHUP, &sigact, NULL)) {
+		perror("# sigaction");
+		ret = errno;
+		goto out;
+	}
+
+	value.sival_ptr = benchmark_cmd;
+
+	/* Taskset benchmark to specified cpu */
+	ret = taskset_benchmark(bm_pid, param->cpu_no);
+	if (ret)
+		goto out;
+
+	/* Write benchmark to specified control&monitoring grp in resctrl FS */
+	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
+				      resctrl_val);
+	if (ret)
+		goto out;
+
+	if ((strcmp(resctrl_val, "mbm") == 0) ||
+	    (strcmp(resctrl_val, "mba") == 0)) {
+		ret = initialize_mem_bw_imc();
+		if (ret)
+			goto out;
+
+		initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
+					  param->cpu_no, resctrl_val);
+	} else if (strcmp(resctrl_val, "cqm") == 0)
+		initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp,
+					    param->cpu_no, resctrl_val);
+
+	/* Parent waits for child to be ready. */
+	close(pipefd[1]);
+	while (pipe_message != 1) {
+		if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) <
+		    sizeof(pipe_message)) {
+			perror("# failed reading message from child process");
+			close(pipefd[0]);
+			goto out;
+		}
+	}
+	close(pipefd[0]);
+
+	/* Signal child to start benchmark */
+	if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
+		perror("# sigqueue SIGUSR1 to child");
+		ret = errno;
+		goto out;
+	}
+
+	/* Give benchmark enough time to fully run */
+	sleep(1);
+
+	/* Test runs until the callback setup() tells the test to stop. */
+	while (1) {
+		if ((strcmp(resctrl_val, "mbm") == 0) ||
+		    (strcmp(resctrl_val, "mba") == 0)) {
+			ret = param->setup(1, param);
+			if (ret) {
+				ret = 0;
+				break;
+			}
+
+			ret = measure_vals(param, &bw_resc_start);
+			if (ret)
+				break;
+		} else if (strcmp(resctrl_val, "cqm") == 0) {
+			ret = param->setup(1, param);
+			if (ret) {
+				ret = 0;
+				break;
+			}
+			sleep(1);
+			ret = measure_cache_vals(param, bm_pid);
+			if (ret)
+				break;
+		} else {
+			break;
+		}
+	}
+
+out:
+	kill(bm_pid, SIGKILL);
+	umount_resctrlfs();
+
+	return ret;
+}
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
new file mode 100644
index 000000000000..19c0ec4045a4
--- /dev/null
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Basic resctrl file system operations
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ *    Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+
+int tests_run;
+
+static int find_resctrl_mount(char *buffer)
+{
+	FILE *mounts;
+	char line[256], *fs, *mntpoint;
+
+	mounts = fopen("/proc/mounts", "r");
+	if (!mounts) {
+		perror("/proc/mounts");
+		return -ENXIO;
+	}
+	while (!feof(mounts)) {
+		if (!fgets(line, 256, mounts))
+			break;
+		fs = strtok(line, " \t");
+		if (!fs)
+			continue;
+		mntpoint = strtok(NULL, " \t");
+		if (!mntpoint)
+			continue;
+		fs = strtok(NULL, " \t");
+		if (!fs)
+			continue;
+		if (strcmp(fs, "resctrl"))
+			continue;
+
+		fclose(mounts);
+		if (buffer)
+			strncpy(buffer, mntpoint, 256);
+
+		return 0;
+	}
+
+	fclose(mounts);
+
+	return -ENOENT;
+}
+
+char cbm_mask[256];
+
+/*
+ * remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl
+ * @mum_resctrlfs:	Should the resctrl FS be remounted?
+ *
+ * If not mounted, mount it.
+ * If mounted and mum_resctrlfs then remount resctrl FS.
+ * If mounted and !mum_resctrlfs then noop
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int remount_resctrlfs(bool mum_resctrlfs)
+{
+	char mountpoint[256];
+	int ret;
+
+	ret = find_resctrl_mount(mountpoint);
+	if (ret)
+		strcpy(mountpoint, RESCTRL_PATH);
+
+	if (!ret && mum_resctrlfs && umount(mountpoint)) {
+		printf("not ok unmounting \"%s\"\n", mountpoint);
+		perror("# umount");
+		tests_run++;
+	}
+
+	if (!ret && !mum_resctrlfs)
+		return 0;
+
+	ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL);
+	printf("%sok mounting resctrl to \"%s\"\n", ret ? "not " : "",
+	       RESCTRL_PATH);
+	if (ret)
+		perror("# mount");
+
+	tests_run++;
+
+	return ret;
+}
+
+int umount_resctrlfs(void)
+{
+	if (umount(RESCTRL_PATH)) {
+		perror("# Unable to umount resctrl");
+
+		return errno;
+	}
+
+	return 0;
+}
+
+/*
+ * get_resource_id - Get socket number/l3 id for a specified CPU
+ * @cpu_no:	CPU number
+ * @resource_id: Socket number or l3_id
+ *
+ * Return: >= 0 on success, < 0 on failure.
+ */
+int get_resource_id(int cpu_no, int *resource_id)
+{
+	char phys_pkg_path[1024];
+	FILE *fp;
+
+	if (is_amd)
+		sprintf(phys_pkg_path, "%s%d/cache/index3/id",
+			PHYS_ID_PATH, cpu_no);
+	else
+		sprintf(phys_pkg_path, "%s%d/topology/physical_package_id",
+			PHYS_ID_PATH, cpu_no);
+
+	fp = fopen(phys_pkg_path, "r");
+	if (!fp) {
+		perror("Failed to open physical_package_id");
+
+		return -1;
+	}
+	if (fscanf(fp, "%d", resource_id) <= 0) {
+		perror("Could not get socket number or l3 id");
+		fclose(fp);
+
+		return -1;
+	}
+	fclose(fp);
+
+	return 0;
+}
+
+/*
+ * get_cache_size - Get cache size for a specified CPU
+ * @cpu_no:	CPU number
+ * @cache_type:	Cache level L2/L3
+ * @cache_size:	pointer to cache_size
+ *
+ * Return: = 0 on success, < 0 on failure.
+ */
+int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size)
+{
+	char cache_path[1024], cache_str[64];
+	int length, i, cache_num;
+	FILE *fp;
+
+	if (!strcmp(cache_type, "L3")) {
+		cache_num = 3;
+	} else if (!strcmp(cache_type, "L2")) {
+		cache_num = 2;
+	} else {
+		perror("Invalid cache level");
+		return -1;
+	}
+
+	sprintf(cache_path, "/sys/bus/cpu/devices/cpu%d/cache/index%d/size",
+		cpu_no, cache_num);
+	fp = fopen(cache_path, "r");
+	if (!fp) {
+		perror("Failed to open cache size");
+
+		return -1;
+	}
+	if (fscanf(fp, "%s", cache_str) <= 0) {
+		perror("Could not get cache_size");
+		fclose(fp);
+
+		return -1;
+	}
+	fclose(fp);
+
+	length = (int)strlen(cache_str);
+
+	*cache_size = 0;
+
+	for (i = 0; i < length; i++) {
+		if ((cache_str[i] >= '0') && (cache_str[i] <= '9'))
+
+			*cache_size = *cache_size * 10 + (cache_str[i] - '0');
+
+		else if (cache_str[i] == 'K')
+
+			*cache_size = *cache_size * 1024;
+
+		else if (cache_str[i] == 'M')
+
+			*cache_size = *cache_size * 1024 * 1024;
+
+		else
+			break;
+	}
+
+	return 0;
+}
+
+#define CORE_SIBLINGS_PATH	"/sys/bus/cpu/devices/cpu"
+
+/*
+ * get_cbm_mask - Get cbm mask for given cache
+ * @cache_type:	Cache level L2/L3
+ *
+ * Mask is stored in cbm_mask which is global variable.
+ *
+ * Return: = 0 on success, < 0 on failure.
+ */
+int get_cbm_mask(char *cache_type)
+{
+	char cbm_mask_path[1024];
+	FILE *fp;
+
+	sprintf(cbm_mask_path, "%s/%s/cbm_mask", CBM_MASK_PATH, cache_type);
+
+	fp = fopen(cbm_mask_path, "r");
+	if (!fp) {
+		perror("Failed to open cache level");
+
+		return -1;
+	}
+	if (fscanf(fp, "%s", cbm_mask) <= 0) {
+		perror("Could not get max cbm_mask");
+		fclose(fp);
+
+		return -1;
+	}
+	fclose(fp);
+
+	return 0;
+}
+
+/*
+ * get_core_sibling - Get sibling core id from the same socket for given CPU
+ * @cpu_no:	CPU number
+ *
+ * Return:	> 0 on success, < 0 on failure.
+ */
+int get_core_sibling(int cpu_no)
+{
+	char core_siblings_path[1024], cpu_list_str[64];
+	int sibling_cpu_no = -1;
+	FILE *fp;
+
+	sprintf(core_siblings_path, "%s%d/topology/core_siblings_list",
+		CORE_SIBLINGS_PATH, cpu_no);
+
+	fp = fopen(core_siblings_path, "r");
+	if (!fp) {
+		perror("Failed to open core siblings path");
+
+		return -1;
+	}
+	if (fscanf(fp, "%s", cpu_list_str) <= 0) {
+		perror("Could not get core_siblings list");
+		fclose(fp);
+
+		return -1;
+	}
+	fclose(fp);
+
+	char *token = strtok(cpu_list_str, "-,");
+
+	while (token) {
+		sibling_cpu_no = atoi(token);
+		/* Skipping core 0 as we don't want to run test on core 0 */
+		if (sibling_cpu_no != 0)
+			break;
+		token = strtok(NULL, "-,");
+	}
+
+	return sibling_cpu_no;
+}
+
+/*
+ * taskset_benchmark - Taskset PID (i.e. benchmark) to a specified cpu
+ * @bm_pid:	PID that should be binded
+ * @cpu_no:	CPU number at which the PID would be binded
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int taskset_benchmark(pid_t bm_pid, int cpu_no)
+{
+	cpu_set_t my_set;
+
+	CPU_ZERO(&my_set);
+	CPU_SET(cpu_no, &my_set);
+
+	if (sched_setaffinity(bm_pid, sizeof(cpu_set_t), &my_set)) {
+		perror("Unable to taskset benchmark");
+
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
+ *		   in specified signal. Direct benchmark stdio to /dev/null.
+ * @signum:	signal number
+ * @info:	signal info
+ * @ucontext:	user context in signal handling
+ *
+ * Return: void
+ */
+void run_benchmark(int signum, siginfo_t *info, void *ucontext)
+{
+	int operation, ret, malloc_and_init_memory, memflush;
+	unsigned long span, buffer_span;
+	char **benchmark_cmd;
+	char resctrl_val[64];
+	FILE *fp;
+
+	benchmark_cmd = info->si_ptr;
+
+	/*
+	 * Direct stdio of child to /dev/null, so that only parent writes to
+	 * stdio (console)
+	 */
+	fp = freopen("/dev/null", "w", stdout);
+	if (!fp)
+		PARENT_EXIT("Unable to direct benchmark status to /dev/null");
+
+	if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
+		/* Execute default fill_buf benchmark */
+		span = strtoul(benchmark_cmd[1], NULL, 10);
+		malloc_and_init_memory = atoi(benchmark_cmd[2]);
+		memflush =  atoi(benchmark_cmd[3]);
+		operation = atoi(benchmark_cmd[4]);
+		sprintf(resctrl_val, "%s", benchmark_cmd[5]);
+
+		if (strcmp(resctrl_val, "cqm") != 0)
+			buffer_span = span * MB;
+		else
+			buffer_span = span;
+
+		if (run_fill_buf(buffer_span, malloc_and_init_memory, memflush,
+				 operation, resctrl_val))
+			fprintf(stderr, "Error in running fill buffer\n");
+	} else {
+		/* Execute specified benchmark */
+		ret = execvp(benchmark_cmd[0], benchmark_cmd);
+		if (ret)
+			perror("wrong\n");
+	}
+
+	fclose(stdout);
+	PARENT_EXIT("Unable to run specified benchmark");
+}
+
+/*
+ * create_grp - Create a group only if one doesn't exist
+ * @grp_name:	Name of the group
+ * @grp:	Full path and name of the group
+ * @parent_grp:	Full path and name of the parent group
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
+{
+	int found_grp = 0;
+	struct dirent *ep;
+	DIR *dp;
+
+	/*
+	 * At this point, we are guaranteed to have resctrl FS mounted and if
+	 * length of grp_name == 0, it means, user wants to use root con_mon
+	 * grp, so do nothing
+	 */
+	if (strlen(grp_name) == 0)
+		return 0;
+
+	/* Check if requested grp exists or not */
+	dp = opendir(parent_grp);
+	if (dp) {
+		while ((ep = readdir(dp)) != NULL) {
+			if (strcmp(ep->d_name, grp_name) == 0)
+				found_grp = 1;
+		}
+		closedir(dp);
+	} else {
+		perror("Unable to open resctrl for group");
+
+		return -1;
+	}
+
+	/* Requested grp doesn't exist, hence create it */
+	if (found_grp == 0) {
+		if (mkdir(grp, 0) == -1) {
+			perror("Unable to create group");
+
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int write_pid_to_tasks(char *tasks, pid_t pid)
+{
+	FILE *fp;
+
+	fp = fopen(tasks, "w");
+	if (!fp) {
+		perror("Failed to open tasks file");
+
+		return -1;
+	}
+	if (fprintf(fp, "%d\n", pid) < 0) {
+		perror("Failed to wr pid to tasks file");
+		fclose(fp);
+
+		return -1;
+	}
+	fclose(fp);
+
+	return 0;
+}
+
+/*
+ * write_bm_pid_to_resctrl - Write a PID (i.e. benchmark) to resctrl FS
+ * @bm_pid:		PID that should be written
+ * @ctrlgrp:		Name of the control monitor group (con_mon grp)
+ * @mongrp:		Name of the monitor group (mon grp)
+ * @resctrl_val:	Resctrl feature (Eg: mbm, mba.. etc)
+ *
+ * If a con_mon grp is requested, create it and write pid to it, otherwise
+ * write pid to root con_mon grp.
+ * If a mon grp is requested, create it and write pid to it, otherwise
+ * pid is not written, this means that pid is in con_mon grp and hence
+ * should consult con_mon grp's mon_data directory for results.
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
+			    char *resctrl_val)
+{
+	char controlgroup[128], monitorgroup[512], monitorgroup_p[256];
+	char tasks[1024];
+	int ret = 0;
+
+	if (strlen(ctrlgrp))
+		sprintf(controlgroup, "%s/%s", RESCTRL_PATH, ctrlgrp);
+	else
+		sprintf(controlgroup, "%s", RESCTRL_PATH);
+
+	/* Create control and monitoring group and write pid into it */
+	ret = create_grp(ctrlgrp, controlgroup, RESCTRL_PATH);
+	if (ret)
+		goto out;
+	sprintf(tasks, "%s/tasks", controlgroup);
+	ret = write_pid_to_tasks(tasks, bm_pid);
+	if (ret)
+		goto out;
+
+	/* Create mon grp and write pid into it for "mbm" and "cqm" test */
+	if ((strcmp(resctrl_val, "cqm") == 0) ||
+	    (strcmp(resctrl_val, "mbm") == 0)) {
+		if (strlen(mongrp)) {
+			sprintf(monitorgroup_p, "%s/mon_groups", controlgroup);
+			sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp);
+			ret = create_grp(mongrp, monitorgroup, monitorgroup_p);
+			if (ret)
+				goto out;
+
+			sprintf(tasks, "%s/mon_groups/%s/tasks",
+				controlgroup, mongrp);
+			ret = write_pid_to_tasks(tasks, bm_pid);
+			if (ret)
+				goto out;
+		}
+	}
+
+out:
+	printf("%sok writing benchmark parameters to resctrl FS\n",
+	       ret ? "not " : "");
+	if (ret)
+		perror("# writing to resctrlfs");
+
+	tests_run++;
+
+	return ret;
+}
+
+/*
+ * write_schemata - Update schemata of a con_mon grp
+ * @ctrlgrp:		Name of the con_mon grp
+ * @schemata:		Schemata that should be updated to
+ * @cpu_no:		CPU number that the benchmark PID is binded to
+ * @resctrl_val:	Resctrl feature (Eg: mbm, mba.. etc)
+ *
+ * Update schemata of a con_mon grp *only* if requested resctrl feature is
+ * allocation type
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
+{
+	char controlgroup[1024], schema[1024], reason[64];
+	int resource_id, ret = 0;
+	FILE *fp;
+
+	if ((strcmp(resctrl_val, "mba") != 0) &&
+	    (strcmp(resctrl_val, "cat") != 0) &&
+	    (strcmp(resctrl_val, "cqm") != 0))
+		return -ENOENT;
+
+	if (!schemata) {
+		printf("# Skipping empty schemata update\n");
+
+		return -1;
+	}
+
+	if (get_resource_id(cpu_no, &resource_id) < 0) {
+		sprintf(reason, "Failed to get resource id");
+		ret = -1;
+
+		goto out;
+	}
+
+	if (strlen(ctrlgrp) != 0)
+		sprintf(controlgroup, "%s/%s/schemata", RESCTRL_PATH, ctrlgrp);
+	else
+		sprintf(controlgroup, "%s/schemata", RESCTRL_PATH);
+
+	if (!strcmp(resctrl_val, "cat") || !strcmp(resctrl_val, "cqm"))
+		sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata);
+	if (strcmp(resctrl_val, "mba") == 0)
+		sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata);
+
+	fp = fopen(controlgroup, "w");
+	if (!fp) {
+		sprintf(reason, "Failed to open control group");
+		ret = -1;
+
+		goto out;
+	}
+
+	if (fprintf(fp, "%s\n", schema) < 0) {
+		sprintf(reason, "Failed to write schemata in control group");
+		fclose(fp);
+		ret = -1;
+
+		goto out;
+	}
+	fclose(fp);
+
+out:
+	printf("%sok Write schema \"%s\" to resctrl FS%s%s\n",
+	       ret ? "not " : "", schema, ret ? " # " : "",
+	       ret ? reason : "");
+	tests_run++;
+
+	return ret;
+}
+
+bool check_resctrlfs_support(void)
+{
+	FILE *inf = fopen("/proc/filesystems", "r");
+	DIR *dp;
+	char *res;
+	bool ret = false;
+
+	if (!inf)
+		return false;
+
+	res = fgrep(inf, "nodev\tresctrl\n");
+
+	if (res) {
+		ret = true;
+		free(res);
+	}
+
+	fclose(inf);
+
+	printf("%sok kernel supports resctrl filesystem\n", ret ? "" : "not ");
+	tests_run++;
+
+	dp = opendir(RESCTRL_PATH);
+	printf("%sok resctrl mountpoint \"%s\" exists\n",
+	       dp ? "" : "not ", RESCTRL_PATH);
+	if (dp)
+		closedir(dp);
+	tests_run++;
+
+	printf("# resctrl filesystem %s mounted\n",
+	       find_resctrl_mount(NULL) ? "not" : "is");
+
+	return ret;
+}
+
+char *fgrep(FILE *inf, const char *str)
+{
+	char line[256];
+	int slen = strlen(str);
+
+	while (!feof(inf)) {
+		if (!fgets(line, 256, inf))
+			break;
+		if (strncmp(line, str, slen))
+			continue;
+
+		return strdup(line);
+	}
+
+	return NULL;
+}
+
+/*
+ * validate_resctrl_feature_request - Check if requested feature is valid.
+ * @resctrl_val:	Requested feature
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+bool validate_resctrl_feature_request(char *resctrl_val)
+{
+	FILE *inf = fopen("/proc/cpuinfo", "r");
+	bool found = false;
+	char *res;
+
+	if (!inf)
+		return false;
+
+	res = fgrep(inf, "flags");
+
+	if (res) {
+		char *s = strchr(res, ':');
+
+		found = s && !strstr(s, resctrl_val);
+		free(res);
+	}
+	fclose(inf);
+
+	return found;
+}
+
+int filter_dmesg(void)
+{
+	char line[1024];
+	FILE *fp;
+	int pipefds[2];
+	pid_t pid;
+	int ret;
+
+	ret = pipe(pipefds);
+	if (ret) {
+		perror("pipe");
+		return ret;
+	}
+	pid = fork();
+	if (pid == 0) {
+		close(pipefds[0]);
+		dup2(pipefds[1], STDOUT_FILENO);
+		execlp("dmesg", "dmesg", NULL);
+		perror("executing dmesg");
+		exit(1);
+	}
+	close(pipefds[1]);
+	fp = fdopen(pipefds[0], "r");
+	if (!fp) {
+		perror("fdopen(pipe)");
+		kill(pid, SIGTERM);
+
+		return -1;
+	}
+
+	while (fgets(line, 1024, fp)) {
+		if (strstr(line, "intel_rdt:"))
+			printf("# dmesg: %s", line);
+		if (strstr(line, "resctrl:"))
+			printf("# dmesg: %s", line);
+	}
+	fclose(fp);
+	waitpid(pid, NULL, 0);
+
+	return 0;
+}
+
+int validate_bw_report_request(char *bw_report)
+{
+	if (strcmp(bw_report, "reads") == 0)
+		return 0;
+	if (strcmp(bw_report, "writes") == 0)
+		return 0;
+	if (strcmp(bw_report, "nt-writes") == 0) {
+		strcpy(bw_report, "writes");
+		return 0;
+	}
+	if (strcmp(bw_report, "total") == 0)
+		return 0;
+
+	fprintf(stderr, "Requested iMC B/W report type unavailable\n");
+
+	return -1;
+}
+
+int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
+		    int group_fd, unsigned long flags)
+{
+	int ret;
+
+	ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
+		      group_fd, flags);
+	return ret;
+}
+
+unsigned int count_bits(unsigned long n)
+{
+	unsigned int count = 0;
+
+	while (n) {
+		count += n & 1;
+		n >>= 1;
+	}
+
+	return count;
+}
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
index cc610da7e369..5910888ebfe1 100644
--- a/tools/testing/selftests/rseq/.gitignore
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 basic_percpu_ops_test
 basic_test
 basic_rseq_op_test
diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore
index d0ad44f6294a..fb2d533aa575 100644
--- a/tools/testing/selftests/rtc/.gitignore
+++ b/tools/testing/selftests/rtc/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 rtctest
 setdate
diff --git a/tools/testing/selftests/safesetid/.gitignore b/tools/testing/selftests/safesetid/.gitignore
index 9c1a629bca01..25d3db172907 100644
--- a/tools/testing/selftests/safesetid/.gitignore
+++ b/tools/testing/selftests/safesetid/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 safesetid-test
diff --git a/tools/testing/selftests/seccomp/.gitignore b/tools/testing/selftests/seccomp/.gitignore
index 5af29d3a1b0a..dec678577f9c 100644
--- a/tools/testing/selftests/seccomp/.gitignore
+++ b/tools/testing/selftests/seccomp/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 seccomp_bpf
 seccomp_benchmark
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index 1760b3e39730..0ebfe8b0e147 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,17 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
-all:
-
-include ../lib.mk
-
-.PHONY: all clean
-
-BINARIES := seccomp_bpf seccomp_benchmark
 CFLAGS += -Wl,-no-as-needed -Wall
+LDFLAGS += -lpthread
 
-seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h
-	$(CC) $(CFLAGS) $(LDFLAGS) $< -lpthread -o $@
-
-TEST_PROGS += $(BINARIES)
-EXTRA_CLEAN := $(BINARIES)
-
-all: $(BINARIES)
+TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark
+include ../lib.mk
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index ee1b727ede04..89fb3e0b552e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -212,6 +212,10 @@ struct seccomp_notif_sizes {
 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
 #endif
 
+#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
+#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
@@ -909,7 +913,7 @@ TEST(ERRNO_order)
 	EXPECT_EQ(12, errno);
 }
 
-FIXTURE_DATA(TRAP) {
+FIXTURE(TRAP) {
 	struct sock_fprog prog;
 };
 
@@ -1020,7 +1024,7 @@ TEST_F(TRAP, handler)
 	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
 }
 
-FIXTURE_DATA(precedence) {
+FIXTURE(precedence) {
 	struct sock_fprog allow;
 	struct sock_fprog log;
 	struct sock_fprog trace;
@@ -1509,7 +1513,7 @@ void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
 	EXPECT_EQ(0, ret);
 }
 
-FIXTURE_DATA(TRACE_poke) {
+FIXTURE(TRACE_poke) {
 	struct sock_fprog prog;
 	pid_t tracer;
 	long poked;
@@ -1817,7 +1821,7 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
 		change_syscall(_metadata, tracee, -1, -ESRCH);
 }
 
-FIXTURE_DATA(TRACE_syscall) {
+FIXTURE(TRACE_syscall) {
 	struct sock_fprog prog;
 	pid_t tracer, mytid, mypid, parent;
 };
@@ -2187,7 +2191,8 @@ TEST(detect_seccomp_filter_flags)
 	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
 				 SECCOMP_FILTER_FLAG_LOG,
 				 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
-				 SECCOMP_FILTER_FLAG_NEW_LISTENER };
+				 SECCOMP_FILTER_FLAG_NEW_LISTENER,
+				 SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
 	unsigned int exclusive[] = {
 				SECCOMP_FILTER_FLAG_TSYNC,
 				SECCOMP_FILTER_FLAG_NEW_LISTENER };
@@ -2321,7 +2326,7 @@ struct tsync_sibling {
 		}							\
 	} while (0)
 
-FIXTURE_DATA(TSYNC) {
+FIXTURE(TSYNC) {
 	struct sock_fprog root_prog, apply_prog;
 	struct tsync_sibling sibling[TSYNC_SIBLINGS];
 	sem_t started;
@@ -2645,6 +2650,55 @@ TEST_F(TSYNC, two_siblings_with_one_divergence)
 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
 }
 
+TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
+{
+	long ret, flags;
+	void *status;
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+	}
+	self->sibling[0].diverge = 1;
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	flags = SECCOMP_FILTER_FLAG_TSYNC | \
+		SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
+	ASSERT_EQ(ESRCH, errno) {
+		TH_LOG("Did not return ESRCH for diverged sibling.");
+	}
+	ASSERT_EQ(-1, ret) {
+		TH_LOG("Did not fail on diverged sibling.");
+	}
+
+	/* Wake the threads */
+	pthread_mutex_lock(&self->mutex);
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+
+	/* Ensure they are both unkilled. */
+	PTHREAD_JOIN(self->sibling[0].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+	PTHREAD_JOIN(self->sibling[1].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
 TEST_F(TSYNC, two_siblings_not_under_filter)
 {
 	long ret, sib;
@@ -3196,6 +3250,24 @@ TEST(user_notification_basic)
 	EXPECT_EQ(0, WEXITSTATUS(status));
 }
 
+TEST(user_notification_with_tsync)
+{
+	int ret;
+	unsigned int flags;
+
+	/* these were exclusive */
+	flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
+		SECCOMP_FILTER_FLAG_TSYNC;
+	ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
+	ASSERT_EQ(EINVAL, errno);
+
+	/* but now they're not */
+	flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
+	ret = user_trap_syscall(__NR_getppid, flags);
+	close(ret);
+	ASSERT_LE(0, ret);
+}
+
 TEST(user_notification_kill_in_middle)
 {
 	pid_t pid;
diff --git a/tools/testing/selftests/sigaltstack/.gitignore b/tools/testing/selftests/sigaltstack/.gitignore
index 35897b0a3f44..50a19a8888ce 100644
--- a/tools/testing/selftests/sigaltstack/.gitignore
+++ b/tools/testing/selftests/sigaltstack/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 sas
diff --git a/tools/testing/selftests/size/.gitignore b/tools/testing/selftests/size/.gitignore
index 189b7818de34..923e18eed1a0 100644
--- a/tools/testing/selftests/size/.gitignore
+++ b/tools/testing/selftests/size/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 get_size
diff --git a/tools/testing/selftests/sparc64/drivers/.gitignore b/tools/testing/selftests/sparc64/drivers/.gitignore
index 90e835ed74e6..0331f77373b5 100644
--- a/tools/testing/selftests/sparc64/drivers/.gitignore
+++ b/tools/testing/selftests/sparc64/drivers/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 adi-test
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
index 1e23fefd68e8..d5a2da428752 100644
--- a/tools/testing/selftests/splice/.gitignore
+++ b/tools/testing/selftests/splice/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 default_file_splice_read
diff --git a/tools/testing/selftests/sync/.gitignore b/tools/testing/selftests/sync/.gitignore
index f5091e7792f2..f1152357712f 100644
--- a/tools/testing/selftests/sync/.gitignore
+++ b/tools/testing/selftests/sync/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 sync_test
diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore
index c26d72e0166f..d52f65de23b4 100644
--- a/tools/testing/selftests/tc-testing/.gitignore
+++ b/tools/testing/selftests/tc-testing/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 __pycache__/
 *.pyc
 plugins/
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 477bc61b374a..c33a7aac27ff 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -31,6 +31,7 @@ CONFIG_NET_EMATCH_U32=m
 CONFIG_NET_EMATCH_META=m
 CONFIG_NET_EMATCH_TEXT=m
 CONFIG_NET_EMATCH_IPSET=m
+CONFIG_NET_EMATCH_CANID=m
 CONFIG_NET_EMATCH_IPT=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=m
@@ -57,3 +58,9 @@ CONFIG_NET_IFE_SKBMARK=m
 CONFIG_NET_IFE_SKBPRIO=m
 CONFIG_NET_IFE_SKBTCINDEX=m
 CONFIG_NET_SCH_FIFO=y
+CONFIG_NET_SCH_ETS=m
+
+#
+## Network testing
+#
+CONFIG_CAN=m
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
index 98a20faf3198..e788c114a484 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
@@ -372,5 +372,907 @@
         "teardown": [
             "$TC qdisc del dev $DEV1 ingress"
         ]
+    },
+    {
+        "id": "bae4",
+        "name": "Add basic filter with u32 ematch u8/zero offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x11 0x0f at 0)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(01000000/0f000000 at 0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "e6cb",
+        "name": "Add basic filter with u32 ematch u8/zero offset and invalid value >0xFF",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x1122 0x0f at 0)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/0f000000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7727",
+        "name": "Add basic filter with u32 ematch u8/positive offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0x1f at 12)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(17000000/1f000000 at 12\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "a429",
+        "name": "Add basic filter with u32 ematch u8/invalid mask >0xFF",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff00 at 12)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000/ff000000 at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8373",
+        "name": "Add basic filter with u32 ematch u8/missing offset",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff at)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000 at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "ab8e",
+        "name": "Add basic filter with u32 ematch u8/missing AT keyword",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff 0)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000 at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "712d",
+        "name": "Add basic filter with u32 ematch u8/missing value",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 at 12)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "350f",
+        "name": "Add basic filter with u32 ematch u8/non-numeric value",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 zero 0xff at 0)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ff000000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "e28f",
+        "name": "Add basic filter with u32 ematch u8/non-numeric mask",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x11 mask at 0)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11000000/00000000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6d5f",
+        "name": "Add basic filter with u32 ematch u8/negative offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0xaa 0xf0 at -14)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(0000a000/0000f000 at -16\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "12dc",
+        "name": "Add basic filter with u32 ematch u8/nexthdr+ offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0xaa 0xf0 at nexthdr+0)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(a0000000/f0000000 at nexthdr\\+0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1d85",
+        "name": "Add basic filter with u32 ematch u16/zero offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x1122 0xffff at 0)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/ffff0000 at 0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3672",
+        "name": "Add basic filter with u32 ematch u16/zero offset and invalid value >0xFFFF",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x112233 0xffff at 0)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11223300/ffff0000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7fb0",
+        "name": "Add basic filter with u32 ematch u16/positive offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0x1fff at 12)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(17880000/1fff0000 at 12\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "19af",
+        "name": "Add basic filter with u32 ematch u16/invalid mask >0xFFFF",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffffffff at 12)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000/ffffffff at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "446d",
+        "name": "Add basic filter with u32 ematch u16/missing offset",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffff at)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000 at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "151b",
+        "name": "Add basic filter with u32 ematch u16/missing AT keyword",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffff 0)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000/ffff0000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "bb23",
+        "name": "Add basic filter with u32 ematch u16/missing value",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 at 12)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "decc",
+        "name": "Add basic filter with u32 ematch u16/non-numeric value",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 zero 0xffff at 0)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ffff0000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "e988",
+        "name": "Add basic filter with u32 ematch u16/non-numeric mask",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x1122 mask at 0)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/00000000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "07d8",
+        "name": "Add basic filter with u32 ematch u16/negative offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0xaabb 0xffff at -12)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabb0000/ffff0000 at -12\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "f474",
+        "name": "Add basic filter with u32 ematch u16/nexthdr+ offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0xaabb 0xf0f0 at nexthdr+0)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(a0b00000/f0f00000 at nexthdr\\+0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "47a0",
+        "name": "Add basic filter with u32 ematch u32/zero offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xffffffff at 0)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabbccdd/ffffffff at 0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "849f",
+        "name": "Add basic filter with u32 ematch u32/positive offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11227788 0x1ffff0f0 at 12)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11227080/1ffff0f0 at 12\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d288",
+        "name": "Add basic filter with u32 ematch u32/missing offset",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11227788 0xffffffff at)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11227788/ffffffff at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4998",
+        "name": "Add basic filter with u32 ematch u32/missing AT keyword",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x77889900 0xfffff0f0 0)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77889900/fffff0f0 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1f0a",
+        "name": "Add basic filter with u32 ematch u32/missing value",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 at 12)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "848e",
+        "name": "Add basic filter with u32 ematch u32/non-numeric value",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 zero 0xffff at 0)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ffff0000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "f748",
+        "name": "Add basic filter with u32 ematch u32/non-numeric mask",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11223344 mask at 0)' classid 1:1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11223344/00000000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "55a6",
+        "name": "Add basic filter with u32 ematch u32/negative offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xff00ff00 at -12)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aa00cc00/ff00ff00 at -12\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7282",
+        "name": "Add basic filter with u32 ematch u32/nexthdr+ offset and default action",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xffffffff at nexthdr+0)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabbccdd/ffffffff at nexthdr\\+0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b2b6",
+        "name": "Add basic filter with canid ematch and single SFF",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 1)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x1\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "f67f",
+        "name": "Add basic filter with canid ematch and single SFF with mask",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0xaabb:0x00ff)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x2BB:0xFF\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "bd5c",
+        "name": "Add basic filter with canid ematch and multiple SFF",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 1 sff 2 sff 3)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x1 sff 0x2 sff 0x3\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "83c7",
+        "name": "Add basic filter with canid ematch and multiple SFF with masks",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0xaa:0x01 sff 0xbb:0x02 sff 0xcc:0x03)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0xAA:0x1 sff 0xBB:0x2 sff 0xCC:0x3\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "a8f5",
+        "name": "Add basic filter with canid ematch and single EFF",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 1)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x1\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "98ae",
+        "name": "Add basic filter with canid ematch and single EFF with mask",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 0xaabb:0xf1f1)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0xAABB:0xF1F1\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6056",
+        "name": "Add basic filter with canid ematch and multiple EFF",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 1 eff 2 eff 3)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x1 eff 0x2 eff 0x3\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d188",
+        "name": "Add basic filter with canid ematch and multiple EFF with masks",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 0xaa:0x01 eff 0xbb:0x02 eff 0xcc:0x03)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0xAA:0x1 eff 0xBB:0x2 eff 0xCC:0x3\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "25d1",
+        "name": "Add basic filter with canid ematch and a combination of SFF/EFF",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0x01 eff 0x02)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x2 sff 0x1\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b438",
+        "name": "Add basic filter with canid ematch and a combination of SFF/EFF with masks",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0x01:0xf eff 0x02:0xf)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x2:0xF sff 0x1:0xF\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
new file mode 100644
index 000000000000..0703a2a255eb
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
@@ -0,0 +1,185 @@
+[
+    {
+        "id": "8b6e",
+        "name": "Create RED with no flags",
+        "category": [
+            "qdisc",
+            "red"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red limit 1M avpkt 1500 min 100K max 300K",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb $",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "342e",
+        "name": "Create RED with adaptive flag",
+        "category": [
+            "qdisc",
+            "red"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red adaptive limit 1M avpkt 1500 min 100K max 300K",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb adaptive $",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2d4b",
+        "name": "Create RED with ECN flag",
+        "category": [
+            "qdisc",
+            "red"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn limit 1M avpkt 1500 min 100K max 300K",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn $",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "650f",
+        "name": "Create RED with flags ECN, adaptive",
+        "category": [
+            "qdisc",
+            "red"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn adaptive limit 1M avpkt 1500 min 100K max 300K",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn adaptive $",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5f15",
+        "name": "Create RED with flags ECN, harddrop",
+        "category": [
+            "qdisc",
+            "red"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop limit 1M avpkt 1500 min 100K max 300K",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop $",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "53e8",
+        "name": "Create RED with flags ECN, nodrop",
+        "category": [
+            "qdisc",
+            "red"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn nodrop limit 1M avpkt 1500 min 100K max 300K",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn nodrop $",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "d091",
+        "name": "Fail to create RED with only nodrop flag",
+        "category": [
+            "qdisc",
+            "red"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red nodrop limit 1M avpkt 1500 min 100K max 300K",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc red",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "af8e",
+        "name": "Create RED with flags ECN, nodrop, harddrop",
+        "category": [
+            "qdisc",
+            "red"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop nodrop limit 1M avpkt 1500 min 100K max 300K",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop nodrop $",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore
index 789f21e81028..2e43851b47c1 100644
--- a/tools/testing/selftests/timens/.gitignore
+++ b/tools/testing/selftests/timens/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 clock_nanosleep
 exec
 gettime_perf
diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c
index 87b47b557a7a..e40dc5be2f66 100644
--- a/tools/testing/selftests/timens/exec.c
+++ b/tools/testing/selftests/timens/exec.c
@@ -11,7 +11,6 @@
 #include <sys/wait.h>
 #include <time.h>
 #include <unistd.h>
-#include <time.h>
 #include <string.h>
 
 #include "log.h"
diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c
index 43d93f4006b9..7f14f0fdac84 100644
--- a/tools/testing/selftests/timens/procfs.c
+++ b/tools/testing/selftests/timens/procfs.c
@@ -12,7 +12,6 @@
 #include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
-#include <time.h>
 
 #include "log.h"
 #include "timens.h"
diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c
index 559d26e21ba0..098be7c83be3 100644
--- a/tools/testing/selftests/timens/timens.c
+++ b/tools/testing/selftests/timens/timens.c
@@ -10,7 +10,6 @@
 #include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
-#include <time.h>
 #include <string.h>
 
 #include "log.h"
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c
index 0cca7aafc4bd..96dba11ebe44 100644
--- a/tools/testing/selftests/timens/timer.c
+++ b/tools/testing/selftests/timens/timer.c
@@ -11,7 +11,6 @@
 #include <stdio.h>
 #include <stdint.h>
 #include <signal.h>
-#include <time.h>
 
 #include "log.h"
 #include "timens.h"
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore
index 32a9eadb2d4e..bb5326ff900b 100644
--- a/tools/testing/selftests/timers/.gitignore
+++ b/tools/testing/selftests/timers/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 alarmtimer-suspend
 change_skew
 clocksource-switch
diff --git a/tools/testing/selftests/tmpfs/.gitignore b/tools/testing/selftests/tmpfs/.gitignore
index a96838fad74d..b1afaa925905 100644
--- a/tools/testing/selftests/tmpfs/.gitignore
+++ b/tools/testing/selftests/tmpfs/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 /bug-link-o-tmpfile
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
index 133bf9ee986c..382cfb39a1a3 100644
--- a/tools/testing/selftests/vDSO/.gitignore
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 vdso_test
 vdso_standalone_test_x86
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 31b3c98b6d34..0edb6d900e8d 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 hugepage-mmap
 hugepage-shm
 map_hugetlb
@@ -14,3 +15,4 @@ virtual_address_range
 gup_benchmark
 va_128TBswitch
 map_fixed_noreplace
+write_to_hugetlbfs
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 7f9a8a8c31da..d31db052dff6 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -14,6 +14,7 @@ TEST_GEN_FILES += map_fixed_noreplace
 TEST_GEN_FILES += map_populate
 TEST_GEN_FILES += mlock-random-test
 TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += mremap_dontunmap
 TEST_GEN_FILES += on-fault-limit
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
@@ -22,6 +23,7 @@ TEST_GEN_FILES += userfaultfd
 ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64))
 TEST_GEN_FILES += va_128TBswitch
 TEST_GEN_FILES += virtual_address_range
+TEST_GEN_FILES += write_to_hugetlbfs
 endif
 
 TEST_PROGS := run_vmtests
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
new file mode 100644
index 000000000000..18d33684faad
--- /dev/null
+++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
@@ -0,0 +1,575 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+if [[ $(id -u) -ne 0 ]]; then
+  echo "This test must be run as root. Skipping..."
+  exit 0
+fi
+
+fault_limit_file=limit_in_bytes
+reservation_limit_file=rsvd.limit_in_bytes
+fault_usage_file=usage_in_bytes
+reservation_usage_file=rsvd.usage_in_bytes
+
+if [[ "$1" == "-cgroup-v2" ]]; then
+  cgroup2=1
+  fault_limit_file=max
+  reservation_limit_file=rsvd.max
+  fault_usage_file=current
+  reservation_usage_file=rsvd.current
+fi
+
+cgroup_path=/dev/cgroup/memory
+if [[ ! -e $cgroup_path ]]; then
+  mkdir -p $cgroup_path
+  if [[ $cgroup2 ]]; then
+    mount -t cgroup2 none $cgroup_path
+  else
+    mount -t cgroup memory,hugetlb $cgroup_path
+  fi
+fi
+
+if [[ $cgroup2 ]]; then
+  echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control
+fi
+
+function cleanup() {
+  if [[ $cgroup2 ]]; then
+    echo $$ >$cgroup_path/cgroup.procs
+  else
+    echo $$ >$cgroup_path/tasks
+  fi
+
+  if [[ -e /mnt/huge ]]; then
+    rm -rf /mnt/huge/*
+    umount /mnt/huge || echo error
+    rmdir /mnt/huge
+  fi
+  if [[ -e $cgroup_path/hugetlb_cgroup_test ]]; then
+    rmdir $cgroup_path/hugetlb_cgroup_test
+  fi
+  if [[ -e $cgroup_path/hugetlb_cgroup_test1 ]]; then
+    rmdir $cgroup_path/hugetlb_cgroup_test1
+  fi
+  if [[ -e $cgroup_path/hugetlb_cgroup_test2 ]]; then
+    rmdir $cgroup_path/hugetlb_cgroup_test2
+  fi
+  echo 0 >/proc/sys/vm/nr_hugepages
+  echo CLEANUP DONE
+}
+
+function expect_equal() {
+  local expected="$1"
+  local actual="$2"
+  local error="$3"
+
+  if [[ "$expected" != "$actual" ]]; then
+    echo "expected ($expected) != actual ($actual): $3"
+    cleanup
+    exit 1
+  fi
+}
+
+function get_machine_hugepage_size() {
+  hpz=$(grep -i hugepagesize /proc/meminfo)
+  kb=${hpz:14:-3}
+  mb=$(($kb / 1024))
+  echo $mb
+}
+
+MB=$(get_machine_hugepage_size)
+
+function setup_cgroup() {
+  local name="$1"
+  local cgroup_limit="$2"
+  local reservation_limit="$3"
+
+  mkdir $cgroup_path/$name
+
+  echo writing cgroup limit: "$cgroup_limit"
+  echo "$cgroup_limit" >$cgroup_path/$name/hugetlb.${MB}MB.$fault_limit_file
+
+  echo writing reseravation limit: "$reservation_limit"
+  echo "$reservation_limit" > \
+    $cgroup_path/$name/hugetlb.${MB}MB.$reservation_limit_file
+
+  if [ -e "$cgroup_path/$name/cpuset.cpus" ]; then
+    echo 0 >$cgroup_path/$name/cpuset.cpus
+  fi
+  if [ -e "$cgroup_path/$name/cpuset.mems" ]; then
+    echo 0 >$cgroup_path/$name/cpuset.mems
+  fi
+}
+
+function wait_for_hugetlb_memory_to_get_depleted() {
+  local cgroup="$1"
+  local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+  # Wait for hugetlbfs memory to get depleted.
+  while [ $(cat $path) != 0 ]; do
+    echo Waiting for hugetlb memory to get depleted.
+    cat $path
+    sleep 0.5
+  done
+}
+
+function wait_for_hugetlb_memory_to_get_reserved() {
+  local cgroup="$1"
+  local size="$2"
+
+  local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+  # Wait for hugetlbfs memory to get written.
+  while [ $(cat $path) != $size ]; do
+    echo Waiting for hugetlb memory reservation to reach size $size.
+    cat $path
+    sleep 0.5
+  done
+}
+
+function wait_for_hugetlb_memory_to_get_written() {
+  local cgroup="$1"
+  local size="$2"
+
+  local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
+  # Wait for hugetlbfs memory to get written.
+  while [ $(cat $path) != $size ]; do
+    echo Waiting for hugetlb memory to reach size $size.
+    cat $path
+    sleep 0.5
+  done
+}
+
+function write_hugetlbfs_and_get_usage() {
+  local cgroup="$1"
+  local size="$2"
+  local populate="$3"
+  local write="$4"
+  local path="$5"
+  local method="$6"
+  local private="$7"
+  local expect_failure="$8"
+  local reserve="$9"
+
+  # Function return values.
+  reservation_failed=0
+  oom_killed=0
+  hugetlb_difference=0
+  reserved_difference=0
+
+  local hugetlb_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file
+  local reserved_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file
+
+  local hugetlb_before=$(cat $hugetlb_usage)
+  local reserved_before=$(cat $reserved_usage)
+
+  echo
+  echo Starting:
+  echo hugetlb_usage="$hugetlb_before"
+  echo reserved_usage="$reserved_before"
+  echo expect_failure is "$expect_failure"
+
+  output=$(mktemp)
+  set +e
+  if [[ "$method" == "1" ]] || [[ "$method" == 2 ]] ||
+    [[ "$private" == "-r" ]] && [[ "$expect_failure" != 1 ]]; then
+
+    bash write_hugetlb_memory.sh "$size" "$populate" "$write" \
+      "$cgroup" "$path" "$method" "$private" "-l" "$reserve" 2>&1 | tee $output &
+
+    local write_result=$?
+    local write_pid=$!
+
+    until grep -q -i "DONE" $output; do
+      echo waiting for DONE signal.
+      if ! ps $write_pid > /dev/null
+      then
+        echo "FAIL: The write died"
+        cleanup
+        exit 1
+      fi
+      sleep 0.5
+    done
+
+    echo ================= write_hugetlb_memory.sh output is:
+    cat $output
+    echo ================= end output.
+
+    if [[ "$populate" == "-o" ]] || [[ "$write" == "-w" ]]; then
+      wait_for_hugetlb_memory_to_get_written "$cgroup" "$size"
+    elif [[ "$reserve" != "-n" ]]; then
+      wait_for_hugetlb_memory_to_get_reserved "$cgroup" "$size"
+    else
+      # This case doesn't produce visible effects, but we still have
+      # to wait for the async process to start and execute...
+      sleep 0.5
+    fi
+
+    echo write_result is $write_result
+  else
+    bash write_hugetlb_memory.sh "$size" "$populate" "$write" \
+      "$cgroup" "$path" "$method" "$private" "$reserve"
+    local write_result=$?
+
+    if [[ "$reserve" != "-n" ]]; then
+      wait_for_hugetlb_memory_to_get_reserved "$cgroup" "$size"
+    fi
+  fi
+  set -e
+
+  if [[ "$write_result" == 1 ]]; then
+    reservation_failed=1
+  fi
+
+  # On linus/master, the above process gets SIGBUS'd on oomkill, with
+  # return code 135. On earlier kernels, it gets actual oomkill, with return
+  # code 137, so just check for both conditions in case we're testing
+  # against an earlier kernel.
+  if [[ "$write_result" == 135 ]] || [[ "$write_result" == 137 ]]; then
+    oom_killed=1
+  fi
+
+  local hugetlb_after=$(cat $hugetlb_usage)
+  local reserved_after=$(cat $reserved_usage)
+
+  echo After write:
+  echo hugetlb_usage="$hugetlb_after"
+  echo reserved_usage="$reserved_after"
+
+  hugetlb_difference=$(($hugetlb_after - $hugetlb_before))
+  reserved_difference=$(($reserved_after - $reserved_before))
+}
+
+function cleanup_hugetlb_memory() {
+  set +e
+  local cgroup="$1"
+  if [[ "$(pgrep -f write_to_hugetlbfs)" != "" ]]; then
+    echo killing write_to_hugetlbfs
+    killall -2 write_to_hugetlbfs
+    wait_for_hugetlb_memory_to_get_depleted $cgroup
+  fi
+  set -e
+
+  if [[ -e /mnt/huge ]]; then
+    rm -rf /mnt/huge/*
+    umount /mnt/huge
+    rmdir /mnt/huge
+  fi
+}
+
+function run_test() {
+  local size=$(($1 * ${MB} * 1024 * 1024))
+  local populate="$2"
+  local write="$3"
+  local cgroup_limit=$(($4 * ${MB} * 1024 * 1024))
+  local reservation_limit=$(($5 * ${MB} * 1024 * 1024))
+  local nr_hugepages="$6"
+  local method="$7"
+  local private="$8"
+  local expect_failure="$9"
+  local reserve="${10}"
+
+  # Function return values.
+  hugetlb_difference=0
+  reserved_difference=0
+  reservation_failed=0
+  oom_killed=0
+
+  echo nr hugepages = "$nr_hugepages"
+  echo "$nr_hugepages" >/proc/sys/vm/nr_hugepages
+
+  setup_cgroup "hugetlb_cgroup_test" "$cgroup_limit" "$reservation_limit"
+
+  mkdir -p /mnt/huge
+  mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge
+
+  write_hugetlbfs_and_get_usage "hugetlb_cgroup_test" "$size" "$populate" \
+    "$write" "/mnt/huge/test" "$method" "$private" "$expect_failure" \
+    "$reserve"
+
+  cleanup_hugetlb_memory "hugetlb_cgroup_test"
+
+  local final_hugetlb=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$fault_usage_file)
+  local final_reservation=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$reservation_usage_file)
+
+  echo $hugetlb_difference
+  echo $reserved_difference
+  expect_equal "0" "$final_hugetlb" "final hugetlb is not zero"
+  expect_equal "0" "$final_reservation" "final reservation is not zero"
+}
+
+function run_multiple_cgroup_test() {
+  local size1="$1"
+  local populate1="$2"
+  local write1="$3"
+  local cgroup_limit1="$4"
+  local reservation_limit1="$5"
+
+  local size2="$6"
+  local populate2="$7"
+  local write2="$8"
+  local cgroup_limit2="$9"
+  local reservation_limit2="${10}"
+
+  local nr_hugepages="${11}"
+  local method="${12}"
+  local private="${13}"
+  local expect_failure="${14}"
+  local reserve="${15}"
+
+  # Function return values.
+  hugetlb_difference1=0
+  reserved_difference1=0
+  reservation_failed1=0
+  oom_killed1=0
+
+  hugetlb_difference2=0
+  reserved_difference2=0
+  reservation_failed2=0
+  oom_killed2=0
+
+  echo nr hugepages = "$nr_hugepages"
+  echo "$nr_hugepages" >/proc/sys/vm/nr_hugepages
+
+  setup_cgroup "hugetlb_cgroup_test1" "$cgroup_limit1" "$reservation_limit1"
+  setup_cgroup "hugetlb_cgroup_test2" "$cgroup_limit2" "$reservation_limit2"
+
+  mkdir -p /mnt/huge
+  mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge
+
+  write_hugetlbfs_and_get_usage "hugetlb_cgroup_test1" "$size1" \
+    "$populate1" "$write1" "/mnt/huge/test1" "$method" "$private" \
+    "$expect_failure" "$reserve"
+
+  hugetlb_difference1=$hugetlb_difference
+  reserved_difference1=$reserved_difference
+  reservation_failed1=$reservation_failed
+  oom_killed1=$oom_killed
+
+  local cgroup1_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$fault_usage_file
+  local cgroup1_reservation_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$reservation_usage_file
+  local cgroup2_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$fault_usage_file
+  local cgroup2_reservation_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$reservation_usage_file
+
+  local usage_before_second_write=$(cat $cgroup1_hugetlb_usage)
+  local reservation_usage_before_second_write=$(cat $cgroup1_reservation_usage)
+
+  write_hugetlbfs_and_get_usage "hugetlb_cgroup_test2" "$size2" \
+    "$populate2" "$write2" "/mnt/huge/test2" "$method" "$private" \
+    "$expect_failure" "$reserve"
+
+  hugetlb_difference2=$hugetlb_difference
+  reserved_difference2=$reserved_difference
+  reservation_failed2=$reservation_failed
+  oom_killed2=$oom_killed
+
+  expect_equal "$usage_before_second_write" \
+    "$(cat $cgroup1_hugetlb_usage)" "Usage changed."
+  expect_equal "$reservation_usage_before_second_write" \
+    "$(cat $cgroup1_reservation_usage)" "Reservation usage changed."
+
+  cleanup_hugetlb_memory
+
+  local final_hugetlb=$(cat $cgroup1_hugetlb_usage)
+  local final_reservation=$(cat $cgroup1_reservation_usage)
+
+  expect_equal "0" "$final_hugetlb" \
+    "hugetlbt_cgroup_test1 final hugetlb is not zero"
+  expect_equal "0" "$final_reservation" \
+    "hugetlbt_cgroup_test1 final reservation is not zero"
+
+  local final_hugetlb=$(cat $cgroup2_hugetlb_usage)
+  local final_reservation=$(cat $cgroup2_reservation_usage)
+
+  expect_equal "0" "$final_hugetlb" \
+    "hugetlb_cgroup_test2 final hugetlb is not zero"
+  expect_equal "0" "$final_reservation" \
+    "hugetlb_cgroup_test2 final reservation is not zero"
+}
+
+cleanup
+
+for populate in "" "-o"; do
+  for method in 0 1 2; do
+    for private in "" "-r"; do
+      for reserve in "" "-n"; do
+
+        # Skip mmap(MAP_HUGETLB | MAP_SHARED). Doesn't seem to be supported.
+        if [[ "$method" == 1 ]] && [[ "$private" == "" ]]; then
+          continue
+        fi
+
+        # Skip populated shmem tests. Doesn't seem to be supported.
+        if [[ "$method" == 2"" ]] && [[ "$populate" == "-o" ]]; then
+          continue
+        fi
+
+        if [[ "$method" == 2"" ]] && [[ "$reserve" == "-n" ]]; then
+          continue
+        fi
+
+        cleanup
+        echo
+        echo
+        echo
+        echo Test normal case.
+        echo private=$private, populate=$populate, method=$method, reserve=$reserve
+        run_test 5 "$populate" "" 10 10 10 "$method" "$private" "0" "$reserve"
+
+        echo Memory charged to hugtlb=$hugetlb_difference
+        echo Memory charged to reservation=$reserved_difference
+
+        if [[ "$populate" == "-o" ]]; then
+          expect_equal "$((5 * $MB * 1024 * 1024))" "$hugetlb_difference" \
+            "Reserved memory charged to hugetlb cgroup."
+        else
+          expect_equal "0" "$hugetlb_difference" \
+            "Reserved memory charged to hugetlb cgroup."
+        fi
+
+        if [[ "$reserve" != "-n" ]] || [[ "$populate" == "-o" ]]; then
+          expect_equal "$((5 * $MB * 1024 * 1024))" "$reserved_difference" \
+            "Reserved memory not charged to reservation usage."
+        else
+          expect_equal "0" "$reserved_difference" \
+            "Reserved memory not charged to reservation usage."
+        fi
+
+        echo 'PASS'
+
+        cleanup
+        echo
+        echo
+        echo
+        echo Test normal case with write.
+        echo private=$private, populate=$populate, method=$method, reserve=$reserve
+        run_test 5 "$populate" '-w' 5 5 10 "$method" "$private" "0" "$reserve"
+
+        echo Memory charged to hugtlb=$hugetlb_difference
+        echo Memory charged to reservation=$reserved_difference
+
+        expect_equal "$((5 * $MB * 1024 * 1024))" "$hugetlb_difference" \
+          "Reserved memory charged to hugetlb cgroup."
+
+        expect_equal "$((5 * $MB * 1024 * 1024))" "$reserved_difference" \
+          "Reserved memory not charged to reservation usage."
+
+        echo 'PASS'
+
+        cleanup
+        continue
+        echo
+        echo
+        echo
+        echo Test more than reservation case.
+        echo private=$private, populate=$populate, method=$method, reserve=$reserve
+
+        if [ "$reserve" != "-n" ]; then
+          run_test "5" "$populate" '' "10" "2" "10" "$method" "$private" "1" \
+            "$reserve"
+
+          expect_equal "1" "$reservation_failed" "Reservation succeeded."
+        fi
+
+        echo 'PASS'
+
+        cleanup
+
+        echo
+        echo
+        echo
+        echo Test more than cgroup limit case.
+        echo private=$private, populate=$populate, method=$method, reserve=$reserve
+
+        # Not sure if shm memory can be cleaned up when the process gets sigbus'd.
+        if [[ "$method" != 2 ]]; then
+          run_test 5 "$populate" "-w" 2 10 10 "$method" "$private" "1" "$reserve"
+
+          expect_equal "1" "$oom_killed" "Not oom killed."
+        fi
+        echo 'PASS'
+
+        cleanup
+
+        echo
+        echo
+        echo
+        echo Test normal case, multiple cgroups.
+        echo private=$private, populate=$populate, method=$method, reserve=$reserve
+        run_multiple_cgroup_test "3" "$populate" "" "10" "10" "5" \
+          "$populate" "" "10" "10" "10" \
+          "$method" "$private" "0" "$reserve"
+
+        echo Memory charged to hugtlb1=$hugetlb_difference1
+        echo Memory charged to reservation1=$reserved_difference1
+        echo Memory charged to hugtlb2=$hugetlb_difference2
+        echo Memory charged to reservation2=$reserved_difference2
+
+        if [[ "$reserve" != "-n" ]] || [[ "$populate" == "-o" ]]; then
+          expect_equal "3" "$reserved_difference1" \
+            "Incorrect reservations charged to cgroup 1."
+
+          expect_equal "5" "$reserved_difference2" \
+            "Incorrect reservation charged to cgroup 2."
+
+        else
+          expect_equal "0" "$reserved_difference1" \
+            "Incorrect reservations charged to cgroup 1."
+
+          expect_equal "0" "$reserved_difference2" \
+            "Incorrect reservation charged to cgroup 2."
+        fi
+
+        if [[ "$populate" == "-o" ]]; then
+          expect_equal "3" "$hugetlb_difference1" \
+            "Incorrect hugetlb charged to cgroup 1."
+
+          expect_equal "5" "$hugetlb_difference2" \
+            "Incorrect hugetlb charged to cgroup 2."
+
+        else
+          expect_equal "0" "$hugetlb_difference1" \
+            "Incorrect hugetlb charged to cgroup 1."
+
+          expect_equal "0" "$hugetlb_difference2" \
+            "Incorrect hugetlb charged to cgroup 2."
+        fi
+        echo 'PASS'
+
+        cleanup
+        echo
+        echo
+        echo
+        echo Test normal case with write, multiple cgroups.
+        echo private=$private, populate=$populate, method=$method, reserve=$reserve
+        run_multiple_cgroup_test "3" "$populate" "-w" "10" "10" "5" \
+          "$populate" "-w" "10" "10" "10" \
+          "$method" "$private" "0" "$reserve"
+
+        echo Memory charged to hugtlb1=$hugetlb_difference1
+        echo Memory charged to reservation1=$reserved_difference1
+        echo Memory charged to hugtlb2=$hugetlb_difference2
+        echo Memory charged to reservation2=$reserved_difference2
+
+        expect_equal "3" "$hugetlb_difference1" \
+          "Incorrect hugetlb charged to cgroup 1."
+
+        expect_equal "3" "$reserved_difference1" \
+          "Incorrect reservation charged to cgroup 1."
+
+        expect_equal "5" "$hugetlb_difference2" \
+          "Incorrect hugetlb charged to cgroup 2."
+
+        expect_equal "5" "$reserved_difference2" \
+          "Incorrected reservation charged to cgroup 2."
+        echo 'PASS'
+
+        cleanup
+
+      done # reserve
+    done   # private
+  done     # populate
+done       # method
+
+umount $cgroup_path
+rmdir $cgroup_path
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 389327e9b30a..43b4dfe161a2 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -18,6 +18,10 @@
 #define GUP_LONGTERM_BENCHMARK	_IOWR('g', 2, struct gup_benchmark)
 #define GUP_BENCHMARK		_IOWR('g', 3, struct gup_benchmark)
 
+/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
+#define PIN_FAST_BENCHMARK	_IOWR('g', 4, struct gup_benchmark)
+#define PIN_BENCHMARK		_IOWR('g', 5, struct gup_benchmark)
+
 /* Just the flags we need, copied from mm.h: */
 #define FOLL_WRITE	0x01	/* check pte is writable */
 
@@ -40,8 +44,14 @@ int main(int argc, char **argv)
 	char *file = "/dev/zero";
 	char *p;
 
-	while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) {
+	while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) {
 		switch (opt) {
+		case 'a':
+			cmd = PIN_FAST_BENCHMARK;
+			break;
+		case 'b':
+			cmd = PIN_BENCHMARK;
+			break;
 		case 'm':
 			size = atoi(optarg) * MB;
 			break;
@@ -63,6 +73,9 @@ int main(int argc, char **argv)
 		case 'U':
 			cmd = GUP_BENCHMARK;
 			break;
+		case 'u':
+			cmd = GUP_FAST_BENCHMARK;
+			break;
 		case 'w':
 			write = 1;
 			break;
diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
new file mode 100644
index 000000000000..d11d1febccc3
--- /dev/null
+++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
@@ -0,0 +1,244 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+if [[ $(id -u) -ne 0 ]]; then
+  echo "This test must be run as root. Skipping..."
+  exit 0
+fi
+
+usage_file=usage_in_bytes
+
+if [[ "$1" == "-cgroup-v2" ]]; then
+  cgroup2=1
+  usage_file=current
+fi
+
+CGROUP_ROOT='/dev/cgroup/memory'
+MNT='/mnt/huge/'
+
+if [[ ! -e $CGROUP_ROOT ]]; then
+  mkdir -p $CGROUP_ROOT
+  if [[ $cgroup2 ]]; then
+    mount -t cgroup2 none $CGROUP_ROOT
+    sleep 1
+    echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
+  else
+    mount -t cgroup memory,hugetlb $CGROUP_ROOT
+  fi
+fi
+
+function get_machine_hugepage_size() {
+  hpz=$(grep -i hugepagesize /proc/meminfo)
+  kb=${hpz:14:-3}
+  mb=$(($kb / 1024))
+  echo $mb
+}
+
+MB=$(get_machine_hugepage_size)
+
+function cleanup() {
+  echo cleanup
+  set +e
+  rm -rf "$MNT"/* 2>/dev/null
+  umount "$MNT" 2>/dev/null
+  rmdir "$MNT" 2>/dev/null
+  rmdir "$CGROUP_ROOT"/a/b 2>/dev/null
+  rmdir "$CGROUP_ROOT"/a 2>/dev/null
+  rmdir "$CGROUP_ROOT"/test1 2>/dev/null
+  echo 0 >/proc/sys/vm/nr_hugepages
+  set -e
+}
+
+function assert_state() {
+  local expected_a="$1"
+  local expected_a_hugetlb="$2"
+  local expected_b=""
+  local expected_b_hugetlb=""
+
+  if [ ! -z ${3:-} ] && [ ! -z ${4:-} ]; then
+    expected_b="$3"
+    expected_b_hugetlb="$4"
+  fi
+  local tolerance=$((5 * 1024 * 1024))
+
+  local actual_a
+  actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
+  if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
+    [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
+    echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
+    echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
+    echo fail
+
+    cleanup
+    exit 1
+  fi
+
+  local actual_a_hugetlb
+  actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
+  if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
+    [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
+    echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
+    echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
+    echo fail
+
+    cleanup
+    exit 1
+  fi
+
+  if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
+    return
+  fi
+
+  local actual_b
+  actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
+  if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
+    [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
+    echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
+    echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
+    echo fail
+
+    cleanup
+    exit 1
+  fi
+
+  local actual_b_hugetlb
+  actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
+  if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
+    [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
+    echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
+    echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
+    echo fail
+
+    cleanup
+    exit 1
+  fi
+}
+
+function setup() {
+  echo 100 >/proc/sys/vm/nr_hugepages
+  mkdir "$CGROUP_ROOT"/a
+  sleep 1
+  if [[ $cgroup2 ]]; then
+    echo "+hugetlb +memory" >$CGROUP_ROOT/a/cgroup.subtree_control
+  else
+    echo 0 >$CGROUP_ROOT/a/cpuset.mems
+    echo 0 >$CGROUP_ROOT/a/cpuset.cpus
+  fi
+
+  mkdir "$CGROUP_ROOT"/a/b
+
+  if [[ ! $cgroup2 ]]; then
+    echo 0 >$CGROUP_ROOT/a/b/cpuset.mems
+    echo 0 >$CGROUP_ROOT/a/b/cpuset.cpus
+  fi
+
+  mkdir -p "$MNT"
+  mount -t hugetlbfs none "$MNT"
+}
+
+write_hugetlbfs() {
+  local cgroup="$1"
+  local path="$2"
+  local size="$3"
+
+  if [[ $cgroup2 ]]; then
+    echo $$ >$CGROUP_ROOT/$cgroup/cgroup.procs
+  else
+    echo 0 >$CGROUP_ROOT/$cgroup/cpuset.mems
+    echo 0 >$CGROUP_ROOT/$cgroup/cpuset.cpus
+    echo $$ >"$CGROUP_ROOT/$cgroup/tasks"
+  fi
+  ./write_to_hugetlbfs -p "$path" -s "$size" -m 0 -o
+  if [[ $cgroup2 ]]; then
+    echo $$ >$CGROUP_ROOT/cgroup.procs
+  else
+    echo $$ >"$CGROUP_ROOT/tasks"
+  fi
+  echo
+}
+
+set -e
+
+size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
+
+cleanup
+
+echo
+echo
+echo Test charge, rmdir, uncharge
+setup
+echo mkdir
+mkdir $CGROUP_ROOT/test1
+
+echo write
+write_hugetlbfs test1 "$MNT"/test $size
+
+echo rmdir
+rmdir $CGROUP_ROOT/test1
+mkdir $CGROUP_ROOT/test1
+
+echo uncharge
+rm -rf /mnt/huge/*
+
+cleanup
+
+echo done
+echo
+echo
+if [[ ! $cgroup2 ]]; then
+  echo "Test parent and child hugetlb usage"
+  setup
+
+  echo write
+  write_hugetlbfs a "$MNT"/test $size
+
+  echo Assert memory charged correctly for parent use.
+  assert_state 0 $size 0 0
+
+  write_hugetlbfs a/b "$MNT"/test2 $size
+
+  echo Assert memory charged correctly for child use.
+  assert_state 0 $(($size * 2)) 0 $size
+
+  rmdir "$CGROUP_ROOT"/a/b
+  sleep 5
+  echo Assert memory reparent correctly.
+  assert_state 0 $(($size * 2))
+
+  rm -rf "$MNT"/*
+  umount "$MNT"
+  echo Assert memory uncharged correctly.
+  assert_state 0 0
+
+  cleanup
+fi
+
+echo
+echo
+echo "Test child only hugetlb usage"
+echo setup
+setup
+
+echo write
+write_hugetlbfs a/b "$MNT"/test2 $size
+
+echo Assert memory charged correctly for child only use.
+assert_state 0 $(($size)) 0 $size
+
+rmdir "$CGROUP_ROOT"/a/b
+echo Assert memory reparent correctly.
+assert_state 0 $size
+
+rm -rf "$MNT"/*
+umount "$MNT"
+echo Assert memory uncharged correctly.
+assert_state 0 0
+
+cleanup
+
+echo ALL PASS
+
+umount $CGROUP_ROOT
+rm -rf $CGROUP_ROOT
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
index 5a2d7b8efc40..6af951900aa3 100644
--- a/tools/testing/selftests/vm/map_hugetlb.c
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -45,20 +45,20 @@ static void check_bytes(char *addr)
 	printf("First hex is %x\n", *((unsigned int *)addr));
 }
 
-static void write_bytes(char *addr)
+static void write_bytes(char *addr, size_t length)
 {
 	unsigned long i;
 
-	for (i = 0; i < LENGTH; i++)
+	for (i = 0; i < length; i++)
 		*(addr + i) = (char)i;
 }
 
-static int read_bytes(char *addr)
+static int read_bytes(char *addr, size_t length)
 {
 	unsigned long i;
 
 	check_bytes(addr);
-	for (i = 0; i < LENGTH; i++)
+	for (i = 0; i < length; i++)
 		if (*(addr + i) != (char)i) {
 			printf("Mismatch at %lu\n", i);
 			return 1;
@@ -96,11 +96,11 @@ int main(int argc, char **argv)
 
 	printf("Returned address is %p\n", addr);
 	check_bytes(addr);
-	write_bytes(addr);
-	ret = read_bytes(addr);
+	write_bytes(addr, length);
+	ret = read_bytes(addr, length);
 
 	/* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
-	if (munmap(addr, LENGTH)) {
+	if (munmap(addr, length)) {
 		perror("munmap");
 		exit(1);
 	}
diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c
index 637b6d0ac0d0..11b2301f3aa3 100644
--- a/tools/testing/selftests/vm/mlock2-tests.c
+++ b/tools/testing/selftests/vm/mlock2-tests.c
@@ -67,59 +67,6 @@ out:
 	return ret;
 }
 
-static uint64_t get_pageflags(unsigned long addr)
-{
-	FILE *file;
-	uint64_t pfn;
-	unsigned long offset;
-
-	file = fopen("/proc/self/pagemap", "r");
-	if (!file) {
-		perror("fopen pagemap");
-		_exit(1);
-	}
-
-	offset = addr / getpagesize() * sizeof(pfn);
-
-	if (fseek(file, offset, SEEK_SET)) {
-		perror("fseek pagemap");
-		_exit(1);
-	}
-
-	if (fread(&pfn, sizeof(pfn), 1, file) != 1) {
-		perror("fread pagemap");
-		_exit(1);
-	}
-
-	fclose(file);
-	return pfn;
-}
-
-static uint64_t get_kpageflags(unsigned long pfn)
-{
-	uint64_t flags;
-	FILE *file;
-
-	file = fopen("/proc/kpageflags", "r");
-	if (!file) {
-		perror("fopen kpageflags");
-		_exit(1);
-	}
-
-	if (fseek(file, pfn * sizeof(flags), SEEK_SET)) {
-		perror("fseek kpageflags");
-		_exit(1);
-	}
-
-	if (fread(&flags, sizeof(flags), 1, file) != 1) {
-		perror("fread kpageflags");
-		_exit(1);
-	}
-
-	fclose(file);
-	return flags;
-}
-
 #define VMFLAGS "VmFlags:"
 
 static bool is_vmflag_set(unsigned long addr, const char *vmflag)
@@ -159,19 +106,13 @@ out:
 #define RSS  "Rss:"
 #define LOCKED "lo"
 
-static bool is_vma_lock_on_fault(unsigned long addr)
+static unsigned long get_value_for_name(unsigned long addr, const char *name)
 {
-	bool ret = false;
-	bool locked;
-	FILE *smaps = NULL;
-	unsigned long vma_size, vma_rss;
 	char *line = NULL;
-	char *value;
 	size_t size = 0;
-
-	locked = is_vmflag_set(addr, LOCKED);
-	if (!locked)
-		goto out;
+	char *value_ptr;
+	FILE *smaps = NULL;
+	unsigned long value = -1UL;
 
 	smaps = seek_to_smaps_entry(addr);
 	if (!smaps) {
@@ -180,112 +121,70 @@ static bool is_vma_lock_on_fault(unsigned long addr)
 	}
 
 	while (getline(&line, &size, smaps) > 0) {
-		if (!strstr(line, SIZE)) {
+		if (!strstr(line, name)) {
 			free(line);
 			line = NULL;
 			size = 0;
 			continue;
 		}
 
-		value = line + strlen(SIZE);
-		if (sscanf(value, "%lu kB", &vma_size) < 1) {
+		value_ptr = line + strlen(name);
+		if (sscanf(value_ptr, "%lu kB", &value) < 1) {
 			printf("Unable to parse smaps entry for Size\n");
 			goto out;
 		}
 		break;
 	}
 
-	while (getline(&line, &size, smaps) > 0) {
-		if (!strstr(line, RSS)) {
-			free(line);
-			line = NULL;
-			size = 0;
-			continue;
-		}
-
-		value = line + strlen(RSS);
-		if (sscanf(value, "%lu kB", &vma_rss) < 1) {
-			printf("Unable to parse smaps entry for Rss\n");
-			goto out;
-		}
-		break;
-	}
-
-	ret = locked && (vma_rss < vma_size);
 out:
-	free(line);
 	if (smaps)
 		fclose(smaps);
-	return ret;
+	free(line);
+	return value;
 }
 
-#define PRESENT_BIT     0x8000000000000000ULL
-#define PFN_MASK        0x007FFFFFFFFFFFFFULL
-#define UNEVICTABLE_BIT (1UL << 18)
-
-static int lock_check(char *map)
+static bool is_vma_lock_on_fault(unsigned long addr)
 {
-	unsigned long page_size = getpagesize();
-	uint64_t page1_flags, page2_flags;
+	bool locked;
+	unsigned long vma_size, vma_rss;
 
-	page1_flags = get_pageflags((unsigned long)map);
-	page2_flags = get_pageflags((unsigned long)map + page_size);
+	locked = is_vmflag_set(addr, LOCKED);
+	if (!locked)
+		return false;
 
-	/* Both pages should be present */
-	if (((page1_flags & PRESENT_BIT) == 0) ||
-	    ((page2_flags & PRESENT_BIT) == 0)) {
-		printf("Failed to make both pages present\n");
-		return 1;
-	}
+	vma_size = get_value_for_name(addr, SIZE);
+	vma_rss = get_value_for_name(addr, RSS);
 
-	page1_flags = get_kpageflags(page1_flags & PFN_MASK);
-	page2_flags = get_kpageflags(page2_flags & PFN_MASK);
+	/* only one page is faulted in */
+	return (vma_rss < vma_size);
+}
 
-	/* Both pages should be unevictable */
-	if (((page1_flags & UNEVICTABLE_BIT) == 0) ||
-	    ((page2_flags & UNEVICTABLE_BIT) == 0)) {
-		printf("Failed to make both pages unevictable\n");
-		return 1;
-	}
+#define PRESENT_BIT     0x8000000000000000ULL
+#define PFN_MASK        0x007FFFFFFFFFFFFFULL
+#define UNEVICTABLE_BIT (1UL << 18)
 
-	if (!is_vmflag_set((unsigned long)map, LOCKED)) {
-		printf("VMA flag %s is missing on page 1\n", LOCKED);
-		return 1;
-	}
+static int lock_check(unsigned long addr)
+{
+	bool locked;
+	unsigned long vma_size, vma_rss;
 
-	if (!is_vmflag_set((unsigned long)map + page_size, LOCKED)) {
-		printf("VMA flag %s is missing on page 2\n", LOCKED);
-		return 1;
-	}
+	locked = is_vmflag_set(addr, LOCKED);
+	if (!locked)
+		return false;
 
-	return 0;
+	vma_size = get_value_for_name(addr, SIZE);
+	vma_rss = get_value_for_name(addr, RSS);
+
+	return (vma_rss == vma_size);
 }
 
 static int unlock_lock_check(char *map)
 {
-	unsigned long page_size = getpagesize();
-	uint64_t page1_flags, page2_flags;
-
-	page1_flags = get_pageflags((unsigned long)map);
-	page2_flags = get_pageflags((unsigned long)map + page_size);
-	page1_flags = get_kpageflags(page1_flags & PFN_MASK);
-	page2_flags = get_kpageflags(page2_flags & PFN_MASK);
-
-	if ((page1_flags & UNEVICTABLE_BIT) || (page2_flags & UNEVICTABLE_BIT)) {
-		printf("A page is still marked unevictable after unlock\n");
-		return 1;
-	}
-
 	if (is_vmflag_set((unsigned long)map, LOCKED)) {
 		printf("VMA flag %s is present on page 1 after unlock\n", LOCKED);
 		return 1;
 	}
 
-	if (is_vmflag_set((unsigned long)map + page_size, LOCKED)) {
-		printf("VMA flag %s is present on page 2 after unlock\n", LOCKED);
-		return 1;
-	}
-
 	return 0;
 }
 
@@ -311,7 +210,7 @@ static int test_mlock_lock()
 		goto unmap;
 	}
 
-	if (lock_check(map))
+	if (!lock_check((unsigned long)map))
 		goto unmap;
 
 	/* Now unlock and recheck attributes */
@@ -330,64 +229,18 @@ out:
 
 static int onfault_check(char *map)
 {
-	unsigned long page_size = getpagesize();
-	uint64_t page1_flags, page2_flags;
-
-	page1_flags = get_pageflags((unsigned long)map);
-	page2_flags = get_pageflags((unsigned long)map + page_size);
-
-	/* Neither page should be present */
-	if ((page1_flags & PRESENT_BIT) || (page2_flags & PRESENT_BIT)) {
-		printf("Pages were made present by MLOCK_ONFAULT\n");
-		return 1;
-	}
-
 	*map = 'a';
-	page1_flags = get_pageflags((unsigned long)map);
-	page2_flags = get_pageflags((unsigned long)map + page_size);
-
-	/* Only page 1 should be present */
-	if ((page1_flags & PRESENT_BIT) == 0) {
-		printf("Page 1 is not present after fault\n");
-		return 1;
-	} else if (page2_flags & PRESENT_BIT) {
-		printf("Page 2 was made present\n");
-		return 1;
-	}
-
-	page1_flags = get_kpageflags(page1_flags & PFN_MASK);
-
-	/* Page 1 should be unevictable */
-	if ((page1_flags & UNEVICTABLE_BIT) == 0) {
-		printf("Failed to make faulted page unevictable\n");
-		return 1;
-	}
-
 	if (!is_vma_lock_on_fault((unsigned long)map)) {
 		printf("VMA is not marked for lock on fault\n");
 		return 1;
 	}
 
-	if (!is_vma_lock_on_fault((unsigned long)map + page_size)) {
-		printf("VMA is not marked for lock on fault\n");
-		return 1;
-	}
-
 	return 0;
 }
 
 static int unlock_onfault_check(char *map)
 {
 	unsigned long page_size = getpagesize();
-	uint64_t page1_flags;
-
-	page1_flags = get_pageflags((unsigned long)map);
-	page1_flags = get_kpageflags(page1_flags & PFN_MASK);
-
-	if (page1_flags & UNEVICTABLE_BIT) {
-		printf("Page 1 is still marked unevictable after unlock\n");
-		return 1;
-	}
 
 	if (is_vma_lock_on_fault((unsigned long)map) ||
 	    is_vma_lock_on_fault((unsigned long)map + page_size)) {
@@ -445,7 +298,6 @@ static int test_lock_onfault_of_present()
 	char *map;
 	int ret = 1;
 	unsigned long page_size = getpagesize();
-	uint64_t page1_flags, page2_flags;
 
 	map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
 		   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
@@ -465,17 +317,6 @@ static int test_lock_onfault_of_present()
 		goto unmap;
 	}
 
-	page1_flags = get_pageflags((unsigned long)map);
-	page2_flags = get_pageflags((unsigned long)map + page_size);
-	page1_flags = get_kpageflags(page1_flags & PFN_MASK);
-	page2_flags = get_kpageflags(page2_flags & PFN_MASK);
-
-	/* Page 1 should be unevictable */
-	if ((page1_flags & UNEVICTABLE_BIT) == 0) {
-		printf("Failed to make present page unevictable\n");
-		goto unmap;
-	}
-
 	if (!is_vma_lock_on_fault((unsigned long)map) ||
 	    !is_vma_lock_on_fault((unsigned long)map + page_size)) {
 		printf("VMA with present pages is not marked lock on fault\n");
@@ -507,7 +348,7 @@ static int test_munlockall()
 		goto out;
 	}
 
-	if (lock_check(map))
+	if (!lock_check((unsigned long)map))
 		goto unmap;
 
 	if (munlockall()) {
@@ -549,7 +390,7 @@ static int test_munlockall()
 		goto out;
 	}
 
-	if (lock_check(map))
+	if (!lock_check((unsigned long)map))
 		goto unmap;
 
 	if (munlockall()) {
diff --git a/tools/testing/selftests/vm/mremap_dontunmap.c b/tools/testing/selftests/vm/mremap_dontunmap.c
new file mode 100644
index 000000000000..ee06cb0b9efb
--- /dev/null
+++ b/tools/testing/selftests/vm/mremap_dontunmap.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Tests for mremap w/ MREMAP_DONTUNMAP.
+ *
+ * Copyright 2020, Brian Geffon <bgeffon@google.com>
+ */
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#ifndef MREMAP_DONTUNMAP
+#define MREMAP_DONTUNMAP 4
+#endif
+
+unsigned long page_size;
+char *page_buffer;
+
+static void dump_maps(void)
+{
+	char cmd[32];
+
+	snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid());
+	system(cmd);
+}
+
+#define BUG_ON(condition, description)					      \
+	do {								      \
+		if (condition) {					      \
+			fprintf(stderr, "[FAIL]\t%s():%d\t%s:%s\n", __func__, \
+				__LINE__, (description), strerror(errno));    \
+			dump_maps();					  \
+			exit(1);					      \
+		} 							      \
+	} while (0)
+
+// Try a simple operation for to "test" for kernel support this prevents
+// reporting tests as failed when it's run on an older kernel.
+static int kernel_support_for_mremap_dontunmap()
+{
+	int ret = 0;
+	unsigned long num_pages = 1;
+	void *source_mapping = mmap(NULL, num_pages * page_size, PROT_NONE,
+				    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	BUG_ON(source_mapping == MAP_FAILED, "mmap");
+
+	// This simple remap should only fail if MREMAP_DONTUNMAP isn't
+	// supported.
+	void *dest_mapping =
+	    mremap(source_mapping, num_pages * page_size, num_pages * page_size,
+		   MREMAP_DONTUNMAP | MREMAP_MAYMOVE, 0);
+	if (dest_mapping == MAP_FAILED) {
+		ret = errno;
+	} else {
+		BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1,
+		       "unable to unmap destination mapping");
+	}
+
+	BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+	       "unable to unmap source mapping");
+	return ret;
+}
+
+// This helper will just validate that an entire mapping contains the expected
+// byte.
+static int check_region_contains_byte(void *addr, unsigned long size, char byte)
+{
+	BUG_ON(size & (page_size - 1),
+	       "check_region_contains_byte expects page multiples");
+	BUG_ON((unsigned long)addr & (page_size - 1),
+	       "check_region_contains_byte expects page alignment");
+
+	memset(page_buffer, byte, page_size);
+
+	unsigned long num_pages = size / page_size;
+	unsigned long i;
+
+	// Compare each page checking that it contains our expected byte.
+	for (i = 0; i < num_pages; ++i) {
+		int ret =
+		    memcmp(addr + (i * page_size), page_buffer, page_size);
+		if (ret) {
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+// this test validates that MREMAP_DONTUNMAP moves the pagetables while leaving
+// the source mapping mapped.
+static void mremap_dontunmap_simple()
+{
+	unsigned long num_pages = 5;
+
+	void *source_mapping =
+	    mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	BUG_ON(source_mapping == MAP_FAILED, "mmap");
+
+	memset(source_mapping, 'a', num_pages * page_size);
+
+	// Try to just move the whole mapping anywhere (not fixed).
+	void *dest_mapping =
+	    mremap(source_mapping, num_pages * page_size, num_pages * page_size,
+		   MREMAP_DONTUNMAP | MREMAP_MAYMOVE, NULL);
+	BUG_ON(dest_mapping == MAP_FAILED, "mremap");
+
+	// Validate that the pages have been moved, we know they were moved if
+	// the dest_mapping contains a's.
+	BUG_ON(check_region_contains_byte
+	       (dest_mapping, num_pages * page_size, 'a') != 0,
+	       "pages did not migrate");
+	BUG_ON(check_region_contains_byte
+	       (source_mapping, num_pages * page_size, 0) != 0,
+	       "source should have no ptes");
+
+	BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1,
+	       "unable to unmap destination mapping");
+	BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+	       "unable to unmap source mapping");
+}
+
+// This test validates MREMAP_DONTUNMAP will move page tables to a specific
+// destination using MREMAP_FIXED, also while validating that the source
+// remains intact.
+static void mremap_dontunmap_simple_fixed()
+{
+	unsigned long num_pages = 5;
+
+	// Since we want to guarantee that we can remap to a point, we will
+	// create a mapping up front.
+	void *dest_mapping =
+	    mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	BUG_ON(dest_mapping == MAP_FAILED, "mmap");
+	memset(dest_mapping, 'X', num_pages * page_size);
+
+	void *source_mapping =
+	    mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	BUG_ON(source_mapping == MAP_FAILED, "mmap");
+	memset(source_mapping, 'a', num_pages * page_size);
+
+	void *remapped_mapping =
+	    mremap(source_mapping, num_pages * page_size, num_pages * page_size,
+		   MREMAP_FIXED | MREMAP_DONTUNMAP | MREMAP_MAYMOVE,
+		   dest_mapping);
+	BUG_ON(remapped_mapping == MAP_FAILED, "mremap");
+	BUG_ON(remapped_mapping != dest_mapping,
+	       "mremap should have placed the remapped mapping at dest_mapping");
+
+	// The dest mapping will have been unmap by mremap so we expect the Xs
+	// to be gone and replaced with a's.
+	BUG_ON(check_region_contains_byte
+	       (dest_mapping, num_pages * page_size, 'a') != 0,
+	       "pages did not migrate");
+
+	// And the source mapping will have had its ptes dropped.
+	BUG_ON(check_region_contains_byte
+	       (source_mapping, num_pages * page_size, 0) != 0,
+	       "source should have no ptes");
+
+	BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1,
+	       "unable to unmap destination mapping");
+	BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+	       "unable to unmap source mapping");
+}
+
+// This test validates that we can MREMAP_DONTUNMAP for a portion of an
+// existing mapping.
+static void mremap_dontunmap_partial_mapping()
+{
+	/*
+	 *  source mapping:
+	 *  --------------
+	 *  | aaaaaaaaaa |
+	 *  --------------
+	 *  to become:
+	 *  --------------
+	 *  | aaaaa00000 |
+	 *  --------------
+	 *  With the destination mapping containing 5 pages of As.
+	 *  ---------
+	 *  | aaaaa |
+	 *  ---------
+	 */
+	unsigned long num_pages = 10;
+	void *source_mapping =
+	    mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	BUG_ON(source_mapping == MAP_FAILED, "mmap");
+	memset(source_mapping, 'a', num_pages * page_size);
+
+	// We will grab the last 5 pages of the source and move them.
+	void *dest_mapping =
+	    mremap(source_mapping + (5 * page_size), 5 * page_size,
+		   5 * page_size,
+		   MREMAP_DONTUNMAP | MREMAP_MAYMOVE, NULL);
+	BUG_ON(dest_mapping == MAP_FAILED, "mremap");
+
+	// We expect the first 5 pages of the source to contain a's and the
+	// final 5 pages to contain zeros.
+	BUG_ON(check_region_contains_byte(source_mapping, 5 * page_size, 'a') !=
+	       0, "first 5 pages of source should have original pages");
+	BUG_ON(check_region_contains_byte
+	       (source_mapping + (5 * page_size), 5 * page_size, 0) != 0,
+	       "final 5 pages of source should have no ptes");
+
+	// Finally we expect the destination to have 5 pages worth of a's.
+	BUG_ON(check_region_contains_byte(dest_mapping, 5 * page_size, 'a') !=
+	       0, "dest mapping should contain ptes from the source");
+
+	BUG_ON(munmap(dest_mapping, 5 * page_size) == -1,
+	       "unable to unmap destination mapping");
+	BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+	       "unable to unmap source mapping");
+}
+
+// This test validates that we can remap over only a portion of a mapping.
+static void mremap_dontunmap_partial_mapping_overwrite(void)
+{
+	/*
+	 *  source mapping:
+	 *  ---------
+	 *  |aaaaa|
+	 *  ---------
+	 *  dest mapping initially:
+	 *  -----------
+	 *  |XXXXXXXXXX|
+	 *  ------------
+	 *  Source to become:
+	 *  ---------
+	 *  |00000|
+	 *  ---------
+	 *  With the destination mapping containing 5 pages of As.
+	 *  ------------
+	 *  |aaaaaXXXXX|
+	 *  ------------
+	 */
+	void *source_mapping =
+	    mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	BUG_ON(source_mapping == MAP_FAILED, "mmap");
+	memset(source_mapping, 'a', 5 * page_size);
+
+	void *dest_mapping =
+	    mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	BUG_ON(dest_mapping == MAP_FAILED, "mmap");
+	memset(dest_mapping, 'X', 10 * page_size);
+
+	// We will grab the last 5 pages of the source and move them.
+	void *remapped_mapping =
+	    mremap(source_mapping, 5 * page_size,
+		   5 * page_size,
+		   MREMAP_DONTUNMAP | MREMAP_MAYMOVE | MREMAP_FIXED, dest_mapping);
+	BUG_ON(dest_mapping == MAP_FAILED, "mremap");
+	BUG_ON(dest_mapping != remapped_mapping, "expected to remap to dest_mapping");
+
+	BUG_ON(check_region_contains_byte(source_mapping, 5 * page_size, 0) !=
+	       0, "first 5 pages of source should have no ptes");
+
+	// Finally we expect the destination to have 5 pages worth of a's.
+	BUG_ON(check_region_contains_byte(dest_mapping, 5 * page_size, 'a') != 0,
+			"dest mapping should contain ptes from the source");
+
+	// Finally the last 5 pages shouldn't have been touched.
+	BUG_ON(check_region_contains_byte(dest_mapping + (5 * page_size),
+				5 * page_size, 'X') != 0,
+			"dest mapping should have retained the last 5 pages");
+
+	BUG_ON(munmap(dest_mapping, 10 * page_size) == -1,
+	       "unable to unmap destination mapping");
+	BUG_ON(munmap(source_mapping, 5 * page_size) == -1,
+	       "unable to unmap source mapping");
+}
+
+int main(void)
+{
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	// test for kernel support for MREMAP_DONTUNMAP skipping the test if
+	// not.
+	if (kernel_support_for_mremap_dontunmap() != 0) {
+		printf("No kernel support for MREMAP_DONTUNMAP\n");
+		return KSFT_SKIP;
+	}
+
+	// Keep a page sized buffer around for when we need it.
+	page_buffer =
+	    mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	BUG_ON(page_buffer == MAP_FAILED, "unable to mmap a page.");
+
+	mremap_dontunmap_simple();
+	mremap_dontunmap_simple_fixed();
+	mremap_dontunmap_partial_mapping();
+	mremap_dontunmap_partial_mapping_overwrite();
+
+	BUG_ON(munmap(page_buffer, page_size) == -1,
+	       "unable to unmap page buffer");
+
+	printf("OK\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index f33714843198..665009ebfba4 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -123,6 +123,28 @@ else
 	echo "[PASS]"
 fi
 
+echo "--------------------------------------------"
+echo "running 'gup_benchmark -U' (normal/slow gup)"
+echo "--------------------------------------------"
+./gup_benchmark -U
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+echo "------------------------------------------"
+echo "running gup_benchmark -b (pin_user_pages)"
+echo "------------------------------------------"
+./gup_benchmark -b
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
 echo "-------------------"
 echo "running userfaultfd"
 echo "-------------------"
@@ -270,4 +292,19 @@ else
 	exitcode=1
 fi
 
+echo "------------------------------------"
+echo "running MREMAP_DONTUNMAP smoke test"
+echo "------------------------------------"
+./mremap_dontunmap
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	 echo "[SKIP]"
+	 exitcode=$ksft_skip
+else
+	echo "[FAIL]"
+	exitcode=1
+fi
 exit $exitcode
diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh
new file mode 100644
index 000000000000..d3d0d108924d
--- /dev/null
+++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+size=$1
+populate=$2
+write=$3
+cgroup=$4
+path=$5
+method=$6
+private=$7
+want_sleep=$8
+reserve=$9
+
+echo "Putting task in cgroup '$cgroup'"
+echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs
+
+echo "Method is $method"
+
+set +e
+./write_to_hugetlbfs -p "$path" -s "$size" "$write" "$populate" -m "$method" \
+      "$private" "$want_sleep" "$reserve"
diff --git a/tools/testing/selftests/vm/write_to_hugetlbfs.c b/tools/testing/selftests/vm/write_to_hugetlbfs.c
new file mode 100644
index 000000000000..110bc4e4015d
--- /dev/null
+++ b/tools/testing/selftests/vm/write_to_hugetlbfs.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program reserves and uses hugetlb memory, supporting a bunch of
+ * scenarios needed by the charged_reserved_hugetlb.sh test.
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/* Global definitions. */
+enum method {
+	HUGETLBFS,
+	MMAP_MAP_HUGETLB,
+	SHM,
+	MAX_METHOD
+};
+
+
+/* Global variables. */
+static const char *self;
+static char *shmaddr;
+static int shmid;
+
+/*
+ * Show usage and exit.
+ */
+static void exit_usage(void)
+{
+	printf("Usage: %s -p <path to hugetlbfs file> -s <size to map> "
+	       "[-m <0=hugetlbfs | 1=mmap(MAP_HUGETLB)>] [-l] [-r] "
+	       "[-o] [-w] [-n]\n",
+	       self);
+	exit(EXIT_FAILURE);
+}
+
+void sig_handler(int signo)
+{
+	printf("Received %d.\n", signo);
+	if (signo == SIGINT) {
+		printf("Deleting the memory\n");
+		if (shmdt((const void *)shmaddr) != 0) {
+			perror("Detach failure");
+			shmctl(shmid, IPC_RMID, NULL);
+			exit(4);
+		}
+
+		shmctl(shmid, IPC_RMID, NULL);
+		printf("Done deleting the memory\n");
+	}
+	exit(2);
+}
+
+int main(int argc, char **argv)
+{
+	int fd = 0;
+	int key = 0;
+	int *ptr = NULL;
+	int c = 0;
+	int size = 0;
+	char path[256] = "";
+	enum method method = MAX_METHOD;
+	int want_sleep = 0, private = 0;
+	int populate = 0;
+	int write = 0;
+	int reserve = 1;
+
+	unsigned long i;
+
+	if (signal(SIGINT, sig_handler) == SIG_ERR)
+		err(1, "\ncan't catch SIGINT\n");
+
+	/* Parse command-line arguments. */
+	setvbuf(stdout, NULL, _IONBF, 0);
+	self = argv[0];
+
+	while ((c = getopt(argc, argv, "s:p:m:owlrn")) != -1) {
+		switch (c) {
+		case 's':
+			size = atoi(optarg);
+			break;
+		case 'p':
+			strncpy(path, optarg, sizeof(path));
+			break;
+		case 'm':
+			if (atoi(optarg) >= MAX_METHOD) {
+				errno = EINVAL;
+				perror("Invalid -m.");
+				exit_usage();
+			}
+			method = atoi(optarg);
+			break;
+		case 'o':
+			populate = 1;
+			break;
+		case 'w':
+			write = 1;
+			break;
+		case 'l':
+			want_sleep = 1;
+			break;
+		case 'r':
+		    private
+			= 1;
+			break;
+		case 'n':
+			reserve = 0;
+			break;
+		default:
+			errno = EINVAL;
+			perror("Invalid arg");
+			exit_usage();
+		}
+	}
+
+	if (strncmp(path, "", sizeof(path)) != 0) {
+		printf("Writing to this path: %s\n", path);
+	} else {
+		errno = EINVAL;
+		perror("path not found");
+		exit_usage();
+	}
+
+	if (size != 0) {
+		printf("Writing this size: %d\n", size);
+	} else {
+		errno = EINVAL;
+		perror("size not found");
+		exit_usage();
+	}
+
+	if (!populate)
+		printf("Not populating.\n");
+	else
+		printf("Populating.\n");
+
+	if (!write)
+		printf("Not writing to memory.\n");
+
+	if (method == MAX_METHOD) {
+		errno = EINVAL;
+		perror("-m Invalid");
+		exit_usage();
+	} else
+		printf("Using method=%d\n", method);
+
+	if (!private)
+		printf("Shared mapping.\n");
+	else
+		printf("Private mapping.\n");
+
+	if (!reserve)
+		printf("NO_RESERVE mapping.\n");
+	else
+		printf("RESERVE mapping.\n");
+
+	switch (method) {
+	case HUGETLBFS:
+		printf("Allocating using HUGETLBFS.\n");
+		fd = open(path, O_CREAT | O_RDWR, 0777);
+		if (fd == -1)
+			err(1, "Failed to open file.");
+
+		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+			   (private ? MAP_PRIVATE : MAP_SHARED) |
+				   (populate ? MAP_POPULATE : 0) |
+				   (reserve ? 0 : MAP_NORESERVE),
+			   fd, 0);
+
+		if (ptr == MAP_FAILED) {
+			close(fd);
+			err(1, "Error mapping the file");
+		}
+		break;
+	case MMAP_MAP_HUGETLB:
+		printf("Allocating using MAP_HUGETLB.\n");
+		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+			   (private ? (MAP_PRIVATE | MAP_ANONYMOUS) :
+				      MAP_SHARED) |
+				   MAP_HUGETLB | (populate ? MAP_POPULATE : 0) |
+				   (reserve ? 0 : MAP_NORESERVE),
+			   -1, 0);
+
+		if (ptr == MAP_FAILED)
+			err(1, "mmap");
+
+		printf("Returned address is %p\n", ptr);
+		break;
+	case SHM:
+		printf("Allocating using SHM.\n");
+		shmid = shmget(key, size,
+			       SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+		if (shmid < 0) {
+			shmid = shmget(++key, size,
+				       SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+			if (shmid < 0)
+				err(1, "shmget");
+		}
+		printf("shmid: 0x%x, shmget key:%d\n", shmid, key);
+
+		ptr = shmat(shmid, NULL, 0);
+		if (ptr == (int *)-1) {
+			perror("Shared memory attach failure");
+			shmctl(shmid, IPC_RMID, NULL);
+			exit(2);
+		}
+		printf("shmaddr: %p\n", ptr);
+
+		break;
+	default:
+		errno = EINVAL;
+		err(1, "Invalid method.");
+	}
+
+	if (write) {
+		printf("Writing to memory.\n");
+		memset(ptr, 1, size);
+	}
+
+	if (want_sleep) {
+		/* Signal to caller that we're done. */
+		printf("DONE\n");
+
+		/* Hold memory until external kill signal is delivered. */
+		while (1)
+			sleep(100);
+	}
+
+	if (method == HUGETLBFS)
+		close(fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/watchdog/.gitignore b/tools/testing/selftests/watchdog/.gitignore
index 5aac51575c7e..61d7b89cdbca 100644
--- a/tools/testing/selftests/watchdog/.gitignore
+++ b/tools/testing/selftests/watchdog/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 watchdog-test
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 138d46b3f330..936e1ca9410e 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -527,11 +527,16 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0
 n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
 n0 wg set wg0 peer "$pub2" allowed-ips ::/0
 n0 wg set wg0 peer "$pub2" remove
-low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= )
-n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer }
-[[ -z $(n0 wg show wg0 peers) ]]
-n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer }
-[[ -z $(n0 wg show wg0 peers) ]]
+for low_order_point in AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38=; do
+	n0 wg set wg0 peer "$low_order_point" persistent-keepalive 1 endpoint 127.0.0.1:1111
+done
+[[ -n $(n0 wg show wg0 peers) ]]
+exec 4< <(n0 ncat -l -u -p 1111)
+ncat_pid=$!
+waitncatudp $netns0 $ncat_pid
+ip0 link set wg0 up
+! read -r -n 1 -t 2 <&4 || false
+kill $ncat_pid
 ip0 link del wg0
 
 declare -A objects
diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore
index 415b542a9d59..bfa15e6feb2f 100644
--- a/tools/testing/selftests/wireguard/qemu/.gitignore
+++ b/tools/testing/selftests/wireguard/qemu/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 build/
 distfiles/
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 28d477683e8a..90598a425c18 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -41,7 +41,7 @@ $(DISTFILES_PATH)/$(1):
 	flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3)  $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
 endef
 
-$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
+$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8))
 $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
 $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
 $(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index 90bc9813cadc..c9698120ac9d 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -13,7 +13,6 @@
 #include <fcntl.h>
 #include <sys/wait.h>
 #include <sys/mount.h>
-#include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/io.h>
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index af9323a0b6e0..d531de13c95b 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -56,7 +56,6 @@ CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n
 CONFIG_HZ_PERIODIC=n
 CONFIG_HIGH_RES_TIMERS=y
-CONFIG_COMPAT_32BIT_TIME=y
 CONFIG_ARCH_RANDOM=y
 CONFIG_FILE_LOCKING=y
 CONFIG_POSIX_TIMERS=y
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
index 7757f73ff9a3..022a1f3b64ef 100644
--- a/tools/testing/selftests/x86/.gitignore
+++ b/tools/testing/selftests/x86/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 *_32
 *_64
 single_step_syscall
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 6f22238f3217..12aaa063196e 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -414,8 +414,12 @@ int main()
 
 #if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16)
 	vsyscall32 = (void *)getauxval(AT_SYSINFO);
-	printf("[RUN]\tCheck AT_SYSINFO return regs\n");
-	test_sys32_regs(do_full_vsyscall32);
+	if (vsyscall32) {
+		printf("[RUN]\tCheck AT_SYSINFO return regs\n");
+		test_sys32_regs(do_full_vsyscall32);
+	} else {
+		printf("[SKIP]\tAT_SYSINFO is not available\n");
+	}
 #endif
 
 	test_ptrace_syscall_restart();
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 35edd61d1663..42052db0f870 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -259,6 +259,11 @@ static void test_one_clock_gettime(int clock, const char *name)
 
 static void test_clock_gettime(void)
 {
+	if (!vdso_clock_gettime) {
+		printf("[SKIP]\tNo vDSO, so skipping clock_gettime() tests\n");
+		return;
+	}
+
 	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
 	     clock++) {
 		test_one_clock_gettime(clock, clocknames[clock]);
diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c
index 29a5c94c4b50..fe99f2434155 100644
--- a/tools/testing/selftests/x86/vdso_restorer.c
+++ b/tools/testing/selftests/x86/vdso_restorer.c
@@ -15,6 +15,7 @@
 
 #include <err.h>
 #include <stdio.h>
+#include <dlfcn.h>
 #include <string.h>
 #include <signal.h>
 #include <unistd.h>
@@ -46,11 +47,23 @@ int main()
 	int nerrs = 0;
 	struct real_sigaction sa;
 
+	void *vdso = dlopen("linux-vdso.so.1",
+			    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-gate.so.1",
+			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso) {
+		printf("[SKIP]\tFailed to find vDSO.  Tests are not expected to work.\n");
+		return 0;
+	}
+
 	memset(&sa, 0, sizeof(sa));
 	sa.handler = handler_with_siginfo;
 	sa.flags = SA_SIGINFO;
 	sa.restorer = NULL;	/* request kernel-provided restorer */
 
+	printf("[RUN]\tRaise a signal, SA_SIGINFO, sa.restorer == NULL\n");
+
 	if (syscall(SYS_rt_sigaction, SIGUSR1, &sa, NULL, 8) != 0)
 		err(1, "raw rt_sigaction syscall");
 
@@ -63,6 +76,8 @@ int main()
 		nerrs++;
 	}
 
+	printf("[RUN]\tRaise a signal, !SA_SIGINFO, sa.restorer == NULL\n");
+
 	sa.flags = 0;
 	sa.handler = handler_without_siginfo;
 	if (syscall(SYS_sigaction, SIGUSR1, &sa, 0) != 0)
diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore
index 7f7a2ccc30c4..87ca2731cff9 100644
--- a/tools/testing/vsock/.gitignore
+++ b/tools/testing/vsock/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 *.d
 vsock_test
 vsock_diag_test
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 1d8b93f1af31..5a4fb80fa832 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -55,6 +55,78 @@ static void test_stream_connection_reset(const struct test_opts *opts)
 	close(fd);
 }
 
+static void test_stream_bind_only_client(const struct test_opts *opts)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} addr = {
+		.svm = {
+			.svm_family = AF_VSOCK,
+			.svm_port = 1234,
+			.svm_cid = opts->peer_cid,
+		},
+	};
+	int ret;
+	int fd;
+
+	/* Wait for the server to be ready */
+	control_expectln("BIND");
+
+	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+	timeout_begin(TIMEOUT);
+	do {
+		ret = connect(fd, &addr.sa, sizeof(addr.svm));
+		timeout_check("connect");
+	} while (ret < 0 && errno == EINTR);
+	timeout_end();
+
+	if (ret != -1) {
+		fprintf(stderr, "expected connect(2) failure, got %d\n", ret);
+		exit(EXIT_FAILURE);
+	}
+	if (errno != ECONNRESET) {
+		fprintf(stderr, "unexpected connect(2) errno %d\n", errno);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Notify the server that the client has finished */
+	control_writeln("DONE");
+
+	close(fd);
+}
+
+static void test_stream_bind_only_server(const struct test_opts *opts)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} addr = {
+		.svm = {
+			.svm_family = AF_VSOCK,
+			.svm_port = 1234,
+			.svm_cid = VMADDR_CID_ANY,
+		},
+	};
+	int fd;
+
+	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+	if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+		perror("bind");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Notify the client that the server is ready */
+	control_writeln("BIND");
+
+	/* Wait for the client to finish */
+	control_expectln("DONE");
+
+	close(fd);
+}
+
 static void test_stream_client_close_client(const struct test_opts *opts)
 {
 	int fd;
@@ -213,6 +285,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_stream_connection_reset,
 	},
 	{
+		.name = "SOCK_STREAM bind only",
+		.run_client = test_stream_bind_only_client,
+		.run_server = test_stream_bind_only_server,
+	},
+	{
 		.name = "SOCK_STREAM client close",
 		.run_client = test_stream_client_close_client,
 		.run_server = test_stream_client_close_server,
diff --git a/tools/thermal/tmon/.gitignore b/tools/thermal/tmon/.gitignore
index 06e96be65276..d9e97a0308f5 100644
--- a/tools/thermal/tmon/.gitignore
+++ b/tools/thermal/tmon/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 /tmon
diff --git a/tools/usb/.gitignore b/tools/usb/.gitignore
index 1b7448981435..fce1ef5a9267 100644
--- a/tools/usb/.gitignore
+++ b/tools/usb/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 ffs-test
 testusb
diff --git a/tools/usb/usbip/.gitignore b/tools/usb/usbip/.gitignore
index 03b892c8bd8c..597361a96dbb 100644
--- a/tools/usb/usbip/.gitignore
+++ b/tools/usb/usbip/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 Makefile
 Makefile.in
 aclocal.m4
diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore
index 1cfbb0157a46..075588c4da08 100644
--- a/tools/virtio/.gitignore
+++ b/tools/virtio/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 *.d
 virtio_test
 vringh_test
diff --git a/tools/vm/.gitignore b/tools/vm/.gitignore
index 44f095fa2604..79bb92ae1bb3 100644
--- a/tools/vm/.gitignore
+++ b/tools/vm/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 slabinfo
 page-types