summaryrefslogtreecommitdiffstats
path: root/tools/testing
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing')
-rw-r--r--tools/testing/nvdimm/Kbuild1
-rw-r--r--tools/testing/nvdimm/acpi_nfit_test.c8
-rw-r--r--tools/testing/nvdimm/test/nfit.c4
-rw-r--r--tools/testing/nvdimm/test/nfit_test.h24
-rw-r--r--tools/testing/selftests/bpf/.gitignore8
-rw-r--r--tools/testing/selftests/bpf/Makefile19
-rw-r--r--tools/testing/selftests/bpf/bpf_flow.c373
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h25
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.c140
-rw-r--r--tools/testing/selftests/bpf/netcnt_common.h24
-rw-r--r--tools/testing/selftests/bpf/netcnt_prog.c71
-rw-r--r--tools/testing/selftests/bpf/test_btf.c179
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c60
-rw-r--r--tools/testing/selftests/bpf/test_flow_dissector.c782
-rwxr-xr-xtools/testing/selftests/bpf/test_flow_dissector.sh115
-rwxr-xr-xtools/testing/selftests/bpf/test_libbpf.sh2
-rw-r--r--tools/testing/selftests/bpf/test_maps.c122
-rw-r--r--tools/testing/selftests/bpf/test_netcnt.c158
-rw-r--r--tools/testing/selftests/bpf/test_progs.c157
-rw-r--r--tools/testing/selftests/bpf/test_queue_map.c4
-rw-r--r--tools/testing/selftests/bpf/test_queue_stack_map.h59
-rw-r--r--tools/testing/selftests/bpf/test_section_names.c208
-rw-r--r--tools/testing/selftests/bpf/test_sk_lookup_kern.c180
-rw-r--r--tools/testing/selftests/bpf/test_socket_cookie.c6
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c373
-rw-r--r--tools/testing/selftests/bpf/test_sockmap_kern.h97
-rw-r--r--tools/testing/selftests/bpf/test_stack_map.c4
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_kern.c38
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c31
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c1623
-rw-r--r--tools/testing/selftests/bpf/test_xdp_vlan.c292
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan.sh195
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c8
-rwxr-xr-xtools/testing/selftests/bpf/with_addr.sh54
-rwxr-xr-xtools/testing/selftests/bpf/with_tunnels.sh36
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh347
-rw-r--r--tools/testing/selftests/kvm/.gitignore14
-rw-r--r--tools/testing/selftests/kvm/Makefile39
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c374
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h55
-rw-r--r--tools/testing/selftests/kvm/include/evmcs.h1098
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h169
-rw-r--r--tools/testing/selftests/kvm/include/sparsebit.h6
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h6
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h (renamed from tools/testing/selftests/kvm/include/x86.h)28
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h (renamed from tools/testing/selftests/kvm/include/vmx.h)35
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c311
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c2
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c564
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h33
-rw-r--r--tools/testing/selftests/kvm/lib/ucall.c144
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c (renamed from tools/testing/selftests/kvm/lib/x86.c)263
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c (renamed from tools/testing/selftests/kvm/lib/vmx.c)55
-rw-r--r--tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c (renamed from tools/testing/selftests/kvm/cr4_cpuid_sync_test.c)14
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c160
-rw-r--r--tools/testing/selftests/kvm/x86_64/platform_info_test.c (renamed from tools/testing/selftests/kvm/platform_info_test.c)14
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_sregs_test.c (renamed from tools/testing/selftests/kvm/set_sregs_test.c)2
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c (renamed from tools/testing/selftests/kvm/state_test.c)47
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c (renamed from tools/testing/selftests/kvm/sync_regs_test.c)2
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/vmx_tsc_adjust_test.c)24
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile4
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh14
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh167
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh69
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh27
-rw-r--r--tools/testing/selftests/net/ip_defrag.c393
-rwxr-xr-xtools/testing/selftests/net/ip_defrag.sh39
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh327
-rw-r--r--tools/testing/selftests/net/tls.c57
-rw-r--r--tools/testing/selftests/powerpc/Makefile3
-rw-r--r--tools/testing/selftests/powerpc/include/reg.h1
-rw-r--r--tools/testing/selftests/powerpc/include/utils.h18
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore3
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/mm/wild_bctr.c155
-rw-r--r--tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c8
-rw-r--r--tools/testing/selftests/powerpc/ptrace/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c228
-rw-r--r--tools/testing/selftests/powerpc/security/Makefile9
-rw-r--r--tools/testing/selftests/powerpc/security/rfi_flush.c132
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tmspr.c27
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c9
-rw-r--r--tools/testing/selftests/powerpc/tm/tm.h9
-rw-r--r--tools/testing/selftests/powerpc/utils.c152
-rw-r--r--tools/testing/selftests/tc-testing/README2
-rw-r--r--tools/testing/selftests/tc-testing/bpf/Makefile29
-rw-r--r--tools/testing/selftests/tc-testing/bpf/action.c23
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py66
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json16
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gact.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json24
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py4
-rw-r--r--tools/testing/selftests/vm/.gitignore1
-rw-r--r--tools/testing/selftests/vm/Makefile1
-rw-r--r--tools/testing/selftests/vm/gup_benchmark.c42
-rw-r--r--tools/testing/selftests/vm/map_fixed_noreplace.c206
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c134
99 files changed, 10413 insertions, 1305 deletions
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 0392153a0009..778ceb651000 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -22,6 +22,7 @@ NVDIMM_SRC := $(DRIVERS)/nvdimm
ACPI_SRC := $(DRIVERS)/acpi/nfit
DAX_SRC := $(DRIVERS)/dax
ccflags-y := -I$(src)/$(NVDIMM_SRC)/
+ccflags-y += -I$(src)/$(ACPI_SRC)/
obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c
index 43521512e577..fec8fb1b7715 100644
--- a/tools/testing/nvdimm/acpi_nfit_test.c
+++ b/tools/testing/nvdimm/acpi_nfit_test.c
@@ -4,5 +4,13 @@
#include <linux/module.h>
#include <linux/printk.h>
#include "watermark.h"
+#include <nfit.h>
nfit_test_watermark(acpi_nfit);
+
+/* strong / override definition of nfit_intel_shutdown_status */
+void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
+{
+ set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags);
+ nfit_mem->dirty_shutdown = 42;
+}
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index cffc2c5a778d..9527d47a1070 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -24,6 +24,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <nd-core.h>
+#include <intel.h>
#include <nfit.h>
#include <nd.h>
#include "nfit_test.h"
@@ -148,6 +149,7 @@ static const struct nd_intel_smart smart_def = {
| ND_INTEL_SMART_ALARM_VALID
| ND_INTEL_SMART_USED_VALID
| ND_INTEL_SMART_SHUTDOWN_VALID
+ | ND_INTEL_SMART_SHUTDOWN_COUNT_VALID
| ND_INTEL_SMART_MTEMP_VALID
| ND_INTEL_SMART_CTEMP_VALID,
.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
@@ -160,8 +162,8 @@ static const struct nd_intel_smart smart_def = {
.ait_status = 1,
.life_used = 5,
.shutdown_state = 0,
+ .shutdown_count = 42,
.vendor_size = 0,
- .shutdown_count = 100,
};
struct nfit_test_fw {
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 33752e06ff8d..ade14fe3837e 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -117,30 +117,6 @@ struct nd_cmd_ars_err_inj_stat {
#define ND_INTEL_SMART_INJECT_FATAL (1 << 2)
#define ND_INTEL_SMART_INJECT_SHUTDOWN (1 << 3)
-struct nd_intel_smart {
- __u32 status;
- union {
- struct {
- __u32 flags;
- __u8 reserved0[4];
- __u8 health;
- __u8 spares;
- __u8 life_used;
- __u8 alarm_flags;
- __u16 media_temperature;
- __u16 ctrl_temperature;
- __u32 shutdown_count;
- __u8 ait_status;
- __u16 pmic_temperature;
- __u8 reserved1[8];
- __u8 shutdown_state;
- __u32 vendor_size;
- __u8 vendor_data[92];
- } __packed;
- __u8 data[128];
- };
-} __packed;
-
struct nd_intel_smart_threshold {
__u32 status;
union {
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 49938d72cf63..1b799e30c06d 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -19,3 +19,11 @@ test_btf
test_sockmap
test_lirc_mode2_user
get_cgroup_id_user
+test_skb_cgroup_id_user
+test_socket_cookie
+test_cgroup_storage
+test_select_reuseport
+test_flow_dissector
+flow_dissector_load
+test_netcnt
+test_section_names
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fff7fb1285fc..e39dfb4e7970 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,8 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
- test_socket_cookie test_cgroup_storage test_select_reuseport
+ test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
+ test_netcnt
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
@@ -35,7 +36,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
- test_skb_cgroup_id_kern.o
+ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
+ test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -47,10 +49,15 @@ TEST_PROGS := test_kmod.sh \
test_tunnel.sh \
test_lwt_seg6local.sh \
test_lirc_mode2.sh \
- test_skb_cgroup_id.sh
+ test_skb_cgroup_id.sh \
+ test_flow_dissector.sh \
+ test_xdp_vlan.sh
+
+TEST_PROGS_EXTENDED := with_addr.sh
# Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
+ flow_dissector_load test_flow_dissector
include ../lib.mk
@@ -70,6 +77,7 @@ $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
$(OUTPUT)/test_progs: trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
+$(OUTPUT)/test_netcnt: cgroup_helpers.c
.PHONY: force
@@ -110,6 +118,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
+$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
+$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
+
BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c
new file mode 100644
index 000000000000..107350a7821d
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_flow.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <limits.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <linux/if_tunnel.h>
+#include <linux/mpls.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+#define PROG(F) SEC(#F) int bpf_func_##F
+
+/* These are the identifiers of the BPF programs that will be used in tail
+ * calls. Name is limited to 16 characters, with the terminating character and
+ * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
+ */
+enum {
+ IP,
+ IPV6,
+ IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */
+ IPV6FR, /* Fragmentation IPv6 Extension Header */
+ MPLS,
+ VLAN,
+};
+
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+#define IP6_MF 0x0001
+#define IP6_OFFSET 0xFFF8
+
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+struct gre_hdr {
+ __be16 flags;
+ __be16 proto;
+};
+
+struct frag_hdr {
+ __u8 nexthdr;
+ __u8 reserved;
+ __be16 frag_off;
+ __be32 identification;
+};
+
+struct bpf_map_def SEC("maps") jmp_table = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 8
+};
+
+static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
+ __u16 hdr_size,
+ void *buffer)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ __u16 nhoff = skb->flow_keys->nhoff;
+ __u8 *hdr;
+
+ /* Verifies this variable offset does not overflow */
+ if (nhoff > (USHRT_MAX - hdr_size))
+ return NULL;
+
+ hdr = data + nhoff;
+ if (hdr + hdr_size <= data_end)
+ return hdr;
+
+ if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size))
+ return NULL;
+
+ return buffer;
+}
+
+/* Dispatches on ETHERTYPE */
+static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+
+ keys->n_proto = proto;
+ switch (proto) {
+ case bpf_htons(ETH_P_IP):
+ bpf_tail_call(skb, &jmp_table, IP);
+ break;
+ case bpf_htons(ETH_P_IPV6):
+ bpf_tail_call(skb, &jmp_table, IPV6);
+ break;
+ case bpf_htons(ETH_P_MPLS_MC):
+ case bpf_htons(ETH_P_MPLS_UC):
+ bpf_tail_call(skb, &jmp_table, MPLS);
+ break;
+ case bpf_htons(ETH_P_8021Q):
+ case bpf_htons(ETH_P_8021AD):
+ bpf_tail_call(skb, &jmp_table, VLAN);
+ break;
+ default:
+ /* Protocol not supported */
+ return BPF_DROP;
+ }
+
+ return BPF_DROP;
+}
+
+SEC("dissect")
+int _dissect(struct __sk_buff *skb)
+{
+ if (!skb->vlan_present)
+ return parse_eth_proto(skb, skb->protocol);
+ else
+ return parse_eth_proto(skb, skb->vlan_proto);
+}
+
+/* Parses on IPPROTO_* */
+static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ void *data_end = (void *)(long)skb->data_end;
+ struct icmphdr *icmp, _icmp;
+ struct gre_hdr *gre, _gre;
+ struct ethhdr *eth, _eth;
+ struct tcphdr *tcp, _tcp;
+ struct udphdr *udp, _udp;
+
+ keys->ip_proto = proto;
+ switch (proto) {
+ case IPPROTO_ICMP:
+ icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
+ if (!icmp)
+ return BPF_DROP;
+ return BPF_OK;
+ case IPPROTO_IPIP:
+ keys->is_encap = true;
+ return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
+ case IPPROTO_IPV6:
+ keys->is_encap = true;
+ return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
+ case IPPROTO_GRE:
+ gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
+ if (!gre)
+ return BPF_DROP;
+
+ if (bpf_htons(gre->flags & GRE_VERSION))
+ /* Only inspect standard GRE packets with version 0 */
+ return BPF_OK;
+
+ keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
+ if (GRE_IS_CSUM(gre->flags))
+ keys->nhoff += 4; /* Step over chksum and Padding */
+ if (GRE_IS_KEY(gre->flags))
+ keys->nhoff += 4; /* Step over key */
+ if (GRE_IS_SEQ(gre->flags))
+ keys->nhoff += 4; /* Step over sequence number */
+
+ keys->is_encap = true;
+
+ if (gre->proto == bpf_htons(ETH_P_TEB)) {
+ eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
+ &_eth);
+ if (!eth)
+ return BPF_DROP;
+
+ keys->nhoff += sizeof(*eth);
+
+ return parse_eth_proto(skb, eth->h_proto);
+ } else {
+ return parse_eth_proto(skb, gre->proto);
+ }
+ case IPPROTO_TCP:
+ tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
+ if (!tcp)
+ return BPF_DROP;
+
+ if (tcp->doff < 5)
+ return BPF_DROP;
+
+ if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
+ return BPF_DROP;
+
+ keys->thoff = keys->nhoff;
+ keys->sport = tcp->source;
+ keys->dport = tcp->dest;
+ return BPF_OK;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
+ if (!udp)
+ return BPF_DROP;
+
+ keys->thoff = keys->nhoff;
+ keys->sport = udp->source;
+ keys->dport = udp->dest;
+ return BPF_OK;
+ default:
+ return BPF_DROP;
+ }
+
+ return BPF_DROP;
+}
+
+static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+
+ keys->ip_proto = nexthdr;
+ switch (nexthdr) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_DSTOPTS:
+ bpf_tail_call(skb, &jmp_table, IPV6OP);
+ break;
+ case IPPROTO_FRAGMENT:
+ bpf_tail_call(skb, &jmp_table, IPV6FR);
+ break;
+ default:
+ return parse_ip_proto(skb, nexthdr);
+ }
+
+ return BPF_DROP;
+}
+
+PROG(IP)(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph, _iph;
+ bool done = false;
+
+ iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
+ if (!iph)
+ return BPF_DROP;
+
+ /* IP header cannot be smaller than 20 bytes */
+ if (iph->ihl < 5)
+ return BPF_DROP;
+
+ keys->addr_proto = ETH_P_IP;
+ keys->ipv4_src = iph->saddr;
+ keys->ipv4_dst = iph->daddr;
+
+ keys->nhoff += iph->ihl << 2;
+ if (data + keys->nhoff > data_end)
+ return BPF_DROP;
+
+ if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
+ keys->is_frag = true;
+ if (iph->frag_off & bpf_htons(IP_OFFSET))
+ /* From second fragment on, packets do not have headers
+ * we can parse.
+ */
+ done = true;
+ else
+ keys->is_first_frag = true;
+ }
+
+ if (done)
+ return BPF_OK;
+
+ return parse_ip_proto(skb, iph->protocol);
+}
+
+PROG(IPV6)(struct __sk_buff *skb)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ struct ipv6hdr *ip6h, _ip6h;
+
+ ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+ if (!ip6h)
+ return BPF_DROP;
+
+ keys->addr_proto = ETH_P_IPV6;
+ memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
+
+ keys->nhoff += sizeof(struct ipv6hdr);
+
+ return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+
+PROG(IPV6OP)(struct __sk_buff *skb)
+{
+ struct ipv6_opt_hdr *ip6h, _ip6h;
+
+ ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+ if (!ip6h)
+ return BPF_DROP;
+
+ /* hlen is in 8-octets and does not include the first 8 bytes
+ * of the header
+ */
+ skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3;
+
+ return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+
+PROG(IPV6FR)(struct __sk_buff *skb)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ struct frag_hdr *fragh, _fragh;
+
+ fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
+ if (!fragh)
+ return BPF_DROP;
+
+ keys->nhoff += sizeof(*fragh);
+ keys->is_frag = true;
+ if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
+ keys->is_first_frag = true;
+
+ return parse_ipv6_proto(skb, fragh->nexthdr);
+}
+
+PROG(MPLS)(struct __sk_buff *skb)
+{
+ struct mpls_label *mpls, _mpls;
+
+ mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
+ if (!mpls)
+ return BPF_DROP;
+
+ return BPF_OK;
+}
+
+PROG(VLAN)(struct __sk_buff *skb)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ struct vlan_hdr *vlan, _vlan;
+ __be16 proto;
+
+ /* Peek back to see if single or double-tagging */
+ if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto,
+ sizeof(proto)))
+ return BPF_DROP;
+
+ /* Account for double-tagging */
+ if (proto == bpf_htons(ETH_P_8021AD)) {
+ vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+ if (!vlan)
+ return BPF_DROP;
+
+ if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
+ return BPF_DROP;
+
+ keys->nhoff += sizeof(*vlan);
+ }
+
+ vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+ if (!vlan)
+ return BPF_DROP;
+
+ keys->nhoff += sizeof(*vlan);
+ /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
+ if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
+ vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
+ return BPF_DROP;
+
+ return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index e4be7730222d..686e57ce40f4 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value,
(void *) BPF_FUNC_map_update_elem;
static int (*bpf_map_delete_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_map_push_elem;
+static int (*bpf_map_pop_elem)(void *map, void *value) =
+ (void *) BPF_FUNC_map_pop_elem;
+static int (*bpf_map_peek_elem)(void *map, void *value) =
+ (void *) BPF_FUNC_map_peek_elem;
static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
(void *) BPF_FUNC_probe_read;
static unsigned long long (*bpf_ktime_get_ns)(void) =
@@ -104,6 +111,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
(void *) BPF_FUNC_msg_cork_bytes;
static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
(void *) BPF_FUNC_msg_pull_data;
+static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) =
+ (void *) BPF_FUNC_msg_push_data;
static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
(void *) BPF_FUNC_bind;
static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
@@ -143,6 +152,22 @@ static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
(void *) BPF_FUNC_skb_cgroup_id;
static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
(void *) BPF_FUNC_skb_ancestor_cgroup_id;
+static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
+ struct bpf_sock_tuple *tuple,
+ int size, unsigned int netns_id,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_sk_lookup_tcp;
+static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
+ struct bpf_sock_tuple *tuple,
+ int size, unsigned int netns_id,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_sk_lookup_udp;
+static int (*bpf_sk_release)(struct bpf_sock *sk) =
+ (void *) BPF_FUNC_sk_release;
+static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) =
+ (void *) BPF_FUNC_skb_vlan_push;
+static int (*bpf_skb_vlan_pop)(void *ctx) =
+ (void *) BPF_FUNC_skb_vlan_pop;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index b4994a94968b..dd49df5e2df4 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -18,3 +18,5 @@ CONFIG_CRYPTO_HMAC=m
CONFIG_CRYPTO_SHA256=m
CONFIG_VXLAN=y
CONFIG_GENEVE=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_LWTUNNEL=y
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
new file mode 100644
index 000000000000..d3273b5b3173
--- /dev/null
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
+const char *cfg_map_name = "jmp_table";
+bool cfg_attach = true;
+char *cfg_section_name;
+char *cfg_path_name;
+
+static void load_and_attach_program(void)
+{
+ struct bpf_program *prog, *main_prog;
+ struct bpf_map *prog_array;
+ int i, fd, prog_fd, ret;
+ struct bpf_object *obj;
+ int prog_array_fd;
+
+ ret = bpf_prog_load(cfg_path_name, BPF_PROG_TYPE_FLOW_DISSECTOR, &obj,
+ &prog_fd);
+ if (ret)
+ error(1, 0, "bpf_prog_load %s", cfg_path_name);
+
+ main_prog = bpf_object__find_program_by_title(obj, cfg_section_name);
+ if (!main_prog)
+ error(1, 0, "bpf_object__find_program_by_title %s",
+ cfg_section_name);
+
+ prog_fd = bpf_program__fd(main_prog);
+ if (prog_fd < 0)
+ error(1, 0, "bpf_program__fd");
+
+ prog_array = bpf_object__find_map_by_name(obj, cfg_map_name);
+ if (!prog_array)
+ error(1, 0, "bpf_object__find_map_by_name %s", cfg_map_name);
+
+ prog_array_fd = bpf_map__fd(prog_array);
+ if (prog_array_fd < 0)
+ error(1, 0, "bpf_map__fd %s", cfg_map_name);
+
+ i = 0;
+ bpf_object__for_each_program(prog, obj) {
+ fd = bpf_program__fd(prog);
+ if (fd < 0)
+ error(1, 0, "bpf_program__fd");
+
+ if (fd != prog_fd) {
+ printf("%d: %s\n", i, bpf_program__title(prog, false));
+ bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY);
+ ++i;
+ }
+ }
+
+ ret = bpf_prog_attach(prog_fd, 0 /* Ignore */, BPF_FLOW_DISSECTOR, 0);
+ if (ret)
+ error(1, 0, "bpf_prog_attach %s", cfg_path_name);
+
+ ret = bpf_object__pin(obj, cfg_pin_path);
+ if (ret)
+ error(1, 0, "bpf_object__pin %s", cfg_pin_path);
+
+}
+
+static void detach_program(void)
+{
+ char command[64];
+ int ret;
+
+ ret = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+ if (ret)
+ error(1, 0, "bpf_prog_detach");
+
+ /* To unpin, it is necessary and sufficient to just remove this dir */
+ sprintf(command, "rm -r %s", cfg_pin_path);
+ ret = system(command);
+ if (ret)
+ error(1, errno, command);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ bool attach = false;
+ bool detach = false;
+ int c;
+
+ while ((c = getopt(argc, argv, "adp:s:")) != -1) {
+ switch (c) {
+ case 'a':
+ if (detach)
+ error(1, 0, "attach/detach are exclusive");
+ attach = true;
+ break;
+ case 'd':
+ if (attach)
+ error(1, 0, "attach/detach are exclusive");
+ detach = true;
+ break;
+ case 'p':
+ if (cfg_path_name)
+ error(1, 0, "only one prog name can be given");
+
+ cfg_path_name = optarg;
+ break;
+ case 's':
+ if (cfg_section_name)
+ error(1, 0, "only one section can be given");
+
+ cfg_section_name = optarg;
+ break;
+ }
+ }
+
+ if (detach)
+ cfg_attach = false;
+
+ if (cfg_attach && !cfg_path_name)
+ error(1, 0, "must provide a path to the BPF program");
+
+ if (cfg_attach && !cfg_section_name)
+ error(1, 0, "must provide a section name");
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+ if (cfg_attach)
+ load_and_attach_program();
+ else
+ detach_program();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h
new file mode 100644
index 000000000000..81084c1c2c23
--- /dev/null
+++ b/tools/testing/selftests/bpf/netcnt_common.h
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __NETCNT_COMMON_H
+#define __NETCNT_COMMON_H
+
+#include <linux/types.h>
+
+#define MAX_PERCPU_PACKETS 32
+
+struct percpu_net_cnt {
+ __u64 packets;
+ __u64 bytes;
+
+ __u64 prev_ts;
+
+ __u64 prev_packets;
+ __u64 prev_bytes;
+};
+
+struct net_cnt {
+ __u64 packets;
+ __u64 bytes;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/netcnt_prog.c b/tools/testing/selftests/bpf/netcnt_prog.c
new file mode 100644
index 000000000000..1198abca1360
--- /dev/null
+++ b/tools/testing/selftests/bpf/netcnt_prog.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/version.h>
+
+#include "bpf_helpers.h"
+#include "netcnt_common.h"
+
+#define MAX_BPS (3 * 1024 * 1024)
+
+#define REFRESH_TIME_NS 100000000
+#define NS_PER_SEC 1000000000
+
+struct bpf_map_def SEC("maps") percpu_netcnt = {
+ .type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ .key_size = sizeof(struct bpf_cgroup_storage_key),
+ .value_size = sizeof(struct percpu_net_cnt),
+};
+
+struct bpf_map_def SEC("maps") netcnt = {
+ .type = BPF_MAP_TYPE_CGROUP_STORAGE,
+ .key_size = sizeof(struct bpf_cgroup_storage_key),
+ .value_size = sizeof(struct net_cnt),
+};
+
+SEC("cgroup/skb")
+int bpf_nextcnt(struct __sk_buff *skb)
+{
+ struct percpu_net_cnt *percpu_cnt;
+ char fmt[] = "%d %llu %llu\n";
+ struct net_cnt *cnt;
+ __u64 ts, dt;
+ int ret;
+
+ cnt = bpf_get_local_storage(&netcnt, 0);
+ percpu_cnt = bpf_get_local_storage(&percpu_netcnt, 0);
+
+ percpu_cnt->packets++;
+ percpu_cnt->bytes += skb->len;
+
+ if (percpu_cnt->packets > MAX_PERCPU_PACKETS) {
+ __sync_fetch_and_add(&cnt->packets,
+ percpu_cnt->packets);
+ percpu_cnt->packets = 0;
+
+ __sync_fetch_and_add(&cnt->bytes,
+ percpu_cnt->bytes);
+ percpu_cnt->bytes = 0;
+ }
+
+ ts = bpf_ktime_get_ns();
+ dt = ts - percpu_cnt->prev_ts;
+
+ dt *= MAX_BPS;
+ dt /= NS_PER_SEC;
+
+ if (cnt->bytes + percpu_cnt->bytes - percpu_cnt->prev_bytes < dt)
+ ret = 1;
+ else
+ ret = 0;
+
+ if (dt > REFRESH_TIME_NS) {
+ percpu_cnt->prev_ts = ts;
+ percpu_cnt->prev_packets = cnt->packets;
+ percpu_cnt->prev_bytes = cnt->bytes;
+ }
+
+ return !!ret;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 6b5cfeb7a9cc..f42b3396d622 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
+#include <linux/kernel.h>
#include <bpf/bpf.h>
#include <sys/resource.h>
#include <libelf.h>
@@ -45,7 +46,6 @@ static int count_result(int err)
return err;
}
-#define min(a, b) ((a) < (b) ? (a) : (b))
#define __printf(a, b) __attribute__((format(printf, a, b)))
__printf(1, 2)
@@ -130,6 +130,7 @@ struct btf_raw_test {
bool map_create_err;
bool ordered_map;
bool lossless_map;
+ bool percpu_map;
int hdr_len_delta;
int type_off_delta;
int str_off_delta;
@@ -2157,6 +2158,7 @@ static struct btf_pprint_test_meta {
const char *map_name;
bool ordered_map;
bool lossless_map;
+ bool percpu_map;
} pprint_tests_meta[] = {
{
.descr = "BTF pretty print array",
@@ -2164,6 +2166,7 @@ static struct btf_pprint_test_meta {
.map_name = "pprint_test_array",
.ordered_map = true,
.lossless_map = true,
+ .percpu_map = false,
},
{
@@ -2172,6 +2175,7 @@ static struct btf_pprint_test_meta {
.map_name = "pprint_test_hash",
.ordered_map = false,
.lossless_map = true,
+ .percpu_map = false,
},
{
@@ -2180,30 +2184,83 @@ static struct btf_pprint_test_meta {
.map_name = "pprint_test_lru_hash",
.ordered_map = false,
.lossless_map = false,
+ .percpu_map = false,
+},
+
+{
+ .descr = "BTF pretty print percpu array",
+ .map_type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .map_name = "pprint_test_percpu_array",
+ .ordered_map = true,
+ .lossless_map = true,
+ .percpu_map = true,
+},
+
+{
+ .descr = "BTF pretty print percpu hash",
+ .map_type = BPF_MAP_TYPE_PERCPU_HASH,
+ .map_name = "pprint_test_percpu_hash",
+ .ordered_map = false,
+ .lossless_map = true,
+ .percpu_map = true,
+},
+
+{
+ .descr = "BTF pretty print lru percpu hash",
+ .map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ .map_name = "pprint_test_lru_percpu_hash",
+ .ordered_map = false,
+ .lossless_map = false,
+ .percpu_map = true,
},
};
-static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
+static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i,
+ int num_cpus, int rounded_value_size)
{
- v->ui32 = i;
- v->si32 = -i;
- v->unused_bits2a = 3;
- v->bits28 = i;
- v->unused_bits2b = 3;
- v->ui64 = i;
- v->aenum = i & 0x03;
+ int cpu;
+
+ for (cpu = 0; cpu < num_cpus; cpu++) {
+ v->ui32 = i + cpu;
+ v->si32 = -i;
+ v->unused_bits2a = 3;
+ v->bits28 = i;
+ v->unused_bits2b = 3;
+ v->ui64 = i;
+ v->aenum = i & 0x03;
+ v = (void *)v + rounded_value_size;
+ }
}
+static int check_line(const char *expected_line, int nexpected_line,
+ int expected_line_len, const char *line)
+{
+ if (CHECK(nexpected_line == expected_line_len,
+ "expected_line is too long"))
+ return -1;
+
+ if (strcmp(expected_line, line)) {
+ fprintf(stderr, "unexpected pprint output\n");
+ fprintf(stderr, "expected: %s", expected_line);
+ fprintf(stderr, " read: %s", line);
+ return -1;
+ }
+
+ return 0;
+}
+
+
static int do_test_pprint(void)
{
const struct btf_raw_test *test = &pprint_test_template;
struct bpf_create_map_attr create_attr = {};
+ bool ordered_map, lossless_map, percpu_map;
+ int err, ret, num_cpus, rounded_value_size;
+ struct pprint_mapv *mapv = NULL;
unsigned int key, nr_read_elems;
- bool ordered_map, lossless_map;
int map_fd = -1, btf_fd = -1;
- struct pprint_mapv mapv = {};
unsigned int raw_btf_size;
char expected_line[255];
FILE *pin_file = NULL;
@@ -2212,7 +2269,6 @@ static int do_test_pprint(void)
char *line = NULL;
uint8_t *raw_btf;
ssize_t nread;
- int err, ret;
fprintf(stderr, "%s......", test->descr);
raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
@@ -2261,9 +2317,18 @@ static int do_test_pprint(void)
if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno))
goto done;
+ percpu_map = test->percpu_map;
+ num_cpus = percpu_map ? bpf_num_possible_cpus() : 1;
+ rounded_value_size = round_up(sizeof(struct pprint_mapv), 8);
+ mapv = calloc(num_cpus, rounded_value_size);
+ if (CHECK(!mapv, "mapv allocation failure")) {
+ err = -1;
+ goto done;
+ }
+
for (key = 0; key < test->max_entries; key++) {
- set_pprint_mapv(&mapv, key);
- bpf_map_update_elem(map_fd, &key, &mapv, 0);
+ set_pprint_mapv(mapv, key, num_cpus, rounded_value_size);
+ bpf_map_update_elem(map_fd, &key, mapv, 0);
}
pin_file = fopen(pin_path, "r");
@@ -2286,33 +2351,74 @@ static int do_test_pprint(void)
ordered_map = test->ordered_map;
lossless_map = test->lossless_map;
do {
+ struct pprint_mapv *cmapv;
ssize_t nexpected_line;
unsigned int next_key;
+ int cpu;
next_key = ordered_map ? nr_read_elems : atoi(line);
- set_pprint_mapv(&mapv, next_key);
- nexpected_line = snprintf(expected_line, sizeof(expected_line),
- "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
- next_key,
- mapv.ui32, mapv.si32,
- mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b,
- mapv.ui64,
- mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3],
- mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
- pprint_enum_str[mapv.aenum]);
-
- if (CHECK(nexpected_line == sizeof(expected_line),
- "expected_line is too long")) {
- err = -1;
- goto done;
+ set_pprint_mapv(mapv, next_key, num_cpus, rounded_value_size);
+ cmapv = mapv;
+
+ for (cpu = 0; cpu < num_cpus; cpu++) {
+ if (percpu_map) {
+ /* for percpu map, the format looks like:
+ * <key>: {
+ * cpu0: <value_on_cpu0>
+ * cpu1: <value_on_cpu1>
+ * ...
+ * cpun: <value_on_cpun>
+ * }
+ *
+ * let us verify the line containing the key here.
+ */
+ if (cpu == 0) {
+ nexpected_line = snprintf(expected_line,
+ sizeof(expected_line),
+ "%u: {\n",
+ next_key);
+
+ err = check_line(expected_line, nexpected_line,
+ sizeof(expected_line), line);
+ if (err == -1)
+ goto done;
+ }
+
+ /* read value@cpu */
+ nread = getline(&line, &line_len, pin_file);
+ if (nread < 0)
+ break;
+ }
+
+ nexpected_line = snprintf(expected_line, sizeof(expected_line),
+ "%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
+ "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
+ percpu_map ? "\tcpu" : "",
+ percpu_map ? cpu : next_key,
+ cmapv->ui32, cmapv->si32,
+ cmapv->unused_bits2a,
+ cmapv->bits28,
+ cmapv->unused_bits2b,
+ cmapv->ui64,
+ cmapv->ui8a[0], cmapv->ui8a[1],
+ cmapv->ui8a[2], cmapv->ui8a[3],
+ cmapv->ui8a[4], cmapv->ui8a[5],
+ cmapv->ui8a[6], cmapv->ui8a[7],
+ pprint_enum_str[cmapv->aenum]);
+
+ err = check_line(expected_line, nexpected_line,
+ sizeof(expected_line), line);
+ if (err == -1)
+ goto done;
+
+ cmapv = (void *)cmapv + rounded_value_size;
}
- if (strcmp(expected_line, line)) {
- err = -1;
- fprintf(stderr, "unexpected pprint output\n");
- fprintf(stderr, "expected: %s", expected_line);
- fprintf(stderr, " read: %s", line);
- goto done;
+ if (percpu_map) {
+ /* skip the last bracket for the percpu map */
+ nread = getline(&line, &line_len, pin_file);
+ if (nread < 0)
+ break;
}
nread = getline(&line, &line_len, pin_file);
@@ -2334,6 +2440,8 @@ static int do_test_pprint(void)
err = 0;
done:
+ if (mapv)
+ free(mapv);
if (!err)
fprintf(stderr, "OK");
if (*btf_log_buf && (err || args.always_log))
@@ -2361,6 +2469,7 @@ static int test_pprint(void)
pprint_test_template.map_name = pprint_tests_meta[i].map_name;
pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map;
pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map;
+ pprint_test_template.percpu_map = pprint_tests_meta[i].percpu_map;
err |= count_result(do_test_pprint());
}
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index 4e196e3bfecf..f44834155f25 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -4,6 +4,7 @@
#include <linux/filter.h>
#include <stdio.h>
#include <stdlib.h>
+#include <sys/sysinfo.h>
#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
@@ -15,6 +16,14 @@ char bpf_log_buf[BPF_LOG_BUF_SIZE];
int main(int argc, char **argv)
{
struct bpf_insn prog[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, 0), /* percpu map fd */
+ BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
+ BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+
BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
@@ -28,9 +37,18 @@ int main(int argc, char **argv)
};
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
int error = EXIT_FAILURE;
- int map_fd, prog_fd, cgroup_fd;
+ int map_fd, percpu_map_fd, prog_fd, cgroup_fd;
struct bpf_cgroup_storage_key key;
unsigned long long value;
+ unsigned long long *percpu_value;
+ int cpu, nproc;
+
+ nproc = get_nprocs_conf();
+ percpu_value = malloc(sizeof(*percpu_value) * nproc);
+ if (!percpu_value) {
+ printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
+ goto err;
+ }
map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
sizeof(value), 0, 0);
@@ -39,7 +57,15 @@ int main(int argc, char **argv)
goto out;
}
- prog[0].imm = map_fd;
+ percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ sizeof(key), sizeof(value), 0, 0);
+ if (percpu_map_fd < 0) {
+ printf("Failed to create map: %s\n", strerror(errno));
+ goto out;
+ }
+
+ prog[0].imm = percpu_map_fd;
+ prog[7].imm = map_fd;
prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
@@ -77,7 +103,15 @@ int main(int argc, char **argv)
}
if (bpf_map_lookup_elem(map_fd, &key, &value)) {
- printf("Failed to lookup cgroup storage\n");
+ printf("Failed to lookup cgroup storage 0\n");
+ goto err;
+ }
+
+ for (cpu = 0; cpu < nproc; cpu++)
+ percpu_value[cpu] = 1000;
+
+ if (bpf_map_update_elem(percpu_map_fd, &key, percpu_value, 0)) {
+ printf("Failed to update the data in the cgroup storage\n");
goto err;
}
@@ -120,11 +154,31 @@ int main(int argc, char **argv)
goto err;
}
+ /* Check the final value of the counter in the percpu local storage */
+
+ for (cpu = 0; cpu < nproc; cpu++)
+ percpu_value[cpu] = 0;
+
+ if (bpf_map_lookup_elem(percpu_map_fd, &key, percpu_value)) {
+ printf("Failed to lookup the per-cpu cgroup storage\n");
+ goto err;
+ }
+
+ value = 0;
+ for (cpu = 0; cpu < nproc; cpu++)
+ value += percpu_value[cpu];
+
+ if (value != nproc * 1000 + 6) {
+ printf("Unexpected data in the per-cpu cgroup storage\n");
+ goto err;
+ }
+
error = 0;
printf("test_cgroup_storage:PASS\n");
err:
cleanup_cgroup_environment();
+ free(percpu_value);
out:
return error;
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c
new file mode 100644
index 000000000000..12b784afba31
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.c
@@ -0,0 +1,782 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Inject packets with all sorts of encapsulation into the kernel.
+ *
+ * IPv4/IPv6 outer layer 3
+ * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/..
+ * IPv4/IPv6 inner layer 3
+ */
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <error.h>
+#include <errno.h>
+#include <linux/if_packet.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <netinet/ip.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define CFG_PORT_INNER 8000
+
+/* Add some protocol definitions that do not exist in userspace */
+
+struct grehdr {
+ uint16_t unused;
+ uint16_t protocol;
+} __attribute__((packed));
+
+struct guehdr {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 hlen:5,
+ control:1,
+ version:2;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u8 version:2,
+ control:1,
+ hlen:5;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 proto_ctype;
+ __be16 flags;
+ };
+ __be32 word;
+ };
+};
+
+static uint8_t cfg_dsfield_inner;
+static uint8_t cfg_dsfield_outer;
+static uint8_t cfg_encap_proto;
+static bool cfg_expect_failure = false;
+static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */
+static int cfg_l3_inner = AF_UNSPEC;
+static int cfg_l3_outer = AF_UNSPEC;
+static int cfg_num_pkt = 10;
+static int cfg_num_secs = 0;
+static char cfg_payload_char = 'a';
+static int cfg_payload_len = 100;
+static int cfg_port_gue = 6080;
+static bool cfg_only_rx;
+static bool cfg_only_tx;
+static int cfg_src_port = 9;
+
+static char buf[ETH_DATA_LEN];
+
+#define INIT_ADDR4(name, addr4, port) \
+ static struct sockaddr_in name = { \
+ .sin_family = AF_INET, \
+ .sin_port = __constant_htons(port), \
+ .sin_addr.s_addr = __constant_htonl(addr4), \
+ };
+
+#define INIT_ADDR6(name, addr6, port) \
+ static struct sockaddr_in6 name = { \
+ .sin6_family = AF_INET6, \
+ .sin6_port = __constant_htons(port), \
+ .sin6_addr = addr6, \
+ };
+
+INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER)
+INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0)
+INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0)
+INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0)
+
+INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
+INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+
+static unsigned long util_gettime(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void util_printaddr(const char *msg, struct sockaddr *addr)
+{
+ unsigned long off = 0;
+ char nbuf[INET6_ADDRSTRLEN];
+
+ switch (addr->sa_family) {
+ case PF_INET:
+ off = __builtin_offsetof(struct sockaddr_in, sin_addr);
+ break;
+ case PF_INET6:
+ off = __builtin_offsetof(struct sockaddr_in6, sin6_addr);
+ break;
+ default:
+ error(1, 0, "printaddr: unsupported family %u\n",
+ addr->sa_family);
+ }
+
+ if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf,
+ sizeof(nbuf)))
+ error(1, errno, "inet_ntop");
+
+ fprintf(stderr, "%s: %s\n", msg, nbuf);
+}
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for (i = 0; i < num_u16; i++)
+ sum += start[i];
+
+ return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+ unsigned long sum)
+{
+ sum += add_csum_hword(start, num_u16);
+
+ while (sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ return ~sum;
+}
+
+static void build_ipv4_header(void *header, uint8_t proto,
+ uint32_t src, uint32_t dst,
+ int payload_len, uint8_t tos)
+{
+ struct iphdr *iph = header;
+
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->tos = tos;
+ iph->ttl = 8;
+ iph->tot_len = htons(sizeof(*iph) + payload_len);
+ iph->id = htons(1337);
+ iph->protocol = proto;
+ iph->saddr = src;
+ iph->daddr = dst;
+ iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+}
+
+static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+ uint16_t val, *ptr = (uint16_t *)ip6h;
+
+ val = ntohs(*ptr);
+ val &= 0xF00F;
+ val |= ((uint16_t) dsfield) << 4;
+ *ptr = htons(val);
+}
+
+static void build_ipv6_header(void *header, uint8_t proto,
+ struct sockaddr_in6 *src,
+ struct sockaddr_in6 *dst,
+ int payload_len, uint8_t dsfield)
+{
+ struct ipv6hdr *ip6h = header;
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(payload_len);
+ ip6h->nexthdr = proto;
+ ip6h->hop_limit = 8;
+ ipv6_set_dsfield(ip6h, dsfield);
+
+ memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
+ memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
+}
+
+static uint16_t build_udp_v4_csum(const struct iphdr *iph,
+ const struct udphdr *udph,
+ int num_words)
+{
+ unsigned long pseudo_sum;
+ int num_u16 = sizeof(iph->saddr); /* halfwords: twice byte len */
+
+ pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16);
+ pseudo_sum += htons(IPPROTO_UDP);
+ pseudo_sum += udph->len;
+ return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h,
+ const struct udphdr *udph,
+ int num_words)
+{
+ unsigned long pseudo_sum;
+ int num_u16 = sizeof(ip6h->saddr); /* halfwords: twice byte len */
+
+ pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16);
+ pseudo_sum += htons(ip6h->nexthdr);
+ pseudo_sum += ip6h->payload_len;
+ return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static void build_udp_header(void *header, int payload_len,
+ uint16_t dport, int family)
+{
+ struct udphdr *udph = header;
+ int len = sizeof(*udph) + payload_len;
+
+ udph->source = htons(cfg_src_port);
+ udph->dest = htons(dport);
+ udph->len = htons(len);
+ udph->check = 0;
+ if (family == AF_INET)
+ udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
+ udph, len >> 1);
+ else
+ udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
+ udph, len >> 1);
+}
+
+static void build_gue_header(void *header, uint8_t proto)
+{
+ struct guehdr *gueh = header;
+
+ gueh->proto_ctype = proto;
+}
+
+static void build_gre_header(void *header, uint16_t proto)
+{
+ struct grehdr *greh = header;
+
+ greh->protocol = htons(proto);
+}
+
+static int l3_length(int family)
+{
+ if (family == AF_INET)
+ return sizeof(struct iphdr);
+ else
+ return sizeof(struct ipv6hdr);
+}
+
+static int build_packet(void)
+{
+ int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
+ int el3_len = 0;
+
+ if (cfg_l3_extra)
+ el3_len = l3_length(cfg_l3_extra);
+
+ /* calculate header offsets */
+ if (cfg_encap_proto) {
+ ol3_len = l3_length(cfg_l3_outer);
+
+ if (cfg_encap_proto == IPPROTO_GRE)
+ ol4_len = sizeof(struct grehdr);
+ else if (cfg_encap_proto == IPPROTO_UDP)
+ ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
+ }
+
+ il3_len = l3_length(cfg_l3_inner);
+ il4_len = sizeof(struct udphdr);
+
+ if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >=
+ sizeof(buf))
+ error(1, 0, "packet too large\n");
+
+ /*
+ * Fill packet from inside out, to calculate correct checksums.
+ * But create ip before udp headers, as udp uses ip for pseudo-sum.
+ */
+ memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
+ cfg_payload_char, cfg_payload_len);
+
+ /* add zero byte for udp csum padding */
+ buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0;
+
+ switch (cfg_l3_inner) {
+ case PF_INET:
+ build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
+ IPPROTO_UDP,
+ in_saddr4.sin_addr.s_addr,
+ in_daddr4.sin_addr.s_addr,
+ il4_len + cfg_payload_len,
+ cfg_dsfield_inner);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
+ IPPROTO_UDP,
+ &in_saddr6, &in_daddr6,
+ il4_len + cfg_payload_len,
+ cfg_dsfield_inner);
+ break;
+ }
+
+ build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
+ cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner);
+
+ if (!cfg_encap_proto)
+ return il3_len + il4_len + cfg_payload_len;
+
+ switch (cfg_l3_outer) {
+ case PF_INET:
+ build_ipv4_header(buf + el3_len, cfg_encap_proto,
+ out_saddr4.sin_addr.s_addr,
+ out_daddr4.sin_addr.s_addr,
+ ol4_len + il3_len + il4_len + cfg_payload_len,
+ cfg_dsfield_outer);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf + el3_len, cfg_encap_proto,
+ &out_saddr6, &out_daddr6,
+ ol4_len + il3_len + il4_len + cfg_payload_len,
+ cfg_dsfield_outer);
+ break;
+ }
+
+ switch (cfg_encap_proto) {
+ case IPPROTO_UDP:
+ build_gue_header(buf + el3_len + ol3_len + ol4_len -
+ sizeof(struct guehdr),
+ cfg_l3_inner == PF_INET ? IPPROTO_IPIP
+ : IPPROTO_IPV6);
+ build_udp_header(buf + el3_len + ol3_len,
+ sizeof(struct guehdr) + il3_len + il4_len +
+ cfg_payload_len,
+ cfg_port_gue, cfg_l3_outer);
+ break;
+ case IPPROTO_GRE:
+ build_gre_header(buf + el3_len + ol3_len,
+ cfg_l3_inner == PF_INET ? ETH_P_IP
+ : ETH_P_IPV6);
+ break;
+ }
+
+ switch (cfg_l3_extra) {
+ case PF_INET:
+ build_ipv4_header(buf,
+ cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+ : IPPROTO_IPV6,
+ extra_saddr4.sin_addr.s_addr,
+ extra_daddr4.sin_addr.s_addr,
+ ol3_len + ol4_len + il3_len + il4_len +
+ cfg_payload_len, 0);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf,
+ cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+ : IPPROTO_IPV6,
+ &extra_saddr6, &extra_daddr6,
+ ol3_len + ol4_len + il3_len + il4_len +
+ cfg_payload_len, 0);
+ break;
+ }
+
+ return el3_len + ol3_len + ol4_len + il3_len + il4_len +
+ cfg_payload_len;
+}
+
+/* sender transmits encapsulated over RAW or unencap'd over UDP */
+static int setup_tx(void)
+{
+ int family, fd, ret;
+
+ if (cfg_l3_extra)
+ family = cfg_l3_extra;
+ else if (cfg_l3_outer)
+ family = cfg_l3_outer;
+ else
+ family = cfg_l3_inner;
+
+ fd = socket(family, SOCK_RAW, IPPROTO_RAW);
+ if (fd == -1)
+ error(1, errno, "socket tx");
+
+ if (cfg_l3_extra) {
+ if (cfg_l3_extra == PF_INET)
+ ret = connect(fd, (void *) &extra_daddr4,
+ sizeof(extra_daddr4));
+ else
+ ret = connect(fd, (void *) &extra_daddr6,
+ sizeof(extra_daddr6));
+ if (ret)
+ error(1, errno, "connect tx");
+ } else if (cfg_l3_outer) {
+ /* connect to destination if not encapsulated */
+ if (cfg_l3_outer == PF_INET)
+ ret = connect(fd, (void *) &out_daddr4,
+ sizeof(out_daddr4));
+ else
+ ret = connect(fd, (void *) &out_daddr6,
+ sizeof(out_daddr6));
+ if (ret)
+ error(1, errno, "connect tx");
+ } else {
+ /* otherwise using loopback */
+ if (cfg_l3_inner == PF_INET)
+ ret = connect(fd, (void *) &in_daddr4,
+ sizeof(in_daddr4));
+ else
+ ret = connect(fd, (void *) &in_daddr6,
+ sizeof(in_daddr6));
+ if (ret)
+ error(1, errno, "connect tx");
+ }
+
+ return fd;
+}
+
+/* receiver reads unencapsulated UDP */
+static int setup_rx(void)
+{
+ int fd, ret;
+
+ fd = socket(cfg_l3_inner, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket rx");
+
+ if (cfg_l3_inner == PF_INET)
+ ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4));
+ else
+ ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6));
+ if (ret)
+ error(1, errno, "bind rx");
+
+ return fd;
+}
+
+static int do_tx(int fd, const char *pkt, int len)
+{
+ int ret;
+
+ ret = write(fd, pkt, len);
+ if (ret == -1)
+ error(1, errno, "send");
+ if (ret != len)
+ error(1, errno, "send: len (%d < %d)\n", ret, len);
+
+ return 1;
+}
+
+static int do_poll(int fd, short events, int timeout)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.fd = fd;
+ pfd.events = events;
+
+ ret = poll(&pfd, 1, timeout);
+ if (ret == -1)
+ error(1, errno, "poll");
+ if (ret && !(pfd.revents & POLLIN))
+ error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents);
+
+ return ret;
+}
+
+static int do_rx(int fd)
+{
+ char rbuf;
+ int ret, num = 0;
+
+ while (1) {
+ ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (rbuf != cfg_payload_char)
+ error(1, 0, "recv: payload mismatch");
+ num++;
+ };
+
+ return num;
+}
+
+static int do_main(void)
+{
+ unsigned long tstop, treport, tcur;
+ int fdt = -1, fdr = -1, len, tx = 0, rx = 0;
+
+ if (!cfg_only_tx)
+ fdr = setup_rx();
+ if (!cfg_only_rx)
+ fdt = setup_tx();
+
+ len = build_packet();
+
+ tcur = util_gettime();
+ treport = tcur + 1000;
+ tstop = tcur + (cfg_num_secs * 1000);
+
+ while (1) {
+ if (!cfg_only_rx)
+ tx += do_tx(fdt, buf, len);
+
+ if (!cfg_only_tx)
+ rx += do_rx(fdr);
+
+ if (cfg_num_secs) {
+ tcur = util_gettime();
+ if (tcur >= tstop)
+ break;
+ if (tcur >= treport) {
+ fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+ tx = 0;
+ rx = 0;
+ treport = tcur + 1000;
+ }
+ } else {
+ if (tx == cfg_num_pkt)
+ break;
+ }
+ }
+
+ /* read straggler packets, if any */
+ if (rx < tx) {
+ tstop = util_gettime() + 100;
+ while (rx < tx) {
+ tcur = util_gettime();
+ if (tcur >= tstop)
+ break;
+
+ do_poll(fdr, POLLIN, tstop - tcur);
+ rx += do_rx(fdr);
+ }
+ }
+
+ fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+
+ if (fdr != -1 && close(fdr))
+ error(1, errno, "close rx");
+ if (fdt != -1 && close(fdt))
+ error(1, errno, "close tx");
+
+ /*
+ * success (== 0) only if received all packets
+ * unless failure is expected, in which case none must arrive.
+ */
+ if (cfg_expect_failure)
+ return rx != 0;
+ else
+ return rx != tx;
+}
+
+
+static void __attribute__((noreturn)) usage(const char *filepath)
+{
+ fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] "
+ "[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] "
+ "[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] "
+ "[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n",
+ filepath);
+ exit(1);
+}
+
+static void parse_addr(int family, void *addr, const char *optarg)
+{
+ int ret;
+
+ ret = inet_pton(family, optarg, addr);
+ if (ret == -1)
+ error(1, errno, "inet_pton");
+ if (ret == 0)
+ error(1, 0, "inet_pton: bad string");
+}
+
+static void parse_addr4(struct sockaddr_in *addr, const char *optarg)
+{
+ parse_addr(AF_INET, &addr->sin_addr, optarg);
+}
+
+static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg)
+{
+ parse_addr(AF_INET6, &addr->sin6_addr, optarg);
+}
+
+static int parse_protocol_family(const char *filepath, const char *optarg)
+{
+ if (!strcmp(optarg, "4"))
+ return PF_INET;
+ if (!strcmp(optarg, "6"))
+ return PF_INET6;
+
+ usage(filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) {
+ switch (c) {
+ case 'd':
+ if (cfg_l3_outer == AF_UNSPEC)
+ error(1, 0, "-d must be preceded by -o");
+ if (cfg_l3_outer == AF_INET)
+ parse_addr4(&out_daddr4, optarg);
+ else
+ parse_addr6(&out_daddr6, optarg);
+ break;
+ case 'D':
+ if (cfg_l3_inner == AF_UNSPEC)
+ error(1, 0, "-D must be preceded by -i");
+ if (cfg_l3_inner == AF_INET)
+ parse_addr4(&in_daddr4, optarg);
+ else
+ parse_addr6(&in_daddr6, optarg);
+ break;
+ case 'e':
+ if (!strcmp(optarg, "gre"))
+ cfg_encap_proto = IPPROTO_GRE;
+ else if (!strcmp(optarg, "gue"))
+ cfg_encap_proto = IPPROTO_UDP;
+ else if (!strcmp(optarg, "bare"))
+ cfg_encap_proto = IPPROTO_IPIP;
+ else if (!strcmp(optarg, "none"))
+ cfg_encap_proto = IPPROTO_IP; /* == 0 */
+ else
+ usage(argv[0]);
+ break;
+ case 'f':
+ cfg_src_port = strtol(optarg, NULL, 0);
+ break;
+ case 'F':
+ cfg_expect_failure = true;
+ break;
+ case 'h':
+ usage(argv[0]);
+ break;
+ case 'i':
+ if (!strcmp(optarg, "4"))
+ cfg_l3_inner = PF_INET;
+ else if (!strcmp(optarg, "6"))
+ cfg_l3_inner = PF_INET6;
+ else
+ usage(argv[0]);
+ break;
+ case 'l':
+ cfg_payload_len = strtol(optarg, NULL, 0);
+ break;
+ case 'n':
+ cfg_num_pkt = strtol(optarg, NULL, 0);
+ break;
+ case 'o':
+ cfg_l3_outer = parse_protocol_family(argv[0], optarg);
+ break;
+ case 'O':
+ cfg_l3_extra = parse_protocol_family(argv[0], optarg);
+ break;
+ case 'R':
+ cfg_only_rx = true;
+ break;
+ case 's':
+ if (cfg_l3_outer == AF_INET)
+ parse_addr4(&out_saddr4, optarg);
+ else
+ parse_addr6(&out_saddr6, optarg);
+ break;
+ case 'S':
+ if (cfg_l3_inner == AF_INET)
+ parse_addr4(&in_saddr4, optarg);
+ else
+ parse_addr6(&in_saddr6, optarg);
+ break;
+ case 't':
+ cfg_num_secs = strtol(optarg, NULL, 0);
+ break;
+ case 'T':
+ cfg_only_tx = true;
+ break;
+ case 'x':
+ cfg_dsfield_outer = strtol(optarg, NULL, 0);
+ break;
+ case 'X':
+ cfg_dsfield_inner = strtol(optarg, NULL, 0);
+ break;
+ }
+ }
+
+ if (cfg_only_rx && cfg_only_tx)
+ error(1, 0, "options: cannot combine rx-only and tx-only");
+
+ if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC)
+ error(1, 0, "options: must specify outer with encap");
+ else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC)
+ error(1, 0, "options: cannot combine no-encap and outer");
+ else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC)
+ error(1, 0, "options: cannot combine no-encap and extra");
+
+ if (cfg_l3_inner == AF_UNSPEC)
+ cfg_l3_inner = AF_INET6;
+ if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP)
+ cfg_encap_proto = IPPROTO_IPV6;
+
+ /* RFC 6040 4.2:
+ * on decap, if outer encountered congestion (CE == 0x3),
+ * but inner cannot encode ECN (NoECT == 0x0), then drop packet.
+ */
+ if (((cfg_dsfield_outer & 0x3) == 0x3) &&
+ ((cfg_dsfield_inner & 0x3) == 0x0))
+ cfg_expect_failure = true;
+}
+
+static void print_opts(void)
+{
+ if (cfg_l3_inner == PF_INET6) {
+ util_printaddr("inner.dest6", (void *) &in_daddr6);
+ util_printaddr("inner.source6", (void *) &in_saddr6);
+ } else {
+ util_printaddr("inner.dest4", (void *) &in_daddr4);
+ util_printaddr("inner.source4", (void *) &in_saddr4);
+ }
+
+ if (!cfg_l3_outer)
+ return;
+
+ fprintf(stderr, "encap proto: %u\n", cfg_encap_proto);
+
+ if (cfg_l3_outer == PF_INET6) {
+ util_printaddr("outer.dest6", (void *) &out_daddr6);
+ util_printaddr("outer.source6", (void *) &out_saddr6);
+ } else {
+ util_printaddr("outer.dest4", (void *) &out_daddr4);
+ util_printaddr("outer.source4", (void *) &out_saddr4);
+ }
+
+ if (!cfg_l3_extra)
+ return;
+
+ if (cfg_l3_outer == PF_INET6) {
+ util_printaddr("extra.dest6", (void *) &extra_daddr6);
+ util_printaddr("extra.source6", (void *) &extra_saddr6);
+ } else {
+ util_printaddr("extra.dest4", (void *) &extra_daddr4);
+ util_printaddr("extra.source4", (void *) &extra_saddr4);
+ }
+
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+ print_opts();
+ return do_main();
+}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
new file mode 100755
index 000000000000..c0fb073b5eab
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Load BPF flow dissector and verify it correctly dissects traffic
+export TESTNAME=test_flow_dissector
+unmount=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+fi
+
+# This test needs to be run in a network namespace with in_netns.sh. Check if
+# this is the case and run it with in_netns.sh if it is being run in the root
+# namespace.
+if [[ -z $(ip netns identify $$) ]]; then
+ ../net/in_netns.sh "$0" "$@"
+ exit $?
+fi
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
+ if (( $? == 0 )); then
+ echo "selftests: $TESTNAME [PASS]";
+ else
+ echo "selftests: $TESTNAME [FAILED]";
+ fi
+
+ set +e
+
+ # Cleanup
+ tc filter del dev lo ingress pref 1337 2> /dev/null
+ tc qdisc del dev lo ingress 2> /dev/null
+ ./flow_dissector_load -d 2> /dev/null
+ if [ $unmount -ne 0 ]; then
+ umount bpffs 2> /dev/null
+ fi
+}
+
+# Exit script immediately (well catched by trap handler) if any
+# program/thing exits with a non-zero status.
+set -e
+
+# (Use 'trap -l' to list meaning of numbers)
+trap exit_handler 0 2 3 6 9
+
+# Mount BPF file system
+if /bin/mount | grep /sys/fs/bpf > /dev/null; then
+ echo "bpffs already mounted"
+else
+ echo "bpffs not mounted. Mounting..."
+ unmount=1
+ /bin/mount bpffs /sys/fs/bpf -t bpf
+fi
+
+# Attach BPF program
+./flow_dissector_load -p bpf_flow.o -s dissect
+
+# Setup
+tc qdisc add dev lo ingress
+
+echo "Testing IPv4..."
+# Drops all IP/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
+ udp src_port 9 action drop
+
+# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 4 -f 8
+# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 4 -f 9 -F
+# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 4 -f 10
+
+echo "Testing IPIP..."
+# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 10
+
+echo "Testing IPv4 + GRE..."
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 10
+
+tc filter del dev lo ingress pref 1337
+
+echo "Testing IPv6..."
+# Drops all IPv6/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
+ udp src_port 9 action drop
+
+# Send 10 IPv6/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 6 -f 8
+# Send 10 IPv6/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 6 -f 9 -F
+# Send 10 IPv6/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 6 -f 10
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh
index d97dc914cd49..156d89f1edcc 100755
--- a/tools/testing/selftests/bpf/test_libbpf.sh
+++ b/tools/testing/selftests/bpf/test_libbpf.sh
@@ -6,7 +6,7 @@ export TESTNAME=test_libbpf
# Determine selftest success via shell exit code
exit_handler()
{
- if (( $? == 0 )); then
+ if [ $? -eq 0 ]; then
echo "selftests: $TESTNAME [PASS]";
else
echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 9b552c0fc47d..4db2116e52be 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -15,6 +15,7 @@
#include <string.h>
#include <assert.h>
#include <stdlib.h>
+#include <time.h>
#include <sys/wait.h>
#include <sys/socket.h>
@@ -471,6 +472,122 @@ static void test_devmap(int task, void *data)
close(fd);
}
+static void test_queuemap(int task, void *data)
+{
+ const int MAP_SIZE = 32;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ int fd, i;
+
+ /* Fill test values to be used */
+ for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++)
+ vals[i] = rand();
+
+ /* Invalid key size */
+ fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE,
+ map_flags);
+ assert(fd < 0 && errno == EINVAL);
+
+ fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE,
+ map_flags);
+ /* Queue map does not support BPF_F_NO_PREALLOC */
+ if (map_flags & BPF_F_NO_PREALLOC) {
+ assert(fd < 0 && errno == EINVAL);
+ return;
+ }
+ if (fd < 0) {
+ printf("Failed to create queuemap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ /* Push MAP_SIZE elements */
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
+
+ /* Check that element cannot be pushed due to max_entries limit */
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ errno == E2BIG);
+
+ /* Peek element */
+ assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[0]);
+
+ /* Replace half elements */
+ for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0);
+
+ /* Pop all elements */
+ for (i = MAP_SIZE/2; i < MAP_SIZE + MAP_SIZE/2; i++)
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 &&
+ val == vals[i]);
+
+ /* Check that there are not elements left */
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ errno == ENOENT);
+
+ /* Check that non supported functions set errno to EINVAL */
+ assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+ close(fd);
+}
+
+static void test_stackmap(int task, void *data)
+{
+ const int MAP_SIZE = 32;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ int fd, i;
+
+ /* Fill test values to be used */
+ for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++)
+ vals[i] = rand();
+
+ /* Invalid key size */
+ fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE,
+ map_flags);
+ assert(fd < 0 && errno == EINVAL);
+
+ fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE,
+ map_flags);
+ /* Stack map does not support BPF_F_NO_PREALLOC */
+ if (map_flags & BPF_F_NO_PREALLOC) {
+ assert(fd < 0 && errno == EINVAL);
+ return;
+ }
+ if (fd < 0) {
+ printf("Failed to create stackmap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ /* Push MAP_SIZE elements */
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
+
+ /* Check that element cannot be pushed due to max_entries limit */
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ errno == E2BIG);
+
+ /* Peek element */
+ assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[i - 1]);
+
+ /* Replace half elements */
+ for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0);
+
+ /* Pop all elements */
+ for (i = MAP_SIZE + MAP_SIZE/2 - 1; i >= MAP_SIZE/2; i--)
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 &&
+ val == vals[i]);
+
+ /* Check that there are not elements left */
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ errno == ENOENT);
+
+ /* Check that non supported functions set errno to EINVAL */
+ assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+ close(fd);
+}
+
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
@@ -1434,10 +1551,15 @@ static void run_all_tests(void)
test_map_wronly();
test_reuseport_array();
+
+ test_queuemap(0, NULL);
+ test_stackmap(0, NULL);
}
int main(void)
{
+ srand(time(NULL));
+
map_flags = 0;
run_all_tests();
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
new file mode 100644
index 000000000000..7887df693399
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_netcnt.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+#include "netcnt_common.h"
+
+#define BPF_PROG "./netcnt_prog.o"
+#define TEST_CGROUP "/test-network-counters/"
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map) {
+ printf("%s:FAIL:map '%s' not found\n", test, name);
+ return -1;
+ }
+ return bpf_map__fd(map);
+}
+
+int main(int argc, char **argv)
+{
+ struct percpu_net_cnt *percpu_netcnt;
+ struct bpf_cgroup_storage_key key;
+ int map_fd, percpu_map_fd;
+ int error = EXIT_FAILURE;
+ struct net_cnt netcnt;
+ struct bpf_object *obj;
+ int prog_fd, cgroup_fd;
+ unsigned long packets;
+ unsigned long bytes;
+ int cpu, nproc;
+ __u32 prog_cnt;
+
+ nproc = get_nprocs_conf();
+ percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+ if (!percpu_netcnt) {
+ printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
+ goto err;
+ }
+
+ if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
+ &obj, &prog_fd)) {
+ printf("Failed to load bpf program\n");
+ goto out;
+ }
+
+ if (setup_cgroup_environment()) {
+ printf("Failed to load bpf program\n");
+ goto err;
+ }
+
+ /* Create a cgroup, get fd, and join it */
+ cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ if (!cgroup_fd) {
+ printf("Failed to create test cgroup\n");
+ goto err;
+ }
+
+ if (join_cgroup(TEST_CGROUP)) {
+ printf("Failed to join cgroup\n");
+ goto err;
+ }
+
+ /* Attach bpf program */
+ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
+ printf("Failed to attach bpf program");
+ goto err;
+ }
+
+ assert(system("ping localhost -6 -c 10000 -f -q > /dev/null") == 0);
+
+ if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
+ &prog_cnt)) {
+ printf("Failed to query attached programs");
+ goto err;
+ }
+
+ map_fd = bpf_find_map(__func__, obj, "netcnt");
+ if (map_fd < 0) {
+ printf("Failed to find bpf map with net counters");
+ goto err;
+ }
+
+ percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
+ if (percpu_map_fd < 0) {
+ printf("Failed to find bpf map with percpu net counters");
+ goto err;
+ }
+
+ if (bpf_map_get_next_key(map_fd, NULL, &key)) {
+ printf("Failed to get key in cgroup storage\n");
+ goto err;
+ }
+
+ if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
+ printf("Failed to lookup cgroup storage\n");
+ goto err;
+ }
+
+ if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
+ printf("Failed to lookup percpu cgroup storage\n");
+ goto err;
+ }
+
+ /* Some packets can be still in per-cpu cache, but not more than
+ * MAX_PERCPU_PACKETS.
+ */
+ packets = netcnt.packets;
+ bytes = netcnt.bytes;
+ for (cpu = 0; cpu < nproc; cpu++) {
+ if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
+ printf("Unexpected percpu value: %llu\n",
+ percpu_netcnt[cpu].packets);
+ goto err;
+ }
+
+ packets += percpu_netcnt[cpu].packets;
+ bytes += percpu_netcnt[cpu].bytes;
+ }
+
+ /* No packets should be lost */
+ if (packets != 10000) {
+ printf("Unexpected packet count: %lu\n", packets);
+ goto err;
+ }
+
+ /* Let's check that bytes counter matches the number of packets
+ * multiplied by the size of ipv6 ICMP packet.
+ */
+ if (bytes != packets * 104) {
+ printf("Unexpected bytes count: %lu\n", bytes);
+ goto err;
+ }
+
+ error = 0;
+ printf("test_netcnt:PASS\n");
+
+err:
+ cleanup_cgroup_environment();
+ free(percpu_netcnt);
+
+out:
+ return error;
+}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 0ef68204c84b..2d3c04f45530 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -112,13 +112,13 @@ static void test_pkt_access(void)
err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, &retval, &duration);
- CHECK(err || errno || retval, "ipv4",
+ CHECK(err || retval, "ipv4",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
NULL, NULL, &retval, &duration);
- CHECK(err || errno || retval, "ipv6",
+ CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
bpf_object__close(obj);
@@ -153,14 +153,14 @@ static void test_xdp(void)
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != XDP_TX || size != 74 ||
+ CHECK(err || retval != XDP_TX || size != 74 ||
iph->protocol != IPPROTO_IPIP, "ipv4",
"err %d errno %d retval %d size %d\n",
err, errno, retval, size);
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != XDP_TX || size != 114 ||
+ CHECK(err || retval != XDP_TX || size != 114 ||
iph6->nexthdr != IPPROTO_IPV6, "ipv6",
"err %d errno %d retval %d size %d\n",
err, errno, retval, size);
@@ -185,13 +185,13 @@ static void test_xdp_adjust_tail(void)
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != XDP_DROP,
+ CHECK(err || retval != XDP_DROP,
"ipv4", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != XDP_TX || size != 54,
+ CHECK(err || retval != XDP_TX || size != 54,
"ipv6", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
bpf_object__close(obj);
@@ -254,14 +254,14 @@ static void test_l4lb(const char *file)
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
+ CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
*magic != MAGIC_VAL, "ipv4",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
+ CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
*magic != MAGIC_VAL, "ipv6",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
@@ -343,14 +343,14 @@ static void test_xdp_noinline(void)
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != 1 || size != 54 ||
+ CHECK(err || retval != 1 || size != 54 ||
*magic != MAGIC_VAL, "ipv4",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != 1 || size != 74 ||
+ CHECK(err || retval != 1 || size != 74 ||
*magic != MAGIC_VAL, "ipv6",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
@@ -1698,8 +1698,142 @@ static void test_task_fd_query_tp(void)
"sys_enter_read");
}
+static void test_reference_tracking()
+{
+ const char *file = "./test_sk_lookup_kern.o";
+ struct bpf_object *obj;
+ struct bpf_program *prog;
+ __u32 duration;
+ int err = 0;
+
+ obj = bpf_object__open(file);
+ if (IS_ERR(obj)) {
+ error_cnt++;
+ return;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ const char *title;
+
+ /* Ignore .text sections */
+ title = bpf_program__title(prog, false);
+ if (strstr(title, ".text") != NULL)
+ continue;
+
+ bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS);
+
+ /* Expect verifier failure if test name has 'fail' */
+ if (strstr(title, "fail") != NULL) {
+ libbpf_set_print(NULL, NULL, NULL);
+ err = !bpf_program__load(prog, "GPL", 0);
+ libbpf_set_print(printf, printf, NULL);
+ } else {
+ err = bpf_program__load(prog, "GPL", 0);
+ }
+ CHECK(err, title, "\n");
+ }
+ bpf_object__close(obj);
+}
+
+enum {
+ QUEUE,
+ STACK,
+};
+
+static void test_queue_stack_map(int type)
+{
+ const int MAP_SIZE = 32;
+ __u32 vals[MAP_SIZE], duration, retval, size, val;
+ int i, err, prog_fd, map_in_fd, map_out_fd;
+ char file[32], buf[128];
+ struct bpf_object *obj;
+ struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+
+ /* Fill test values to be used */
+ for (i = 0; i < MAP_SIZE; i++)
+ vals[i] = rand();
+
+ if (type == QUEUE)
+ strncpy(file, "./test_queue_map.o", sizeof(file));
+ else if (type == STACK)
+ strncpy(file, "./test_stack_map.o", sizeof(file));
+ else
+ return;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ map_in_fd = bpf_find_map(__func__, obj, "map_in");
+ if (map_in_fd < 0)
+ goto out;
+
+ map_out_fd = bpf_find_map(__func__, obj, "map_out");
+ if (map_out_fd < 0)
+ goto out;
+
+ /* Push 32 elements to the input map */
+ for (i = 0; i < MAP_SIZE; i++) {
+ err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0);
+ if (err) {
+ error_cnt++;
+ goto out;
+ }
+ }
+
+ /* The eBPF program pushes iph.saddr in the output map,
+ * pops the input map and saves this value in iph.daddr
+ */
+ for (i = 0; i < MAP_SIZE; i++) {
+ if (type == QUEUE) {
+ val = vals[i];
+ pkt_v4.iph.saddr = vals[i] * 5;
+ } else if (type == STACK) {
+ val = vals[MAP_SIZE - 1 - i];
+ pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5;
+ }
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ if (err || retval || size != sizeof(pkt_v4) ||
+ iph->daddr != val)
+ break;
+ }
+
+ CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val,
+ "bpf_map_pop_elem",
+ "err %d errno %d retval %d size %d iph->daddr %u\n",
+ err, errno, retval, size, iph->daddr);
+
+ /* Queue is empty, program should return TC_ACT_SHOT */
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4),
+ "check-queue-stack-map-empty",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+
+ /* Check that the program pushed elements correctly */
+ for (i = 0; i < MAP_SIZE; i++) {
+ err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val);
+ if (err || val != vals[i] * 5)
+ break;
+ }
+
+ CHECK(i != MAP_SIZE && (err || val != vals[i] * 5),
+ "bpf_map_push_elem", "err %d value %u\n", err, val);
+
+out:
+ pkt_v4.iph.saddr = 0;
+ bpf_object__close(obj);
+}
+
int main(void)
{
+ srand(time(NULL));
+
jit_enabled = is_jit_enabled();
test_pkt_access();
@@ -1719,6 +1853,9 @@ int main(void)
test_get_stack_raw_tp();
test_task_fd_query_rawtp();
test_task_fd_query_tp();
+ test_reference_tracking();
+ test_queue_stack_map(QUEUE);
+ test_queue_stack_map(STACK);
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_queue_map.c b/tools/testing/selftests/bpf/test_queue_map.c
new file mode 100644
index 000000000000..87db1f9da33d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_queue_map.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_QUEUE
+#include "test_queue_stack_map.h"
diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h
new file mode 100644
index 000000000000..295b9b3bc5c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_queue_stack_map.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018 Politecnico di Torino
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = {
+ .type = MAP_TYPE,
+ .key_size = 0,
+ .value_size = sizeof(__u32),
+ .max_entries = 32,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = {
+ .type = MAP_TYPE,
+ .key_size = 0,
+ .value_size = sizeof(__u32),
+ .max_entries = 32,
+ .map_flags = 0,
+};
+
+SEC("test")
+int _test(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth = (struct ethhdr *)(data);
+ __u32 value;
+ int err;
+
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ err = bpf_map_pop_elem(&map_in, &value);
+ if (err)
+ return TC_ACT_SHOT;
+
+ iph->daddr = value;
+
+ err = bpf_map_push_elem(&map_out, &iph->saddr, 0);
+ if (err)
+ return TC_ACT_SHOT;
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c
new file mode 100644
index 000000000000..7c4f41572b1c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_section_names.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <err.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+
+struct sec_name_test {
+ const char sec_name[32];
+ struct {
+ int rc;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ } expected_load;
+ struct {
+ int rc;
+ enum bpf_attach_type attach_type;
+ } expected_attach;
+};
+
+static struct sec_name_test tests[] = {
+ {"InvAliD", {-EINVAL, 0, 0}, {-EINVAL, 0} },
+ {"cgroup", {-EINVAL, 0, 0}, {-EINVAL, 0} },
+ {"socket", {0, BPF_PROG_TYPE_SOCKET_FILTER, 0}, {-EINVAL, 0} },
+ {"kprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+ {"kretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+ {"classifier", {0, BPF_PROG_TYPE_SCHED_CLS, 0}, {-EINVAL, 0} },
+ {"action", {0, BPF_PROG_TYPE_SCHED_ACT, 0}, {-EINVAL, 0} },
+ {"tracepoint/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} },
+ {
+ "raw_tracepoint/",
+ {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0},
+ {-EINVAL, 0},
+ },
+ {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} },
+ {"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} },
+ {"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} },
+ {"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} },
+ {"lwt_xmit", {0, BPF_PROG_TYPE_LWT_XMIT, 0}, {-EINVAL, 0} },
+ {"lwt_seg6local", {0, BPF_PROG_TYPE_LWT_SEG6LOCAL, 0}, {-EINVAL, 0} },
+ {
+ "cgroup_skb/ingress",
+ {0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+ {0, BPF_CGROUP_INET_INGRESS},
+ },
+ {
+ "cgroup_skb/egress",
+ {0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+ {0, BPF_CGROUP_INET_EGRESS},
+ },
+ {"cgroup/skb", {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, {-EINVAL, 0} },
+ {
+ "cgroup/sock",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK, 0},
+ {0, BPF_CGROUP_INET_SOCK_CREATE},
+ },
+ {
+ "cgroup/post_bind4",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND},
+ {0, BPF_CGROUP_INET4_POST_BIND},
+ },
+ {
+ "cgroup/post_bind6",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND},
+ {0, BPF_CGROUP_INET6_POST_BIND},
+ },
+ {
+ "cgroup/dev",
+ {0, BPF_PROG_TYPE_CGROUP_DEVICE, 0},
+ {0, BPF_CGROUP_DEVICE},
+ },
+ {"sockops", {0, BPF_PROG_TYPE_SOCK_OPS, 0}, {0, BPF_CGROUP_SOCK_OPS} },
+ {
+ "sk_skb/stream_parser",
+ {0, BPF_PROG_TYPE_SK_SKB, 0},
+ {0, BPF_SK_SKB_STREAM_PARSER},
+ },
+ {
+ "sk_skb/stream_verdict",
+ {0, BPF_PROG_TYPE_SK_SKB, 0},
+ {0, BPF_SK_SKB_STREAM_VERDICT},
+ },
+ {"sk_skb", {0, BPF_PROG_TYPE_SK_SKB, 0}, {-EINVAL, 0} },
+ {"sk_msg", {0, BPF_PROG_TYPE_SK_MSG, 0}, {0, BPF_SK_MSG_VERDICT} },
+ {"lirc_mode2", {0, BPF_PROG_TYPE_LIRC_MODE2, 0}, {0, BPF_LIRC_MODE2} },
+ {
+ "flow_dissector",
+ {0, BPF_PROG_TYPE_FLOW_DISSECTOR, 0},
+ {0, BPF_FLOW_DISSECTOR},
+ },
+ {
+ "cgroup/bind4",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND},
+ {0, BPF_CGROUP_INET4_BIND},
+ },
+ {
+ "cgroup/bind6",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND},
+ {0, BPF_CGROUP_INET6_BIND},
+ },
+ {
+ "cgroup/connect4",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT},
+ {0, BPF_CGROUP_INET4_CONNECT},
+ },
+ {
+ "cgroup/connect6",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT},
+ {0, BPF_CGROUP_INET6_CONNECT},
+ },
+ {
+ "cgroup/sendmsg4",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG},
+ {0, BPF_CGROUP_UDP4_SENDMSG},
+ },
+ {
+ "cgroup/sendmsg6",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG},
+ {0, BPF_CGROUP_UDP6_SENDMSG},
+ },
+};
+
+static int test_prog_type_by_name(const struct sec_name_test *test)
+{
+ enum bpf_attach_type expected_attach_type;
+ enum bpf_prog_type prog_type;
+ int rc;
+
+ rc = libbpf_prog_type_by_name(test->sec_name, &prog_type,
+ &expected_attach_type);
+
+ if (rc != test->expected_load.rc) {
+ warnx("prog: unexpected rc=%d for %s", rc, test->sec_name);
+ return -1;
+ }
+
+ if (rc)
+ return 0;
+
+ if (prog_type != test->expected_load.prog_type) {
+ warnx("prog: unexpected prog_type=%d for %s", prog_type,
+ test->sec_name);
+ return -1;
+ }
+
+ if (expected_attach_type != test->expected_load.expected_attach_type) {
+ warnx("prog: unexpected expected_attach_type=%d for %s",
+ expected_attach_type, test->sec_name);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int test_attach_type_by_name(const struct sec_name_test *test)
+{
+ enum bpf_attach_type attach_type;
+ int rc;
+
+ rc = libbpf_attach_type_by_name(test->sec_name, &attach_type);
+
+ if (rc != test->expected_attach.rc) {
+ warnx("attach: unexpected rc=%d for %s", rc, test->sec_name);
+ return -1;
+ }
+
+ if (rc)
+ return 0;
+
+ if (attach_type != test->expected_attach.attach_type) {
+ warnx("attach: unexpected attach_type=%d for %s", attach_type,
+ test->sec_name);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int run_test_case(const struct sec_name_test *test)
+{
+ if (test_prog_type_by_name(test))
+ return -1;
+ if (test_attach_type_by_name(test))
+ return -1;
+ return 0;
+}
+
+static int run_tests(void)
+{
+ int passes = 0;
+ int fails = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ if (run_test_case(&tests[i]))
+ ++fails;
+ else
+ ++passes;
+ }
+ printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+ return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+ return run_tests();
+}
diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c
new file mode 100644
index 000000000000..b745bdc08c2b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
+static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
+ void *data_end, __u16 eth_proto,
+ bool *ipv4)
+{
+ struct bpf_sock_tuple *result;
+ __u8 proto = 0;
+ __u64 ihl_len;
+
+ if (eth_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)(data + nh_off);
+
+ if (iph + 1 > data_end)
+ return NULL;
+ ihl_len = iph->ihl * 4;
+ proto = iph->protocol;
+ *ipv4 = true;
+ result = (struct bpf_sock_tuple *)&iph->saddr;
+ } else if (eth_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + nh_off);
+
+ if (ip6h + 1 > data_end)
+ return NULL;
+ ihl_len = sizeof(*ip6h);
+ proto = ip6h->nexthdr;
+ *ipv4 = true;
+ result = (struct bpf_sock_tuple *)&ip6h->saddr;
+ }
+
+ if (data + nh_off + ihl_len > data_end || proto != IPPROTO_TCP)
+ return NULL;
+
+ return result;
+}
+
+SEC("sk_lookup_success")
+int bpf_sk_lookup_test0(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth = (struct ethhdr *)(data);
+ struct bpf_sock_tuple *tuple;
+ struct bpf_sock *sk;
+ size_t tuple_len;
+ bool ipv4;
+
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ tuple = get_tuple(data, sizeof(*eth), data_end, eth->h_proto, &ipv4);
+ if (!tuple || tuple + sizeof *tuple > data_end)
+ return TC_ACT_SHOT;
+
+ tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
+ sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0);
+ if (sk)
+ bpf_sk_release(sk);
+ return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
+}
+
+SEC("sk_lookup_success_simple")
+int bpf_sk_lookup_test1(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ if (sk)
+ bpf_sk_release(sk);
+ return 0;
+}
+
+SEC("fail_use_after_free")
+int bpf_sk_lookup_uaf(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+ __u32 family = 0;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ if (sk) {
+ bpf_sk_release(sk);
+ family = sk->family;
+ }
+ return family;
+}
+
+SEC("fail_modify_sk_pointer")
+int bpf_sk_lookup_modptr(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+ __u32 family;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ if (sk) {
+ sk += 1;
+ bpf_sk_release(sk);
+ }
+ return 0;
+}
+
+SEC("fail_modify_sk_or_null_pointer")
+int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+ __u32 family;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ sk += 1;
+ if (sk)
+ bpf_sk_release(sk);
+ return 0;
+}
+
+SEC("fail_no_release")
+int bpf_sk_lookup_test2(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+
+ bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ return 0;
+}
+
+SEC("fail_release_twice")
+int bpf_sk_lookup_test3(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ bpf_sk_release(sk);
+ bpf_sk_release(sk);
+ return 0;
+}
+
+SEC("fail_release_unchecked")
+int bpf_sk_lookup_test4(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+ bpf_sk_release(sk);
+ return 0;
+}
+
+void lookup_no_release(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+}
+
+SEC("fail_no_release_subcall")
+int bpf_sk_lookup_test5(struct __sk_buff *skb)
+{
+ lookup_no_release(skb);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index 68e108e4687a..b6c2c605d8c0 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -158,11 +158,7 @@ static int run_test(int cgfd)
bpf_object__for_each_program(prog, pobj) {
prog_name = bpf_program__title(prog, /*needs_copy*/ false);
- if (strcmp(prog_name, "cgroup/connect6") == 0) {
- attach_type = BPF_CGROUP_INET6_CONNECT;
- } else if (strcmp(prog_name, "sockops") == 0) {
- attach_type = BPF_CGROUP_SOCK_OPS;
- } else {
+ if (libbpf_attach_type_by_name(prog_name, &attach_type)) {
log_err("Unexpected prog: %s", prog_name);
goto err;
}
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 0c7d9e556b47..622ade0a0957 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -28,6 +28,7 @@
#include <linux/sock_diag.h>
#include <linux/bpf.h>
#include <linux/if_link.h>
+#include <linux/tls.h>
#include <assert.h>
#include <libgen.h>
@@ -43,6 +44,13 @@
int running;
static void running_handler(int a);
+#ifndef TCP_ULP
+# define TCP_ULP 31
+#endif
+#ifndef SOL_TLS
+# define SOL_TLS 282
+#endif
+
/* randomly selected ports for testing on lo */
#define S1_PORT 10000
#define S2_PORT 10001
@@ -69,8 +77,12 @@ int txmsg_apply;
int txmsg_cork;
int txmsg_start;
int txmsg_end;
+int txmsg_start_push;
+int txmsg_end_push;
int txmsg_ingress;
int txmsg_skb;
+int ktls;
+int peek_flag;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
@@ -90,8 +102,12 @@ static const struct option long_options[] = {
{"txmsg_cork", required_argument, NULL, 'k'},
{"txmsg_start", required_argument, NULL, 's'},
{"txmsg_end", required_argument, NULL, 'e'},
+ {"txmsg_start_push", required_argument, NULL, 'p'},
+ {"txmsg_end_push", required_argument, NULL, 'q'},
{"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
{"txmsg_skb", no_argument, &txmsg_skb, 1 },
+ {"ktls", no_argument, &ktls, 1 },
+ {"peek", no_argument, &peek_flag, 1 },
{0, 0, NULL, 0 }
};
@@ -112,6 +128,71 @@ static void usage(char *argv[])
printf("\n");
}
+char *sock_to_string(int s)
+{
+ if (s == c1)
+ return "client1";
+ else if (s == c2)
+ return "client2";
+ else if (s == s1)
+ return "server1";
+ else if (s == s2)
+ return "server2";
+ else if (s == p1)
+ return "peer1";
+ else if (s == p2)
+ return "peer2";
+ else
+ return "unknown";
+}
+
+static int sockmap_init_ktls(int verbose, int s)
+{
+ struct tls12_crypto_info_aes_gcm_128 tls_tx = {
+ .info = {
+ .version = TLS_1_2_VERSION,
+ .cipher_type = TLS_CIPHER_AES_GCM_128,
+ },
+ };
+ struct tls12_crypto_info_aes_gcm_128 tls_rx = {
+ .info = {
+ .version = TLS_1_2_VERSION,
+ .cipher_type = TLS_CIPHER_AES_GCM_128,
+ },
+ };
+ int so_buf = 6553500;
+ int err;
+
+ err = setsockopt(s, 6, TCP_ULP, "tls", sizeof("tls"));
+ if (err) {
+ fprintf(stderr, "setsockopt: TCP_ULP(%s) failed with error %i\n", sock_to_string(s), err);
+ return -EINVAL;
+ }
+ err = setsockopt(s, SOL_TLS, TLS_TX, (void *)&tls_tx, sizeof(tls_tx));
+ if (err) {
+ fprintf(stderr, "setsockopt: TLS_TX(%s) failed with error %i\n", sock_to_string(s), err);
+ return -EINVAL;
+ }
+ err = setsockopt(s, SOL_TLS, TLS_RX, (void *)&tls_rx, sizeof(tls_rx));
+ if (err) {
+ fprintf(stderr, "setsockopt: TLS_RX(%s) failed with error %i\n", sock_to_string(s), err);
+ return -EINVAL;
+ }
+ err = setsockopt(s, SOL_SOCKET, SO_SNDBUF, &so_buf, sizeof(so_buf));
+ if (err) {
+ fprintf(stderr, "setsockopt: (%s) failed sndbuf with error %i\n", sock_to_string(s), err);
+ return -EINVAL;
+ }
+ err = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &so_buf, sizeof(so_buf));
+ if (err) {
+ fprintf(stderr, "setsockopt: (%s) failed rcvbuf with error %i\n", sock_to_string(s), err);
+ return -EINVAL;
+ }
+
+ if (verbose)
+ fprintf(stdout, "socket(%s) kTLS enabled\n", sock_to_string(s));
+ return 0;
+}
static int sockmap_init_sockets(int verbose)
{
int i, err, one = 1;
@@ -277,33 +358,40 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
return 0;
}
-static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
- struct msg_stats *s, bool tx,
- struct sockmap_options *opt)
+static void msg_free_iov(struct msghdr *msg)
{
- struct msghdr msg = {0};
- int err, i, flags = MSG_NOSIGNAL;
+ int i;
+
+ for (i = 0; i < msg->msg_iovlen; i++)
+ free(msg->msg_iov[i].iov_base);
+ free(msg->msg_iov);
+ msg->msg_iov = NULL;
+ msg->msg_iovlen = 0;
+}
+
+static int msg_alloc_iov(struct msghdr *msg,
+ int iov_count, int iov_length,
+ bool data, bool xmit)
+{
+ unsigned char k = 0;
struct iovec *iov;
- unsigned char k;
- bool data_test = opt->data_test;
- bool drop = opt->drop_expected;
+ int i;
iov = calloc(iov_count, sizeof(struct iovec));
if (!iov)
return errno;
- k = 0;
for (i = 0; i < iov_count; i++) {
unsigned char *d = calloc(iov_length, sizeof(char));
if (!d) {
fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
- goto out_errno;
+ goto unwind_iov;
}
iov[i].iov_base = d;
iov[i].iov_len = iov_length;
- if (data_test && tx) {
+ if (data && xmit) {
int j;
for (j = 0; j < iov_length; j++)
@@ -311,9 +399,60 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
}
- msg.msg_iov = iov;
- msg.msg_iovlen = iov_count;
- k = 0;
+ msg->msg_iov = iov;
+ msg->msg_iovlen = iov_count;
+
+ return 0;
+unwind_iov:
+ for (i--; i >= 0 ; i--)
+ free(msg->msg_iov[i].iov_base);
+ return -ENOMEM;
+}
+
+static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
+{
+ int i, j, bytes_cnt = 0;
+ unsigned char k = 0;
+
+ for (i = 0; i < msg->msg_iovlen; i++) {
+ unsigned char *d = msg->msg_iov[i].iov_base;
+
+ for (j = 0;
+ j < msg->msg_iov[i].iov_len && size; j++) {
+ if (d[j] != k++) {
+ fprintf(stderr,
+ "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+ i, j, d[j], k - 1, d[j+1], k);
+ return -EIO;
+ }
+ bytes_cnt++;
+ if (bytes_cnt == chunk_sz) {
+ k = 0;
+ bytes_cnt = 0;
+ }
+ size--;
+ }
+ }
+ return 0;
+}
+
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+ struct msg_stats *s, bool tx,
+ struct sockmap_options *opt)
+{
+ struct msghdr msg = {0}, msg_peek = {0};
+ int err, i, flags = MSG_NOSIGNAL;
+ bool drop = opt->drop_expected;
+ bool data = opt->data_test;
+
+ err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx);
+ if (err)
+ goto out_errno;
+ if (peek_flag) {
+ err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx);
+ if (err)
+ goto out_errno;
+ }
if (tx) {
clock_gettime(CLOCK_MONOTONIC, &s->start);
@@ -333,19 +472,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
} else {
- int slct, recv, max_fd = fd;
+ int slct, recvp = 0, recv, max_fd = fd;
int fd_flags = O_NONBLOCK;
struct timeval timeout;
float total_bytes;
- int bytes_cnt = 0;
- int chunk_sz;
fd_set w;
- if (opt->sendpage)
- chunk_sz = iov_length * cnt;
- else
- chunk_sz = iov_length * iov_count;
-
fcntl(fd, fd_flags);
total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
err = clock_gettime(CLOCK_MONOTONIC, &s->start);
@@ -377,6 +509,19 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
goto out_errno;
}
+ errno = 0;
+ if (peek_flag) {
+ flags |= MSG_PEEK;
+ recvp = recvmsg(fd, &msg_peek, flags);
+ if (recvp < 0) {
+ if (errno != EWOULDBLOCK) {
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ goto out_errno;
+ }
+ }
+ flags = 0;
+ }
+
recv = recvmsg(fd, &msg, flags);
if (recv < 0) {
if (errno != EWOULDBLOCK) {
@@ -388,27 +533,23 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
s->bytes_recvd += recv;
- if (data_test) {
- int j;
-
- for (i = 0; i < msg.msg_iovlen; i++) {
- unsigned char *d = iov[i].iov_base;
-
- for (j = 0;
- j < iov[i].iov_len && recv; j++) {
- if (d[j] != k++) {
- errno = -EIO;
- fprintf(stderr,
- "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
- i, j, d[j], k - 1, d[j+1], k);
- goto out_errno;
- }
- bytes_cnt++;
- if (bytes_cnt == chunk_sz) {
- k = 0;
- bytes_cnt = 0;
- }
- recv--;
+ if (data) {
+ int chunk_sz = opt->sendpage ?
+ iov_length * cnt :
+ iov_length * iov_count;
+
+ errno = msg_verify_data(&msg, recv, chunk_sz);
+ if (errno) {
+ perror("data verify msg failed\n");
+ goto out_errno;
+ }
+ if (recvp) {
+ errno = msg_verify_data(&msg_peek,
+ recvp,
+ chunk_sz);
+ if (errno) {
+ perror("data verify msg_peek failed\n");
+ goto out_errno;
}
}
}
@@ -416,14 +557,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
clock_gettime(CLOCK_MONOTONIC, &s->end);
}
- for (i = 0; i < iov_count; i++)
- free(iov[i].iov_base);
- free(iov);
- return 0;
+ msg_free_iov(&msg);
+ msg_free_iov(&msg_peek);
+ return err;
out_errno:
- for (i = 0; i < iov_count; i++)
- free(iov[i].iov_base);
- free(iov);
+ msg_free_iov(&msg);
+ msg_free_iov(&msg_peek);
return errno;
}
@@ -456,6 +595,21 @@ static int sendmsg_test(struct sockmap_options *opt)
else
rx_fd = p2;
+ if (ktls) {
+ /* Redirecting into non-TLS socket which sends into a TLS
+ * socket is not a valid test. So in this case lets not
+ * enable kTLS but still run the test.
+ */
+ if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) {
+ err = sockmap_init_ktls(opt->verbose, rx_fd);
+ if (err)
+ return err;
+ }
+ err = sockmap_init_ktls(opt->verbose, c1);
+ if (err)
+ return err;
+ }
+
rxpid = fork();
if (rxpid == 0) {
if (opt->drop_expected)
@@ -469,17 +623,16 @@ static int sendmsg_test(struct sockmap_options *opt)
fprintf(stderr,
"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
iov_count, iov_buf, cnt, err);
- shutdown(p2, SHUT_RDWR);
- shutdown(p1, SHUT_RDWR);
if (s.end.tv_sec - s.start.tv_sec) {
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
}
if (opt->verbose)
fprintf(stdout,
- "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+ "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
s.bytes_sent, sent_Bps, sent_Bps/giga,
- s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga,
+ peek_flag ? "(peek_msg)" : "");
if (err && txmsg_cork)
err = 0;
exit(err ? 1 : 0);
@@ -500,7 +653,6 @@ static int sendmsg_test(struct sockmap_options *opt)
fprintf(stderr,
"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
iov_count, iov_buf, cnt, err);
- shutdown(c1, SHUT_RDWR);
if (s.end.tv_sec - s.start.tv_sec) {
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
@@ -755,6 +907,30 @@ run:
}
}
+ if (txmsg_start_push) {
+ i = 2;
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_start_push, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_start_push): %d (%s)\n",
+ err, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (txmsg_end_push) {
+ i = 3;
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_end_push, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem %i@%i (txmsg_end_push): %d (%s)\n",
+ txmsg_end_push, i, err, strerror(errno));
+ goto out;
+ }
+ }
+
if (txmsg_ingress) {
int in = BPF_F_INGRESS;
@@ -910,6 +1086,10 @@ static void test_options(char *options)
strncat(options, "ingress,", OPTSTRING);
if (txmsg_skb)
strncat(options, "skb,", OPTSTRING);
+ if (ktls)
+ strncat(options, "ktls,", OPTSTRING);
+ if (peek_flag)
+ strncat(options, "peek,", OPTSTRING);
}
static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
@@ -1083,6 +1263,8 @@ static int test_mixed(int cgrp)
txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
txmsg_apply = txmsg_cork = 0;
txmsg_start = txmsg_end = 0;
+ txmsg_start_push = txmsg_end_push = 0;
+
/* Test small and large iov_count values with pass/redir/apply/cork */
txmsg_pass = 1;
txmsg_redir = 0;
@@ -1199,6 +1381,8 @@ static int test_start_end(int cgrp)
/* Test basic start/end with lots of iov_count and iov_lengths */
txmsg_start = 1;
txmsg_end = 2;
+ txmsg_start_push = 1;
+ txmsg_end_push = 2;
err = test_txmsg(cgrp);
if (err)
goto out;
@@ -1212,6 +1396,8 @@ static int test_start_end(int cgrp)
for (i = 99; i <= 1600; i += 500) {
txmsg_start = 0;
txmsg_end = i;
+ txmsg_start_push = 0;
+ txmsg_end_push = i;
err = test_exec(cgrp, &opt);
if (err)
goto out;
@@ -1221,6 +1407,8 @@ static int test_start_end(int cgrp)
for (i = 199; i <= 1600; i += 500) {
txmsg_start = 100;
txmsg_end = i;
+ txmsg_start_push = 100;
+ txmsg_end_push = i;
err = test_exec(cgrp, &opt);
if (err)
goto out;
@@ -1229,6 +1417,8 @@ static int test_start_end(int cgrp)
/* Test start/end with cork pulling last sg entry */
txmsg_start = 1500;
txmsg_end = 1600;
+ txmsg_start_push = 1500;
+ txmsg_end_push = 1600;
err = test_exec(cgrp, &opt);
if (err)
goto out;
@@ -1236,6 +1426,8 @@ static int test_start_end(int cgrp)
/* Test start/end pull of single byte in last page */
txmsg_start = 1111;
txmsg_end = 1112;
+ txmsg_start_push = 1111;
+ txmsg_end_push = 1112;
err = test_exec(cgrp, &opt);
if (err)
goto out;
@@ -1243,6 +1435,8 @@ static int test_start_end(int cgrp)
/* Test start/end with end < start */
txmsg_start = 1111;
txmsg_end = 0;
+ txmsg_start_push = 1111;
+ txmsg_end_push = 0;
err = test_exec(cgrp, &opt);
if (err)
goto out;
@@ -1250,6 +1444,8 @@ static int test_start_end(int cgrp)
/* Test start/end with end > data */
txmsg_start = 0;
txmsg_end = 1601;
+ txmsg_start_push = 0;
+ txmsg_end_push = 1601;
err = test_exec(cgrp, &opt);
if (err)
goto out;
@@ -1257,6 +1453,8 @@ static int test_start_end(int cgrp)
/* Test start/end with start > data */
txmsg_start = 1601;
txmsg_end = 1600;
+ txmsg_start_push = 1601;
+ txmsg_end_push = 1600;
err = test_exec(cgrp, &opt);
out:
@@ -1272,7 +1470,7 @@ char *map_names[] = {
"sock_map_redir",
"sock_apply_bytes",
"sock_cork_bytes",
- "sock_pull_bytes",
+ "sock_bytes",
"sock_redir_flags",
"sock_skb_opts",
};
@@ -1348,9 +1546,9 @@ static int populate_progs(char *bpf_file)
return 0;
}
-static int __test_suite(char *bpf_file)
+static int __test_suite(int cg_fd, char *bpf_file)
{
- int cg_fd, err;
+ int err, cleanup = cg_fd;
err = populate_progs(bpf_file);
if (err < 0) {
@@ -1358,26 +1556,28 @@ static int __test_suite(char *bpf_file)
return err;
}
- if (setup_cgroup_environment()) {
- fprintf(stderr, "ERROR: cgroup env failed\n");
- return -EINVAL;
- }
-
- cg_fd = create_and_get_cgroup(CG_PATH);
if (cg_fd < 0) {
- fprintf(stderr,
- "ERROR: (%i) open cg path failed: %s\n",
- cg_fd, optarg);
- return cg_fd;
- }
+ if (setup_cgroup_environment()) {
+ fprintf(stderr, "ERROR: cgroup env failed\n");
+ return -EINVAL;
+ }
- if (join_cgroup(CG_PATH)) {
- fprintf(stderr, "ERROR: failed to join cgroup\n");
- return -EINVAL;
+ cg_fd = create_and_get_cgroup(CG_PATH);
+ if (cg_fd < 0) {
+ fprintf(stderr,
+ "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, optarg);
+ return cg_fd;
+ }
+
+ if (join_cgroup(CG_PATH)) {
+ fprintf(stderr, "ERROR: failed to join cgroup\n");
+ return -EINVAL;
+ }
}
/* Tests basic commands and APIs with range of iov values */
- txmsg_start = txmsg_end = 0;
+ txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0;
err = test_txmsg(cg_fd);
if (err)
goto out;
@@ -1394,20 +1594,24 @@ static int __test_suite(char *bpf_file)
out:
printf("Summary: %i PASSED %i FAILED\n", passed, failed);
- cleanup_cgroup_environment();
- close(cg_fd);
+ if (cleanup < 0) {
+ cleanup_cgroup_environment();
+ close(cg_fd);
+ }
return err;
}
-static int test_suite(void)
+static int test_suite(int cg_fd)
{
int err;
- err = __test_suite(BPF_SOCKMAP_FILENAME);
+ err = __test_suite(cg_fd, BPF_SOCKMAP_FILENAME);
if (err)
goto out;
- err = __test_suite(BPF_SOCKHASH_FILENAME);
+ err = __test_suite(cg_fd, BPF_SOCKHASH_FILENAME);
out:
+ if (cg_fd > -1)
+ close(cg_fd);
return err;
}
@@ -1420,9 +1624,9 @@ int main(int argc, char **argv)
int test = PING_PONG;
if (argc < 2)
- return test_suite();
+ return test_suite(-1);
- while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
+ while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
long_options, &longindex)) != -1) {
switch (opt) {
case 's':
@@ -1431,6 +1635,12 @@ int main(int argc, char **argv)
case 'e':
txmsg_end = atoi(optarg);
break;
+ case 'p':
+ txmsg_start_push = atoi(optarg);
+ break;
+ case 'q':
+ txmsg_end_push = atoi(optarg);
+ break;
case 'a':
txmsg_apply = atoi(optarg);
break;
@@ -1486,6 +1696,9 @@ int main(int argc, char **argv)
}
}
+ if (argc <= 3 && cg_fd)
+ return test_suite(cg_fd);
+
if (!cg_fd) {
fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
argv[0]);
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
index 8e8e41780bb9..14b8bbac004f 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -70,11 +70,11 @@ struct bpf_map_def SEC("maps") sock_cork_bytes = {
.max_entries = 1
};
-struct bpf_map_def SEC("maps") sock_pull_bytes = {
+struct bpf_map_def SEC("maps") sock_bytes = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
- .max_entries = 2
+ .max_entries = 4
};
struct bpf_map_def SEC("maps") sock_redir_flags = {
@@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
SEC("sk_msg1")
int bpf_prog4(struct sk_msg_md *msg)
{
- int *bytes, zero = 0, one = 1;
- int *start, *end;
+ int *bytes, zero = 0, one = 1, two = 2, three = 3;
+ int *start, *end, *start_push, *end_push;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
@@ -190,18 +190,24 @@ int bpf_prog4(struct sk_msg_md *msg)
bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
if (bytes)
bpf_msg_cork_bytes(msg, *bytes);
- start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
- end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ start = bpf_map_lookup_elem(&sock_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_bytes, &one);
if (start && end)
bpf_msg_pull_data(msg, *start, *end, 0);
+ start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+ end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+ if (start_push && end_push)
+ bpf_msg_push_data(msg, *start_push, *end_push, 0);
return SK_PASS;
}
SEC("sk_msg2")
int bpf_prog5(struct sk_msg_md *msg)
{
- int err1 = -1, err2 = -1, zero = 0, one = 1;
- int *bytes, *start, *end, len1, len2;
+ int zero = 0, one = 1, two = 2, three = 3;
+ int *start, *end, *start_push, *end_push;
+ int *bytes, len1, len2 = 0, len3;
+ int err1 = -1, err2 = -1;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
@@ -210,8 +216,8 @@ int bpf_prog5(struct sk_msg_md *msg)
if (bytes)
err2 = bpf_msg_cork_bytes(msg, *bytes);
len1 = (__u64)msg->data_end - (__u64)msg->data;
- start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
- end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ start = bpf_map_lookup_elem(&sock_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_bytes, &one);
if (start && end) {
int err;
@@ -225,6 +231,23 @@ int bpf_prog5(struct sk_msg_md *msg)
bpf_printk("sk_msg2: length update %i->%i\n",
len1, len2);
}
+
+ start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+ end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+ if (start_push && end_push) {
+ int err;
+
+ bpf_printk("sk_msg2: push(%i:%i)\n",
+ start_push ? *start_push : 0,
+ end_push ? *end_push : 0);
+ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (err)
+ bpf_printk("sk_msg2: push_data err %i\n", err);
+ len3 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length push_update %i->%i\n",
+ len2 ? len2 : len1, len3);
+ }
+
bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
len1, err1, err2);
return SK_PASS;
@@ -233,8 +256,8 @@ int bpf_prog5(struct sk_msg_md *msg)
SEC("sk_msg3")
int bpf_prog6(struct sk_msg_md *msg)
{
- int *bytes, zero = 0, one = 1, key = 0;
- int *start, *end, *f;
+ int *bytes, *start, *end, *start_push, *end_push, *f;
+ int zero = 0, one = 1, two = 2, three = 3, key = 0;
__u64 flags = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
@@ -243,10 +266,17 @@ int bpf_prog6(struct sk_msg_md *msg)
bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
if (bytes)
bpf_msg_cork_bytes(msg, *bytes);
- start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
- end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+
+ start = bpf_map_lookup_elem(&sock_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_bytes, &one);
if (start && end)
bpf_msg_pull_data(msg, *start, *end, 0);
+
+ start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+ end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+ if (start_push && end_push)
+ bpf_msg_push_data(msg, *start_push, *end_push, 0);
+
f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
if (f && *f) {
key = 2;
@@ -262,8 +292,9 @@ int bpf_prog6(struct sk_msg_md *msg)
SEC("sk_msg4")
int bpf_prog7(struct sk_msg_md *msg)
{
- int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
- int *f, *bytes, *start, *end, len1, len2;
+ int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3;
+ int *bytes, *start, *end, *start_push, *end_push, *f;
+ int err1 = 0, err2 = 0, key = 0;
__u64 flags = 0;
int err;
@@ -274,10 +305,10 @@ int bpf_prog7(struct sk_msg_md *msg)
if (bytes)
err2 = bpf_msg_cork_bytes(msg, *bytes);
len1 = (__u64)msg->data_end - (__u64)msg->data;
- start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
- end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
- if (start && end) {
+ start = bpf_map_lookup_elem(&sock_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_bytes, &one);
+ if (start && end) {
bpf_printk("sk_msg2: pull(%i:%i)\n",
start ? *start : 0, end ? *end : 0);
err = bpf_msg_pull_data(msg, *start, *end, 0);
@@ -288,6 +319,22 @@ int bpf_prog7(struct sk_msg_md *msg)
bpf_printk("sk_msg2: length update %i->%i\n",
len1, len2);
}
+
+ start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+ end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+ if (start_push && end_push) {
+ bpf_printk("sk_msg4: push(%i:%i)\n",
+ start_push ? *start_push : 0,
+ end_push ? *end_push : 0);
+ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (err)
+ bpf_printk("sk_msg4: push_data err %i\n",
+ err);
+ len3 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg4: length push_update %i->%i\n",
+ len2 ? len2 : len1, len3);
+ }
+
f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
if (f && *f) {
key = 2;
@@ -342,8 +389,8 @@ int bpf_prog9(struct sk_msg_md *msg)
SEC("sk_msg7")
int bpf_prog10(struct sk_msg_md *msg)
{
- int *bytes, zero = 0, one = 1;
- int *start, *end;
+ int *bytes, *start, *end, *start_push, *end_push;
+ int zero = 0, one = 1, two = 2, three = 3;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
@@ -351,10 +398,14 @@ int bpf_prog10(struct sk_msg_md *msg)
bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
if (bytes)
bpf_msg_cork_bytes(msg, *bytes);
- start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
- end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ start = bpf_map_lookup_elem(&sock_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_bytes, &one);
if (start && end)
bpf_msg_pull_data(msg, *start, *end, 0);
+ start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+ end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+ if (start_push && end_push)
+ bpf_msg_push_data(msg, *start_push, *end_push, 0);
return SK_DROP;
}
diff --git a/tools/testing/selftests/bpf/test_stack_map.c b/tools/testing/selftests/bpf/test_stack_map.c
new file mode 100644
index 000000000000..31c3880e6da0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stack_map.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_STACK
+#include "test_queue_stack_map.h"
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
index 4b7fd540cea9..74f73b33a7b0 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -5,6 +5,7 @@
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/tcp.h>
@@ -17,6 +18,13 @@ struct bpf_map_def SEC("maps") global_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct tcpbpf_globals),
+ .max_entries = 4,
+};
+
+struct bpf_map_def SEC("maps") sockopt_results = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(int),
.max_entries = 2,
};
@@ -45,11 +53,14 @@ int _version SEC("version") = 1;
SEC("sockops")
int bpf_testcb(struct bpf_sock_ops *skops)
{
- int rv = -1;
- int bad_call_rv = 0;
+ char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
+ struct tcphdr *thdr;
int good_call_rv = 0;
- int op;
+ int bad_call_rv = 0;
+ int save_syn = 1;
+ int rv = -1;
int v = 0;
+ int op;
op = (int) skops->op;
@@ -82,6 +93,21 @@ int bpf_testcb(struct bpf_sock_ops *skops)
v = 0xff;
rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
sizeof(v));
+ if (skops->family == AF_INET6) {
+ v = bpf_getsockopt(skops, IPPROTO_TCP, TCP_SAVED_SYN,
+ header, (sizeof(struct ipv6hdr) +
+ sizeof(struct tcphdr)));
+ if (!v) {
+ int offset = sizeof(struct ipv6hdr);
+
+ thdr = (struct tcphdr *)(header + offset);
+ v = thdr->syn;
+ __u32 key = 1;
+
+ bpf_map_update_elem(&sockopt_results, &key, &v,
+ BPF_ANY);
+ }
+ }
break;
case BPF_SOCK_OPS_RTO_CB:
break;
@@ -111,6 +137,12 @@ int bpf_testcb(struct bpf_sock_ops *skops)
break;
case BPF_SOCK_OPS_TCP_LISTEN_CB:
bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+ v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN,
+ &save_syn, sizeof(save_syn));
+ /* Update global map w/ result of setsock opt */
+ __u32 key = 0;
+
+ bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY);
break;
default:
rv = -1;
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index a275c2971376..e6eebda7d112 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -54,6 +54,26 @@ err:
return -1;
}
+int verify_sockopt_result(int sock_map_fd)
+{
+ __u32 key = 0;
+ int res;
+ int rv;
+
+ /* check setsockopt for SAVE_SYN */
+ rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
+ EXPECT_EQ(0, rv, "d");
+ EXPECT_EQ(0, res, "d");
+ key = 1;
+ /* check getsockopt for SAVED_SYN */
+ rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
+ EXPECT_EQ(0, rv, "d");
+ EXPECT_EQ(1, res, "d");
+ return 0;
+err:
+ return -1;
+}
+
static int bpf_find_map(const char *test, struct bpf_object *obj,
const char *name)
{
@@ -70,11 +90,11 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
int main(int argc, char **argv)
{
const char *file = "test_tcpbpf_kern.o";
+ int prog_fd, map_fd, sock_map_fd;
struct tcpbpf_globals g = {0};
const char *cg_path = "/foo";
int error = EXIT_FAILURE;
struct bpf_object *obj;
- int prog_fd, map_fd;
int cg_fd = -1;
__u32 key = 0;
int rv;
@@ -110,6 +130,10 @@ int main(int argc, char **argv)
if (map_fd < 0)
goto err;
+ sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
+ if (sock_map_fd < 0)
+ goto err;
+
rv = bpf_map_lookup_elem(map_fd, &key, &g);
if (rv != 0) {
printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
@@ -121,6 +145,11 @@ int main(int argc, char **argv)
goto err;
}
+ if (verify_sockopt_result(sock_map_fd)) {
+ printf("FAILED: Wrong sockopt stats\n");
+ goto err;
+ }
+
printf("PASSED!\n");
error = 0;
err:
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 67c412d19c09..769d68a48f30 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2017 Facebook
+ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -47,7 +48,7 @@
#define MAX_INSNS BPF_MAXINSNS
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 8
+#define MAX_NR_MAPS 13
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
@@ -60,14 +61,19 @@ static bool unpriv_disabled = false;
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
- int fixup_map1[MAX_FIXUPS];
- int fixup_map2[MAX_FIXUPS];
- int fixup_map3[MAX_FIXUPS];
- int fixup_map4[MAX_FIXUPS];
+ int fixup_map_hash_8b[MAX_FIXUPS];
+ int fixup_map_hash_48b[MAX_FIXUPS];
+ int fixup_map_hash_16b[MAX_FIXUPS];
+ int fixup_map_array_48b[MAX_FIXUPS];
+ int fixup_map_sockmap[MAX_FIXUPS];
+ int fixup_map_sockhash[MAX_FIXUPS];
+ int fixup_map_xskmap[MAX_FIXUPS];
+ int fixup_map_stacktrace[MAX_FIXUPS];
int fixup_prog1[MAX_FIXUPS];
int fixup_prog2[MAX_FIXUPS];
int fixup_map_in_map[MAX_FIXUPS];
int fixup_cgroup_storage[MAX_FIXUPS];
+ int fixup_percpu_cgroup_storage[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
uint32_t retval;
@@ -177,6 +183,24 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
self->retval = (uint32_t)res;
}
+/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
+#define BPF_SK_LOOKUP \
+ /* struct bpf_sock_tuple tuple = {} */ \
+ BPF_MOV64_IMM(BPF_REG_2, 0), \
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), \
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -16), \
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -24), \
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32), \
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40), \
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48), \
+ /* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */ \
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), \
+ BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), \
+ BPF_MOV64_IMM(BPF_REG_4, 0), \
+ BPF_MOV64_IMM(BPF_REG_5, 0), \
+ BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp)
+
static struct bpf_test tests[] = {
{
"add+sub+mul",
@@ -856,7 +880,7 @@ static struct bpf_test tests[] = {
BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 2 },
+ .fixup_map_hash_8b = { 2 },
.errstr = "invalid indirect read from stack",
.result = REJECT,
},
@@ -1090,7 +1114,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "R0 invalid mem access 'map_value_or_null'",
.result = REJECT,
},
@@ -1107,7 +1131,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "misaligned value access",
.result = REJECT,
.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
@@ -1127,7 +1151,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "R0 invalid mem access",
.errstr_unpriv = "R0 leaks addr",
.result = REJECT,
@@ -1217,7 +1241,7 @@ static struct bpf_test tests[] = {
BPF_FUNC_map_delete_elem),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 24 },
+ .fixup_map_hash_8b = { 24 },
.errstr_unpriv = "R1 pointer comparison",
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -1371,7 +1395,7 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, pkt_type)),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 4 },
+ .fixup_map_hash_8b = { 4 },
.errstr = "different pointers",
.errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
@@ -1394,7 +1418,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_JMP_IMM(BPF_JA, 0, 0, -12),
},
- .fixup_map1 = { 6 },
+ .fixup_map_hash_8b = { 6 },
.errstr = "different pointers",
.errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
@@ -1418,7 +1442,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_JMP_IMM(BPF_JA, 0, 0, -13),
},
- .fixup_map1 = { 7 },
+ .fixup_map_hash_8b = { 7 },
.errstr = "different pointers",
.errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
@@ -2555,7 +2579,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr_unpriv = "R4 leaks addr",
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -2572,7 +2596,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "invalid indirect read from stack off -8+0 size 8",
.result = REJECT,
},
@@ -2707,6 +2731,137 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ "unpriv: spill/fill of different pointers stx - ctx and sock",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ /* u64 foo; */
+ /* void *target = &foo; */
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ /* if (skb == NULL) *target = sock; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ /* else *target = skb; */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ /* struct __sk_buff *skb = *target; */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ /* skb->mark = 42; */
+ BPF_MOV64_IMM(BPF_REG_3, 42),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct __sk_buff, mark)),
+ /* if (sk) bpf_sk_release(sk) */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "type=ctx expected=sock",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of different pointers stx - leak sock",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ /* u64 foo; */
+ /* void *target = &foo; */
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ /* if (skb == NULL) *target = sock; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ /* else *target = skb; */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ /* struct __sk_buff *skb = *target; */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ /* skb->mark = 42; */
+ BPF_MOV64_IMM(BPF_REG_3, 42),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ //.errstr = "same insn cannot be used with different pointers",
+ .errstr = "Unreleased reference",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of different pointers stx - sock and ctx (read)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ /* u64 foo; */
+ /* void *target = &foo; */
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ /* if (skb) *target = skb */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ /* else *target = sock */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ /* struct bpf_sock *sk = *target; */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ /* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct bpf_sock, mark)),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "same insn cannot be used with different pointers",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of different pointers stx - sock and ctx (write)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ /* u64 foo; */
+ /* void *target = &foo; */
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ /* if (skb) *target = skb */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ /* else *target = sock */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ /* struct bpf_sock *sk = *target; */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ /* if (sk) sk->mark = 42; bpf_sk_release(sk); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 42),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct bpf_sock, mark)),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ //.errstr = "same insn cannot be used with different pointers",
+ .errstr = "cannot write into socket",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
"unpriv: spill/fill of different pointers ldx",
.insns = {
BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
@@ -2743,7 +2898,7 @@ static struct bpf_test tests[] = {
BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -2783,7 +2938,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 1 },
+ .fixup_map_hash_8b = { 1 },
.errstr_unpriv = "R1 pointer comparison",
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -3275,7 +3430,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
BPF_EXIT_INSN(),
},
- .errstr = "BPF_ST stores into R1 context is not allowed",
+ .errstr = "BPF_ST stores into R1 ctx is not allowed",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -3287,7 +3442,7 @@ static struct bpf_test tests[] = {
BPF_REG_0, offsetof(struct __sk_buff, mark), 0),
BPF_EXIT_INSN(),
},
- .errstr = "BPF_XADD stores into R1 context is not allowed",
+ .errstr = "BPF_XADD stores into R1 ctx is not allowed",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -3637,7 +3792,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .errstr = "R3 pointer arithmetic on pkt_end",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -3922,7 +4077,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 5 },
+ .fixup_map_hash_8b = { 5 },
.result_unpriv = ACCEPT,
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_XDP,
@@ -3938,7 +4093,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 1 },
+ .fixup_map_hash_8b = { 1 },
.result = REJECT,
.errstr = "invalid access to packet",
.prog_type = BPF_PROG_TYPE_XDP,
@@ -3966,7 +4121,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 11 },
+ .fixup_map_hash_8b = { 11 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_XDP,
},
@@ -3988,7 +4143,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 7 },
+ .fixup_map_hash_8b = { 7 },
.result = REJECT,
.errstr = "invalid access to packet",
.prog_type = BPF_PROG_TYPE_XDP,
@@ -4010,7 +4165,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 6 },
+ .fixup_map_hash_8b = { 6 },
.result = REJECT,
.errstr = "invalid access to packet",
.prog_type = BPF_PROG_TYPE_XDP,
@@ -4033,7 +4188,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 5 },
+ .fixup_map_hash_8b = { 5 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -4048,7 +4203,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 1 },
+ .fixup_map_hash_8b = { 1 },
.result = REJECT,
.errstr = "invalid access to packet",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -4076,7 +4231,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 11 },
+ .fixup_map_hash_8b = { 11 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -4098,7 +4253,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 7 },
+ .fixup_map_hash_8b = { 7 },
.result = REJECT,
.errstr = "invalid access to packet",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -4120,7 +4275,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 6 },
+ .fixup_map_hash_8b = { 6 },
.result = REJECT,
.errstr = "invalid access to packet",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -4391,6 +4546,85 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ "prevent map lookup in sockmap",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockmap = { 3 },
+ .result = REJECT,
+ .errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem",
+ .prog_type = BPF_PROG_TYPE_SOCK_OPS,
+ },
+ {
+ "prevent map lookup in sockhash",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockhash = { 3 },
+ .result = REJECT,
+ .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem",
+ .prog_type = BPF_PROG_TYPE_SOCK_OPS,
+ },
+ {
+ "prevent map lookup in xskmap",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_xskmap = { 3 },
+ .result = REJECT,
+ .errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "prevent map lookup in stack trace",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_stacktrace = { 3 },
+ .result = REJECT,
+ .errstr = "cannot pass map_type 7 into func bpf_map_lookup_elem",
+ .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+ },
+ {
+ "prevent map lookup in prog array",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog2 = { 3 },
+ .result = REJECT,
+ .errstr = "cannot pass map_type 3 into func bpf_map_lookup_elem",
+ },
+ {
"valid map access into an array with a constant",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
@@ -4404,7 +4638,7 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -4426,7 +4660,7 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -4450,7 +4684,7 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -4478,7 +4712,7 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -4498,7 +4732,7 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "invalid access to map value, value_size=48 off=48 size=8",
.result = REJECT,
},
@@ -4519,7 +4753,7 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R0 min value is outside of the array range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4541,7 +4775,7 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4566,7 +4800,7 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.errstr = "R0 unbounded memory access",
.result_unpriv = REJECT,
@@ -4593,7 +4827,7 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.errstr = "invalid access to map value, value_size=48 off=44 size=8",
.result_unpriv = REJECT,
@@ -4623,12 +4857,183 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3, 11 },
+ .fixup_map_hash_48b = { 3, 11 },
.errstr = "R0 pointer += pointer",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "direct packet read test#1 for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, queue_mapping)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, protocol)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_present)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "direct packet read test#2 for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_tci)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_proto)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, priority)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+ offsetof(struct __sk_buff, priority)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff,
+ ingress_ifindex)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+ offsetof(struct __sk_buff, hash)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "direct packet read test#3 for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+ offsetof(struct __sk_buff, napi_id)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "direct packet read test#4 for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, family)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip4)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip4)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[3])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[3])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_port)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, local_port)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid access of tc_classid for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid access of data_meta for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data_meta)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid access of flow_keys for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, flow_keys)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid write access to napi_id for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+ offsetof(struct __sk_buff, napi_id)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9,
+ offsetof(struct __sk_buff, napi_id)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
"valid cgroup storage access",
.insns = {
BPF_MOV64_IMM(BPF_REG_2, 0),
@@ -4656,7 +5061,7 @@ static struct bpf_test tests[] = {
BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 1 },
+ .fixup_map_hash_8b = { 1 },
.result = REJECT,
.errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
@@ -4676,7 +5081,7 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
- "invalid per-cgroup storage access 3",
+ "invalid cgroup storage access 3",
.insns = {
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_LD_MAP_FD(BPF_REG_1, 0),
@@ -4744,6 +5149,121 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
+ "valid per-cpu cgroup storage access",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_percpu_cgroup_storage = { 1 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid per-cpu cgroup storage access 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 1 },
+ .result = REJECT,
+ .errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid per-cpu cgroup storage access 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "fd 1 is not pointing to valid bpf_map",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid per-cpu cgroup storage access 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_percpu_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=64 off=256 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid per-cpu cgroup storage access 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=64 off=-2 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid per-cpu cgroup storage access 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 7),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_percpu_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "get_local_storage() doesn't support non-zero flags",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid per-cpu cgroup storage access 6",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_percpu_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "get_local_storage() doesn't support non-zero flags",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
"multiple registers share map_lookup_elem result",
.insns = {
BPF_MOV64_IMM(BPF_REG_1, 10),
@@ -4758,7 +5278,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 4 },
+ .fixup_map_hash_8b = { 4 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS
},
@@ -4779,8 +5299,8 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 4 },
- .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .fixup_map_hash_8b = { 4 },
+ .errstr = "R4 pointer arithmetic on map_value_or_null",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS
},
@@ -4800,8 +5320,8 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 4 },
- .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .fixup_map_hash_8b = { 4 },
+ .errstr = "R4 pointer arithmetic on map_value_or_null",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS
},
@@ -4821,8 +5341,8 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 4 },
- .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .fixup_map_hash_8b = { 4 },
+ .errstr = "R4 pointer arithmetic on map_value_or_null",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS
},
@@ -4847,7 +5367,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 4 },
+ .fixup_map_hash_8b = { 4 },
.result = REJECT,
.errstr = "R4 !read_ok",
.prog_type = BPF_PROG_TYPE_SCHED_CLS
@@ -4875,7 +5395,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 4 },
+ .fixup_map_hash_8b = { 4 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS
},
@@ -4896,7 +5416,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R0 unbounded memory access",
.result = REJECT,
.errstr_unpriv = "R0 leaks addr",
@@ -5146,11 +5666,11 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[0])),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 2 },
+ .fixup_map_hash_8b = { 2 },
.errstr_unpriv = "R2 leaks addr into mem",
.result_unpriv = REJECT,
.result = REJECT,
- .errstr = "BPF_XADD stores into R1 context is not allowed",
+ .errstr = "BPF_XADD stores into R1 ctx is not allowed",
},
{
"leak pointer into ctx 2",
@@ -5165,7 +5685,7 @@ static struct bpf_test tests[] = {
.errstr_unpriv = "R10 leaks addr into mem",
.result_unpriv = REJECT,
.result = REJECT,
- .errstr = "BPF_XADD stores into R1 context is not allowed",
+ .errstr = "BPF_XADD stores into R1 ctx is not allowed",
},
{
"leak pointer into ctx 3",
@@ -5176,7 +5696,7 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[0])),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 1 },
+ .fixup_map_hash_8b = { 1 },
.errstr_unpriv = "R2 leaks addr into ctx",
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -5198,7 +5718,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 4 },
+ .fixup_map_hash_8b = { 4 },
.errstr_unpriv = "R6 leaks addr into mem",
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -5218,7 +5738,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5237,7 +5757,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5255,7 +5775,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_trace_printk),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "invalid access to map value, value_size=48 off=0 size=0",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5275,7 +5795,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "invalid access to map value, value_size=48 off=0 size=56",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5295,7 +5815,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R2 min value is negative",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5319,7 +5839,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5340,7 +5860,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5360,7 +5880,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_trace_printk),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "invalid access to map value, value_size=48 off=4 size=0",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5384,7 +5904,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "invalid access to map value, value_size=48 off=4 size=52",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5406,7 +5926,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R2 min value is negative",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5428,7 +5948,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R2 min value is negative",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5453,7 +5973,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5475,7 +5995,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5495,7 +6015,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_trace_printk),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R1 min value is outside of the array range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5520,7 +6040,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "invalid access to map value, value_size=48 off=4 size=52",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5543,7 +6063,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R2 min value is negative",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5566,7 +6086,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R2 min value is negative",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5592,7 +6112,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5615,7 +6135,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5637,7 +6157,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_trace_printk),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R1 min value is outside of the array range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5659,7 +6179,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R1 unbounded memory access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5685,7 +6205,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "invalid access to map value, value_size=48 off=4 size=45",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5709,7 +6229,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5732,7 +6252,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = REJECT,
.errstr = "R1 unbounded memory access",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5756,7 +6276,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5779,7 +6299,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = REJECT,
.errstr = "R1 unbounded memory access",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5804,7 +6324,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5828,7 +6348,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5852,7 +6372,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = REJECT,
.errstr = "R1 min value is negative",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5877,7 +6397,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5901,7 +6421,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5925,7 +6445,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = REJECT,
.errstr = "R1 min value is negative",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5944,7 +6464,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map3 = { 3, 8 },
+ .fixup_map_hash_16b = { 3, 8 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5964,7 +6484,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
BPF_EXIT_INSN(),
},
- .fixup_map3 = { 3, 10 },
+ .fixup_map_hash_16b = { 3, 10 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5984,8 +6504,8 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
- .fixup_map3 = { 10 },
+ .fixup_map_hash_8b = { 3 },
+ .fixup_map_hash_16b = { 10 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=8 off=0 size=16",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6006,7 +6526,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map3 = { 3, 9 },
+ .fixup_map_hash_16b = { 3, 9 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -6026,7 +6546,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map3 = { 3, 9 },
+ .fixup_map_hash_16b = { 3, 9 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=16 off=12 size=8",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6046,7 +6566,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map3 = { 3, 9 },
+ .fixup_map_hash_16b = { 3, 9 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6068,7 +6588,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map3 = { 3, 10 },
+ .fixup_map_hash_16b = { 3, 10 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -6089,7 +6609,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map3 = { 3, 10 },
+ .fixup_map_hash_16b = { 3, 10 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=16 off=12 size=8",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6110,7 +6630,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map3 = { 3, 10 },
+ .fixup_map_hash_16b = { 3, 10 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6133,7 +6653,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map3 = { 3, 11 },
+ .fixup_map_hash_16b = { 3, 11 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -6153,7 +6673,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map3 = { 3, 10 },
+ .fixup_map_hash_16b = { 3, 10 },
.result = REJECT,
.errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6176,7 +6696,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map3 = { 3, 11 },
+ .fixup_map_hash_16b = { 3, 11 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=16 off=9 size=8",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6198,7 +6718,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
.result_unpriv = REJECT,
@@ -6219,7 +6739,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
.result_unpriv = REJECT,
@@ -6236,7 +6756,7 @@ static struct bpf_test tests[] = {
BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R1 !read_ok",
.errstr = "R1 !read_ok",
.result = REJECT,
@@ -6270,7 +6790,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
.result_unpriv = REJECT,
@@ -6298,7 +6818,7 @@ static struct bpf_test tests[] = {
BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
.result_unpriv = REJECT,
@@ -6317,7 +6837,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R0 bitwise operator &= on pointer",
.result = REJECT,
},
@@ -6334,7 +6854,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R0 32-bit pointer arithmetic prohibited",
.result = REJECT,
},
@@ -6351,7 +6871,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R0 pointer arithmetic with /= operator",
.result = REJECT,
},
@@ -6368,7 +6888,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "invalid mem access 'inv'",
.result = REJECT,
@@ -6392,7 +6912,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R0 invalid mem access 'inv'",
.result = REJECT,
},
@@ -6415,7 +6935,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
.result_unpriv = REJECT,
@@ -6661,7 +7181,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -6687,7 +7207,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "invalid access to map value, value_size=48 off=0 size=49",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6715,7 +7235,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -6742,7 +7262,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R1 min value is outside of the array range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6814,7 +7334,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_csum_diff),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -6839,7 +7359,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_csum_diff),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -6862,7 +7382,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_csum_diff),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -6943,7 +7463,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -6964,7 +7484,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -6984,7 +7504,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -7059,7 +7579,7 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R0 max value is outside of the array range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -7089,7 +7609,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_REG(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr = "R0 max value is outside of the array range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -7137,7 +7657,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map_in_map = { 3 },
- .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+ .errstr = "R1 pointer arithmetic on map_ptr prohibited",
.result = REJECT,
},
{
@@ -7442,7 +7962,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
.result = REJECT,
},
@@ -7466,7 +7986,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
.result = REJECT,
},
@@ -7492,7 +8012,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
.result = REJECT,
},
@@ -7517,7 +8037,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
.result = REJECT,
},
@@ -7541,7 +8061,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
{
@@ -7565,7 +8085,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
.result = REJECT,
},
@@ -7611,7 +8131,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
{
@@ -7636,7 +8156,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
.result = REJECT,
},
@@ -7662,7 +8182,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
{
@@ -7687,7 +8207,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
.result = REJECT,
},
@@ -7714,7 +8234,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
.result = REJECT,
},
@@ -7740,7 +8260,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
.result = REJECT,
},
@@ -7769,7 +8289,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
.result = REJECT,
},
@@ -7799,7 +8319,7 @@ static struct bpf_test tests[] = {
BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
BPF_JMP_IMM(BPF_JA, 0, 0, -7),
},
- .fixup_map1 = { 4 },
+ .fixup_map_hash_8b = { 4 },
.errstr = "R0 invalid mem access 'inv'",
.result = REJECT,
},
@@ -7827,7 +8347,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
.result = REJECT,
.result_unpriv = REJECT,
@@ -7854,7 +8374,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "R0 max value is outside of the array range",
.result = REJECT,
},
@@ -7879,7 +8399,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
.result = REJECT,
},
@@ -7905,7 +8425,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT
},
{
@@ -7930,7 +8450,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "map_value pointer and 4294967295",
.result = REJECT
},
@@ -7956,7 +8476,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "R0 min value is outside of the array range",
.result = REJECT
},
@@ -7980,7 +8500,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 4 },
+ .fixup_map_hash_8b = { 4 },
.errstr = "value_size=8 off=1073741825",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -8005,7 +8525,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 4 },
+ .fixup_map_hash_8b = { 4 },
.errstr = "value 1073741823",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -8041,7 +8561,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT
},
{
@@ -8080,7 +8600,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
/* not actually fully unbounded, but the bound is very high */
.errstr = "R0 unbounded memory access",
.result = REJECT
@@ -8123,7 +8643,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
/* not actually fully unbounded, but the bound is very high */
.errstr = "R0 unbounded memory access",
.result = REJECT
@@ -8152,7 +8672,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT
},
{
@@ -8179,7 +8699,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "R0 max value is outside of the array range",
.result = REJECT
},
@@ -8209,7 +8729,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "R0 unbounded memory access",
.result = REJECT
},
@@ -8229,7 +8749,7 @@ static struct bpf_test tests[] = {
BPF_JMP_A(0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "map_value pointer and 2147483646",
.result = REJECT
},
@@ -8251,7 +8771,7 @@ static struct bpf_test tests[] = {
BPF_JMP_A(0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "pointer offset 1073741822",
.result = REJECT
},
@@ -8272,7 +8792,7 @@ static struct bpf_test tests[] = {
BPF_JMP_A(0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "pointer offset -1073741822",
.result = REJECT
},
@@ -8294,7 +8814,7 @@ static struct bpf_test tests[] = {
BPF_JMP_A(0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "map_value pointer and 1000000000000",
.result = REJECT
},
@@ -8314,7 +8834,7 @@ static struct bpf_test tests[] = {
BPF_JMP_A(0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT,
.retval = POINTER_VALUE,
.result_unpriv = REJECT,
@@ -8335,7 +8855,7 @@ static struct bpf_test tests[] = {
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = ACCEPT,
.retval = POINTER_VALUE,
.result_unpriv = REJECT,
@@ -8403,7 +8923,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 5 },
+ .fixup_map_hash_8b = { 5 },
.errstr = "variable stack read R2",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_LWT_IN,
@@ -8484,7 +9004,7 @@ static struct bpf_test tests[] = {
offsetof(struct test_val, foo)),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 3 },
+ .fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
.errstr = "R0 unbounded memory access",
.result_unpriv = REJECT,
@@ -8811,7 +9331,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .errstr = "R3 pointer arithmetic on pkt_end",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_XDP,
},
@@ -8830,7 +9350,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .errstr = "R3 pointer arithmetic on pkt_end",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_XDP,
},
@@ -10018,7 +10538,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map1 = { 16 },
+ .fixup_map_hash_8b = { 16 },
.result = REJECT,
.errstr = "R0 min value is outside of the array range",
},
@@ -10969,7 +11489,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(), /* return 0 */
},
.prog_type = BPF_PROG_TYPE_XDP,
- .fixup_map1 = { 23 },
+ .fixup_map_hash_8b = { 23 },
.result = ACCEPT,
},
{
@@ -11024,7 +11544,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(), /* return 1 */
},
.prog_type = BPF_PROG_TYPE_XDP,
- .fixup_map1 = { 23 },
+ .fixup_map_hash_8b = { 23 },
.result = ACCEPT,
},
{
@@ -11079,7 +11599,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(), /* return 1 */
},
.prog_type = BPF_PROG_TYPE_XDP,
- .fixup_map1 = { 23 },
+ .fixup_map_hash_8b = { 23 },
.result = REJECT,
.errstr = "invalid read from stack off -16+0 size 8",
},
@@ -11151,7 +11671,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map1 = { 12, 22 },
+ .fixup_map_hash_8b = { 12, 22 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=8 off=2 size=8",
},
@@ -11223,7 +11743,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map1 = { 12, 22 },
+ .fixup_map_hash_8b = { 12, 22 },
.result = ACCEPT,
},
{
@@ -11294,7 +11814,7 @@ static struct bpf_test tests[] = {
BPF_JMP_IMM(BPF_JA, 0, 0, -8),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map1 = { 12, 22 },
+ .fixup_map_hash_8b = { 12, 22 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=8 off=2 size=8",
},
@@ -11366,7 +11886,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map1 = { 12, 22 },
+ .fixup_map_hash_8b = { 12, 22 },
.result = ACCEPT,
},
{
@@ -11437,7 +11957,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map1 = { 12, 22 },
+ .fixup_map_hash_8b = { 12, 22 },
.result = REJECT,
.errstr = "R0 invalid mem access 'inv'",
},
@@ -11782,7 +12302,7 @@ static struct bpf_test tests[] = {
BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 13 },
+ .fixup_map_hash_8b = { 13 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_XDP,
},
@@ -11809,7 +12329,7 @@ static struct bpf_test tests[] = {
BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 6 },
+ .fixup_map_hash_48b = { 6 },
.errstr = "invalid indirect read from stack off -8+0 size 8",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_XDP,
@@ -11841,8 +12361,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map2 = { 13 },
- .fixup_map4 = { 16 },
+ .fixup_map_hash_48b = { 13 },
+ .fixup_map_array_48b = { 16 },
.result = ACCEPT,
.retval = 1,
},
@@ -11874,7 +12394,7 @@ static struct bpf_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_map_in_map = { 16 },
- .fixup_map4 = { 13 },
+ .fixup_map_array_48b = { 13 },
.result = REJECT,
.errstr = "R0 invalid mem access 'map_ptr'",
},
@@ -11942,7 +12462,7 @@ static struct bpf_test tests[] = {
BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "R6 invalid mem access 'inv'",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -11966,7 +12486,7 @@ static struct bpf_test tests[] = {
BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.errstr = "invalid read from stack off -16+0 size 8",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -12088,7 +12608,7 @@ static struct bpf_test tests[] = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
BPF_EXIT_INSN(),
},
- .fixup_map1 = { 3 },
+ .fixup_map_hash_8b = { 3 },
.result = REJECT,
.errstr = "misaligned value access off",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -12114,7 +12634,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "BPF_XADD stores into R2 packet",
+ .errstr = "BPF_XADD stores into R2 pkt is not allowed",
.prog_type = BPF_PROG_TYPE_XDP,
},
{
@@ -12198,7 +12718,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_get_stack),
BPF_EXIT_INSN(),
},
- .fixup_map2 = { 4 },
+ .fixup_map_hash_48b = { 4 },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -12442,6 +12962,214 @@ static struct bpf_test tests[] = {
.result = ACCEPT,
},
{
+ "reference tracking: leak potential reference",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: leak potential reference on stack",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: leak potential reference on stack 2",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: zero potential reference",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: copy and zero potential references",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: release reference without check",
+ .insns = {
+ BPF_SK_LOOKUP,
+ /* reference in r0 may be NULL */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "type=sock_or_null expected=sock",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: release reference",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: release reference 2",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: release reference twice",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "type=inv expected=sock",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: release reference twice inside branch",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "type=inv expected=sock",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: alloc, check, free in one subbranch",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
+ /* if (offsetof(skb, mark) > data_len) exit; */
+ BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
+ offsetof(struct __sk_buff, mark)),
+ BPF_SK_LOOKUP,
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */
+ /* Leak reference in R0 */
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: alloc, check, free in both subbranches",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
+ /* if (offsetof(skb, mark) > data_len) exit; */
+ BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
+ offsetof(struct __sk_buff, mark)),
+ BPF_SK_LOOKUP,
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking in call: free reference in subprog",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
"pass modified ctx pointer to helper, 1",
.insns = {
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
@@ -12511,6 +13239,407 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
},
+ {
+ "reference tracking in call: free reference in subprog and outside",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "type=inv expected=sock",
+ .result = REJECT,
+ },
+ {
+ "reference tracking in call: alloc & leak reference in subprog",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_4),
+ BPF_SK_LOOKUP,
+ /* spill unchecked sk_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking in call: alloc in subprog, release outside",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_SK_LOOKUP,
+ BPF_EXIT_INSN(), /* return sk */
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = POINTER_VALUE,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking in call: sk_ptr leak into caller stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ /* spill unchecked sk_ptr into stack of caller */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_SK_LOOKUP,
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+ },
+ {
+ "reference tracking in call: sk_ptr spill into caller stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ /* spill unchecked sk_ptr into stack of caller */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ /* now the sk_ptr is verified, free the reference */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_4, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_SK_LOOKUP,
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: allow LD_ABS",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: forbid LD_ABS while holding reference",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: allow LD_IND",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_MOV64_IMM(BPF_REG_7, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "reference tracking: forbid LD_IND while holding reference",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_7, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: check reference or tail call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ /* if (sk) bpf_sk_release() */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7),
+ /* bpf_tail_call() */
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 17 },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: release reference then tail call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+ BPF_SK_LOOKUP,
+ /* if (sk) bpf_sk_release() */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ /* bpf_tail_call() */
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 18 },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: leak possible reference over tail call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+ /* Look up socket and store in REG_6 */
+ BPF_SK_LOOKUP,
+ /* bpf_tail_call() */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* if (sk) bpf_sk_release() */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 16 },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "tail_call would lead to reference leak",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: leak checked reference over tail call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+ /* Look up socket and store in REG_6 */
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* if (!sk) goto end */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ /* bpf_tail_call() */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 17 },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "tail_call would lead to reference leak",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: mangle and release sock_or_null",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "R1 pointer arithmetic on sock_or_null prohibited",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: mangle and release sock",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "R1 pointer arithmetic on sock prohibited",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: access member",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "reference tracking: write to member",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LD_IMM64(BPF_REG_2, 42),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2,
+ offsetof(struct bpf_sock, mark)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "cannot write into socket",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: invalid 64-bit access of member",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "invalid bpf_sock access off=0 size=8",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: access after release",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "!read_ok",
+ .result = REJECT,
+ },
+ {
+ "reference tracking: direct access for lookup",
+ .insns = {
+ /* Check that the packet is at least 64B long */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
+ /* sk = sk_lookup_tcp(ctx, skb->data, ...) */
+ BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
};
static int probe_filter_length(const struct bpf_insn *fp)
@@ -12536,18 +13665,18 @@ static int create_map(uint32_t type, uint32_t size_key,
return fd;
}
-static int create_prog_dummy1(void)
+static int create_prog_dummy1(enum bpf_map_type prog_type)
{
struct bpf_insn prog[] = {
BPF_MOV64_IMM(BPF_REG_0, 42),
BPF_EXIT_INSN(),
};
- return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+ return bpf_load_program(prog_type, prog,
ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
}
-static int create_prog_dummy2(int mfd, int idx)
+static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx)
{
struct bpf_insn prog[] = {
BPF_MOV64_IMM(BPF_REG_3, idx),
@@ -12558,11 +13687,12 @@ static int create_prog_dummy2(int mfd, int idx)
BPF_EXIT_INSN(),
};
- return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+ return bpf_load_program(prog_type, prog,
ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
}
-static int create_prog_array(uint32_t max_elem, int p1key)
+static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem,
+ int p1key)
{
int p2key = 1;
int mfd, p1fd, p2fd;
@@ -12574,8 +13704,8 @@ static int create_prog_array(uint32_t max_elem, int p1key)
return -1;
}
- p1fd = create_prog_dummy1();
- p2fd = create_prog_dummy2(mfd, p2key);
+ p1fd = create_prog_dummy1(prog_type);
+ p2fd = create_prog_dummy2(prog_type, mfd, p2key);
if (p1fd < 0 || p2fd < 0)
goto out;
if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
@@ -12615,32 +13745,39 @@ static int create_map_in_map(void)
return outer_map_fd;
}
-static int create_cgroup_storage(void)
+static int create_cgroup_storage(bool percpu)
{
+ enum bpf_map_type type = percpu ? BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE :
+ BPF_MAP_TYPE_CGROUP_STORAGE;
int fd;
- fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
- sizeof(struct bpf_cgroup_storage_key),
+ fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key),
TEST_DATA_LEN, 0, 0);
if (fd < 0)
- printf("Failed to create array '%s'!\n", strerror(errno));
+ printf("Failed to create cgroup storage '%s'!\n",
+ strerror(errno));
return fd;
}
static char bpf_vlog[UINT_MAX >> 8];
-static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
- int *map_fds)
+static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
+ struct bpf_insn *prog, int *map_fds)
{
- int *fixup_map1 = test->fixup_map1;
- int *fixup_map2 = test->fixup_map2;
- int *fixup_map3 = test->fixup_map3;
- int *fixup_map4 = test->fixup_map4;
+ int *fixup_map_hash_8b = test->fixup_map_hash_8b;
+ int *fixup_map_hash_48b = test->fixup_map_hash_48b;
+ int *fixup_map_hash_16b = test->fixup_map_hash_16b;
+ int *fixup_map_array_48b = test->fixup_map_array_48b;
+ int *fixup_map_sockmap = test->fixup_map_sockmap;
+ int *fixup_map_sockhash = test->fixup_map_sockhash;
+ int *fixup_map_xskmap = test->fixup_map_xskmap;
+ int *fixup_map_stacktrace = test->fixup_map_stacktrace;
int *fixup_prog1 = test->fixup_prog1;
int *fixup_prog2 = test->fixup_prog2;
int *fixup_map_in_map = test->fixup_map_in_map;
int *fixup_cgroup_storage = test->fixup_cgroup_storage;
+ int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage;
if (test->fill_helper)
test->fill_helper(test);
@@ -12649,44 +13786,44 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
* for verifier and not do a runtime lookup, so the only thing
* that really matters is value size in this case.
*/
- if (*fixup_map1) {
+ if (*fixup_map_hash_8b) {
map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
sizeof(long long), 1);
do {
- prog[*fixup_map1].imm = map_fds[0];
- fixup_map1++;
- } while (*fixup_map1);
+ prog[*fixup_map_hash_8b].imm = map_fds[0];
+ fixup_map_hash_8b++;
+ } while (*fixup_map_hash_8b);
}
- if (*fixup_map2) {
+ if (*fixup_map_hash_48b) {
map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
sizeof(struct test_val), 1);
do {
- prog[*fixup_map2].imm = map_fds[1];
- fixup_map2++;
- } while (*fixup_map2);
+ prog[*fixup_map_hash_48b].imm = map_fds[1];
+ fixup_map_hash_48b++;
+ } while (*fixup_map_hash_48b);
}
- if (*fixup_map3) {
+ if (*fixup_map_hash_16b) {
map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
sizeof(struct other_val), 1);
do {
- prog[*fixup_map3].imm = map_fds[2];
- fixup_map3++;
- } while (*fixup_map3);
+ prog[*fixup_map_hash_16b].imm = map_fds[2];
+ fixup_map_hash_16b++;
+ } while (*fixup_map_hash_16b);
}
- if (*fixup_map4) {
+ if (*fixup_map_array_48b) {
map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
sizeof(struct test_val), 1);
do {
- prog[*fixup_map4].imm = map_fds[3];
- fixup_map4++;
- } while (*fixup_map4);
+ prog[*fixup_map_array_48b].imm = map_fds[3];
+ fixup_map_array_48b++;
+ } while (*fixup_map_array_48b);
}
if (*fixup_prog1) {
- map_fds[4] = create_prog_array(4, 0);
+ map_fds[4] = create_prog_array(prog_type, 4, 0);
do {
prog[*fixup_prog1].imm = map_fds[4];
fixup_prog1++;
@@ -12694,7 +13831,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
}
if (*fixup_prog2) {
- map_fds[5] = create_prog_array(8, 7);
+ map_fds[5] = create_prog_array(prog_type, 8, 7);
do {
prog[*fixup_prog2].imm = map_fds[5];
fixup_prog2++;
@@ -12710,12 +13847,52 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
}
if (*fixup_cgroup_storage) {
- map_fds[7] = create_cgroup_storage();
+ map_fds[7] = create_cgroup_storage(false);
do {
prog[*fixup_cgroup_storage].imm = map_fds[7];
fixup_cgroup_storage++;
} while (*fixup_cgroup_storage);
}
+
+ if (*fixup_percpu_cgroup_storage) {
+ map_fds[8] = create_cgroup_storage(true);
+ do {
+ prog[*fixup_percpu_cgroup_storage].imm = map_fds[8];
+ fixup_percpu_cgroup_storage++;
+ } while (*fixup_percpu_cgroup_storage);
+ }
+ if (*fixup_map_sockmap) {
+ map_fds[9] = create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(int),
+ sizeof(int), 1);
+ do {
+ prog[*fixup_map_sockmap].imm = map_fds[9];
+ fixup_map_sockmap++;
+ } while (*fixup_map_sockmap);
+ }
+ if (*fixup_map_sockhash) {
+ map_fds[10] = create_map(BPF_MAP_TYPE_SOCKHASH, sizeof(int),
+ sizeof(int), 1);
+ do {
+ prog[*fixup_map_sockhash].imm = map_fds[10];
+ fixup_map_sockhash++;
+ } while (*fixup_map_sockhash);
+ }
+ if (*fixup_map_xskmap) {
+ map_fds[11] = create_map(BPF_MAP_TYPE_XSKMAP, sizeof(int),
+ sizeof(int), 1);
+ do {
+ prog[*fixup_map_xskmap].imm = map_fds[11];
+ fixup_map_xskmap++;
+ } while (*fixup_map_xskmap);
+ }
+ if (*fixup_map_stacktrace) {
+ map_fds[12] = create_map(BPF_MAP_TYPE_STACK_TRACE, sizeof(u32),
+ sizeof(u64), 1);
+ do {
+ prog[*fixup_map_stacktrace].imm = map_fds[12];
+ fixup_map_stacktrace++;
+ } while (fixup_map_stacktrace);
+ }
}
static void do_test_single(struct bpf_test *test, bool unpriv,
@@ -12732,11 +13909,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
for (i = 0; i < MAX_NR_MAPS; i++)
map_fds[i] = -1;
- do_test_fixup(test, prog, map_fds);
+ if (!prog_type)
+ prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ do_test_fixup(test, prog_type, prog, map_fds);
prog_len = probe_filter_length(prog);
- fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
- prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
+ fd_prog = bpf_verify_program(prog_type, prog, prog_len,
+ test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
"GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
expected_ret = unpriv && test->result_unpriv != UNDEF ?
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.c b/tools/testing/selftests/bpf/test_xdp_vlan.c
new file mode 100644
index 000000000000..365a7d2d9f5c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.c
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2018 Jesper Dangaard Brouer.
+ *
+ * XDP/TC VLAN manipulation example
+ *
+ * GOTCHA: Remember to disable NIC hardware offloading of VLANs,
+ * else the VLAN tags are NOT inlined in the packet payload:
+ *
+ * # ethtool -K ixgbe2 rxvlan off
+ *
+ * Verify setting:
+ * # ethtool -k ixgbe2 | grep rx-vlan-offload
+ * rx-vlan-offload: off
+ *
+ */
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here
+ *
+ * struct vlan_hdr - vlan header
+ * @h_vlan_TCI: priority and VLAN ID
+ * @h_vlan_encapsulated_proto: packet type ID or len
+ */
+struct _vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */
+#define VLAN_PRIO_SHIFT 13
+#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */
+#define VLAN_TAG_PRESENT VLAN_CFI_MASK
+#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */
+#define VLAN_N_VID 4096
+
+struct parse_pkt {
+ __u16 l3_proto;
+ __u16 l3_offset;
+ __u16 vlan_outer;
+ __u16 vlan_inner;
+ __u8 vlan_outer_offset;
+ __u8 vlan_inner_offset;
+};
+
+char _license[] SEC("license") = "GPL";
+
+static __always_inline
+bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
+{
+ __u16 eth_type;
+ __u8 offset;
+
+ offset = sizeof(*eth);
+ /* Make sure packet is large enough for parsing eth + 2 VLAN headers */
+ if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end)
+ return false;
+
+ eth_type = eth->h_proto;
+
+ /* Handle outer VLAN tag */
+ if (eth_type == bpf_htons(ETH_P_8021Q)
+ || eth_type == bpf_htons(ETH_P_8021AD)) {
+ struct _vlan_hdr *vlan_hdr;
+
+ vlan_hdr = (void *)eth + offset;
+ pkt->vlan_outer_offset = offset;
+ pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI)
+ & VLAN_VID_MASK;
+ eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+ offset += sizeof(*vlan_hdr);
+ }
+
+ /* Handle inner (double) VLAN tag */
+ if (eth_type == bpf_htons(ETH_P_8021Q)
+ || eth_type == bpf_htons(ETH_P_8021AD)) {
+ struct _vlan_hdr *vlan_hdr;
+
+ vlan_hdr = (void *)eth + offset;
+ pkt->vlan_inner_offset = offset;
+ pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI)
+ & VLAN_VID_MASK;
+ eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+ offset += sizeof(*vlan_hdr);
+ }
+
+ pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */
+ pkt->l3_offset = offset;
+
+ return true;
+}
+
+/* Hint, VLANs are choosen to hit network-byte-order issues */
+#define TESTVLAN 4011 /* 0xFAB */
+// #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */
+
+SEC("xdp_drop_vlan_4011")
+int xdp_prognum0(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct parse_pkt pkt = { 0 };
+
+ if (!parse_eth_frame(data, data_end, &pkt))
+ return XDP_ABORTED;
+
+ /* Drop specific VLAN ID example */
+ if (pkt.vlan_outer == TESTVLAN)
+ return XDP_ABORTED;
+ /*
+ * Using XDP_ABORTED makes it possible to record this event,
+ * via tracepoint xdp:xdp_exception like:
+ * # perf record -a -e xdp:xdp_exception
+ * # perf script
+ */
+ return XDP_PASS;
+}
+/*
+Commands to setup VLAN on Linux to test packets gets dropped:
+
+ export ROOTDEV=ixgbe2
+ export VLANID=4011
+ ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID
+ ip link set dev $ROOTDEV.$VLANID up
+
+ ip link set dev $ROOTDEV mtu 1508
+ ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID
+
+Load prog with ip tool:
+
+ ip link set $ROOTDEV xdp off
+ ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011
+
+*/
+
+/* Changing VLAN to zero, have same practical effect as removing the VLAN. */
+#define TO_VLAN 0
+
+SEC("xdp_vlan_change")
+int xdp_prognum1(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct parse_pkt pkt = { 0 };
+
+ if (!parse_eth_frame(data, data_end, &pkt))
+ return XDP_ABORTED;
+
+ /* Change specific VLAN ID */
+ if (pkt.vlan_outer == TESTVLAN) {
+ struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset;
+
+ /* Modifying VLAN, preserve top 4 bits */
+ vlan_hdr->h_vlan_TCI =
+ bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000)
+ | TO_VLAN);
+ }
+
+ return XDP_PASS;
+}
+
+/*
+ * Show XDP+TC can cooperate, on creating a VLAN rewriter.
+ * 1. Create a XDP prog that can "pop"/remove a VLAN header.
+ * 2. Create a TC-bpf prog that egress can add a VLAN header.
+ */
+
+#ifndef ETH_ALEN /* Ethernet MAC address length */
+#define ETH_ALEN 6 /* bytes */
+#endif
+#define VLAN_HDR_SZ 4 /* bytes */
+
+SEC("xdp_vlan_remove_outer")
+int xdp_prognum2(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct parse_pkt pkt = { 0 };
+ char *dest;
+
+ if (!parse_eth_frame(data, data_end, &pkt))
+ return XDP_ABORTED;
+
+ /* Skip packet if no outer VLAN was detected */
+ if (pkt.vlan_outer_offset == 0)
+ return XDP_PASS;
+
+ /* Moving Ethernet header, dest overlap with src, memmove handle this */
+ dest = data;
+ dest+= VLAN_HDR_SZ;
+ /*
+ * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
+ * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
+ */
+ __builtin_memmove(dest, data, ETH_ALEN * 2);
+ /* Note: LLVM built-in memmove inlining require size to be constant */
+
+ /* Move start of packet header seen by Linux kernel stack */
+ bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
+
+ return XDP_PASS;
+}
+
+static __always_inline
+void shift_mac_4bytes_16bit(void *data)
+{
+ __u16 *p = data;
+
+ p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */
+ p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */
+ p[5] = p[3];
+ p[4] = p[2];
+ p[3] = p[1];
+ p[2] = p[0];
+}
+
+static __always_inline
+void shift_mac_4bytes_32bit(void *data)
+{
+ __u32 *p = data;
+
+ /* Assuming VLAN hdr present. The 4 bytes in p[3] that gets
+ * overwritten, is ethhdr->h_proto and vlan_hdr->h_vlan_TCI.
+ * The vlan_hdr->h_vlan_encapsulated_proto take over role as
+ * ethhdr->h_proto.
+ */
+ p[3] = p[2];
+ p[2] = p[1];
+ p[1] = p[0];
+}
+
+SEC("xdp_vlan_remove_outer2")
+int xdp_prognum3(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *orig_eth = data;
+ struct parse_pkt pkt = { 0 };
+
+ if (!parse_eth_frame(orig_eth, data_end, &pkt))
+ return XDP_ABORTED;
+
+ /* Skip packet if no outer VLAN was detected */
+ if (pkt.vlan_outer_offset == 0)
+ return XDP_PASS;
+
+ /* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */
+ shift_mac_4bytes_32bit(data);
+
+ /* Move start of packet header seen by Linux kernel stack */
+ bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
+
+ return XDP_PASS;
+}
+
+/*=====================================
+ * BELOW: TC-hook based ebpf programs
+ * ====================================
+ * The TC-clsact eBPF programs (currently) need to be attach via TC commands
+ */
+
+SEC("tc_vlan_push")
+int _tc_progA(struct __sk_buff *ctx)
+{
+ bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);
+
+ return TC_ACT_OK;
+}
+/*
+Commands to setup TC to use above bpf prog:
+
+export ROOTDEV=ixgbe2
+export FILE=xdp_vlan01_kern.o
+
+# Re-attach clsact to clear/flush existing role
+tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\
+tc qdisc add dev $ROOTDEV clsact
+
+# Attach BPF prog EGRESS
+tc filter add dev $ROOTDEV egress \
+ prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
+
+tc filter show dev $ROOTDEV egress
+*/
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
new file mode 100755
index 000000000000..51a3a31d1aac
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+
+TESTNAME=xdp_vlan
+
+usage() {
+ echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME"
+ echo ""
+ echo "Usage: $0 [-vfh]"
+ echo " -v | --verbose : Verbose"
+ echo " --flush : Flush before starting (e.g. after --interactive)"
+ echo " --interactive : Keep netns setup running after test-run"
+ echo ""
+}
+
+cleanup()
+{
+ local status=$?
+
+ if [ "$status" = "0" ]; then
+ echo "selftests: $TESTNAME [PASS]";
+ else
+ echo "selftests: $TESTNAME [FAILED]";
+ fi
+
+ if [ -n "$INTERACTIVE" ]; then
+ echo "Namespace setup still active explore with:"
+ echo " ip netns exec ns1 bash"
+ echo " ip netns exec ns2 bash"
+ exit $status
+ fi
+
+ set +e
+ ip link del veth1 2> /dev/null
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+}
+
+# Using external program "getopt" to get --long-options
+OPTIONS=$(getopt -o hvfi: \
+ --long verbose,flush,help,interactive,debug -- "$@")
+if (( $? != 0 )); then
+ usage
+ echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?"
+ exit 2
+fi
+eval set -- "$OPTIONS"
+
+## --- Parse command line arguments / parameters ---
+while true; do
+ case "$1" in
+ -v | --verbose)
+ export VERBOSE=yes
+ shift
+ ;;
+ -i | --interactive | --debug )
+ INTERACTIVE=yes
+ shift
+ ;;
+ -f | --flush )
+ cleanup
+ shift
+ ;;
+ -- )
+ shift
+ break
+ ;;
+ -h | --help )
+ usage;
+ echo "selftests: $TESTNAME [SKIP] usage help info requested"
+ exit 0
+ ;;
+ * )
+ shift
+ break
+ ;;
+ esac
+done
+
+if [ "$EUID" -ne 0 ]; then
+ echo "selftests: $TESTNAME [FAILED] need root privileges"
+ exit 1
+fi
+
+ip link set dev lo xdp off 2>/dev/null > /dev/null
+if [ $? -ne 0 ];then
+ echo "selftests: $TESTNAME [SKIP] need ip xdp support"
+ exit 0
+fi
+
+# Interactive mode likely require us to cleanup netns
+if [ -n "$INTERACTIVE" ]; then
+ ip link del veth1 2> /dev/null
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+fi
+
+# Exit on failure
+set -e
+
+# Some shell-tools dependencies
+which ip > /dev/null
+which tc > /dev/null
+which ethtool > /dev/null
+
+# Make rest of shell verbose, showing comments as doc/info
+if [ -n "$VERBOSE" ]; then
+ set -v
+fi
+
+# Create two namespaces
+ip netns add ns1
+ip netns add ns2
+
+# Run cleanup if failing or on kill
+trap cleanup 0 2 3 6 9
+
+# Create veth pair
+ip link add veth1 type veth peer name veth2
+
+# Move veth1 and veth2 into the respective namespaces
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+
+# NOTICE: XDP require VLAN header inside packet payload
+# - Thus, disable VLAN offloading driver features
+# - For veth REMEMBER TX side VLAN-offload
+#
+# Disable rx-vlan-offload (mostly needed on ns1)
+ip netns exec ns1 ethtool -K veth1 rxvlan off
+ip netns exec ns2 ethtool -K veth2 rxvlan off
+#
+# Disable tx-vlan-offload (mostly needed on ns2)
+ip netns exec ns2 ethtool -K veth2 txvlan off
+ip netns exec ns1 ethtool -K veth1 txvlan off
+
+export IPADDR1=100.64.41.1
+export IPADDR2=100.64.41.2
+
+# In ns1/veth1 add IP-addr on plain net_device
+ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1
+ip netns exec ns1 ip link set veth1 up
+
+# In ns2/veth2 create VLAN device
+export VLAN=4011
+export DEVNS2=veth2
+ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
+ip netns exec ns2 ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
+ip netns exec ns2 ip link set $DEVNS2 up
+ip netns exec ns2 ip link set $DEVNS2.$VLAN up
+
+# Bringup lo in netns (to avoids confusing people using --interactive)
+ip netns exec ns1 ip link set lo up
+ip netns exec ns2 ip link set lo up
+
+# At this point, the hosts cannot reach each-other,
+# because ns2 are using VLAN tags on the packets.
+
+ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Okay ping fails"'
+
+
+# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
+# ----------------------------------------------------------------------
+# In ns1: ingress use XDP to remove VLAN tags
+export DEVNS1=veth1
+export FILE=test_xdp_vlan.o
+
+# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
+export XDP_PROG=xdp_vlan_change
+ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG
+
+# In ns1: egress use TC to add back VLAN tag 4011
+# (del cmd)
+# tc qdisc del dev $DEVNS1 clsact 2> /dev/null
+#
+ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact
+ip netns exec ns1 tc filter add dev $DEVNS1 egress \
+ prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
+
+# Now the namespaces can reach each-other, test with ping:
+ip netns exec ns2 ping -W 2 -c 3 $IPADDR1
+ip netns exec ns1 ping -W 2 -c 3 $IPADDR2
+
+# Second test: Replace xdp prog, that fully remove vlan header
+#
+# Catch kernel bug for generic-XDP, that does didn't allow us to
+# remove a VLAN header, because skb->protocol still contain VLAN
+# ETH_P_8021Q indication, and this cause overwriting of our changes.
+#
+export XDP_PROG=xdp_vlan_remove_outer2
+ip netns exec ns1 ip link set $DEVNS1 xdp off
+ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG
+
+# Now the namespaces should still be able reach each-other, test with ping:
+ip netns exec ns2 ping -W 2 -c 3 $IPADDR1
+ip netns exec ns1 ping -W 2 -c 3 $IPADDR2
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index cabe2a3a3b30..4cdb63bf0521 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -41,6 +41,7 @@ int load_kallsyms(void)
syms[i].name = strdup(func);
i++;
}
+ fclose(f);
sym_cnt = i;
qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
return 0;
@@ -124,10 +125,11 @@ struct perf_event_sample {
char data[];
};
-static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv)
+static enum bpf_perf_event_ret
+bpf_perf_event_print(struct perf_event_header *hdr, void *private_data)
{
- struct perf_event_sample *e = event;
- perf_event_print_fn fn = priv;
+ struct perf_event_sample *e = (struct perf_event_sample *)hdr;
+ perf_event_print_fn fn = private_data;
int ret;
if (e->header.type == PERF_RECORD_SAMPLE) {
diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh
new file mode 100755
index 000000000000..ffcd3953f94c
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_addr.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# add private ipv4 and ipv6 addresses to loopback
+
+readonly V6_INNER='100::a/128'
+readonly V4_INNER='192.168.0.1/32'
+
+if getopts ":s" opt; then
+ readonly SIT_DEV_NAME='sixtofourtest0'
+ readonly V6_SIT='2::/64'
+ readonly V4_SIT='172.17.0.1/32'
+ shift
+fi
+
+fail() {
+ echo "error: $*" 1>&2
+ exit 1
+}
+
+setup() {
+ ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address'
+ ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address'
+
+ if [[ -n "${V6_SIT}" ]]; then
+ ip link add "${SIT_DEV_NAME}" type sit remote any local any \
+ || fail 'failed to add sit'
+ ip link set dev "${SIT_DEV_NAME}" up \
+ || fail 'failed to bring sit device up'
+ ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \
+ || fail 'failed to setup v6 SIT address'
+ ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \
+ || fail 'failed to setup v4 SIT address'
+ fi
+
+ sleep 2 # avoid race causing bind to fail
+}
+
+cleanup() {
+ if [[ -n "${V6_SIT}" ]]; then
+ ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}"
+ ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}"
+ ip link del "${SIT_DEV_NAME}"
+ fi
+
+ ip -4 addr del "${V4_INNER}" dev lo
+ ip -6 addr del "${V6_INNER}" dev lo
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh
new file mode 100755
index 000000000000..e24949ed3a20
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_tunnels.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# setup tunnels for flow dissection test
+
+readonly SUFFIX="test_$(mktemp -u XXXX)"
+CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo"
+
+setup() {
+ ip link add "ipip_${SUFFIX}" type ipip ${CONFIG}
+ ip link add "gre_${SUFFIX}" type gre ${CONFIG}
+ ip link add "sit_${SUFFIX}" type sit ${CONFIG}
+
+ echo "tunnels before test:"
+ ip tunnel show
+
+ ip link set "ipip_${SUFFIX}" up
+ ip link set "gre_${SUFFIX}" up
+ ip link set "sit_${SUFFIX}" up
+}
+
+
+cleanup() {
+ ip tunnel del "ipip_${SUFFIX}"
+ ip tunnel del "gre_${SUFFIX}"
+ ip tunnel del "sit_${SUFFIX}"
+
+ echo "tunnels after test:"
+ ip tunnel show
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
new file mode 100755
index 000000000000..0150bb2741eb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A test for switch behavior under MC overload. An issue in Spectrum chips
+# causes throughput of UC traffic to drop severely when a switch is under heavy
+# MC load. This issue can be overcome by putting the switch to MC-aware mode.
+# This test verifies that UC performance stays intact even as the switch is
+# under MC flood, and therefore that the MC-aware mode is enabled and correctly
+# configured.
+#
+# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
+# at full speed. That makes it impossible to use iperf3 to simply measure the
+# throughput, because many packets (that reach $h3) don't get to the kernel at
+# all even in UDP mode (the situation is even worse in TCP mode, where one can't
+# hope to see more than a couple Mbps).
+#
+# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
+# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
+# each gets a different priority and we can use per-prio ethtool counters to
+# measure the throughput. In order to avoid prioritizing unicast traffic, prio
+# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
+# thus TC 0).
+#
+# Mausezahn can't actually saturate the links unless it's using large frames.
+# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
+# multicast traffic uses 8K frames.
+#
+# +-----------------------+ +----------------------------------+
+# | H1 | | H2 |
+# | | | unicast --> + $h2.111 |
+# | | | traffic | 192.0.2.129/28 |
+# | multicast | | | e-qos-map 0:1 |
+# | traffic | | | |
+# | $h1 + <----- | | + $h2 |
+# +-----|-----------------+ +--------------|-------------------+
+# | |
+# +-----|-------------------------------------------------|-------------------+
+# | + $swp1 + $swp2 |
+# | | >1Gbps | >1Gbps |
+# | +---|----------------+ +----------|----------------+ |
+# | | + $swp1.1 | | + $swp2.111 | |
+# | | BR1 | SW | BR111 | |
+# | | + $swp3.1 | | + $swp3.111 | |
+# | +---|----------------+ +----------|----------------+ |
+# | \_________________________________________________/ |
+# | | |
+# | + $swp3 |
+# | | 1Gbps bottleneck |
+# | | prio qdisc: {0..7} -> 7 |
+# +------------------------------------|--------------------------------------+
+# |
+# +--|-----------------+
+# | + $h3 H3 |
+# | | |
+# | + $h3.111 |
+# | 192.0.2.130/28 |
+# +--------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_mc_aware
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ mtu_set $h1 10000
+}
+
+h1_destroy()
+{
+ mtu_restore $h1
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ mtu_set $h2 10000
+
+ vlan_create $h2 111 v$h2 192.0.2.129/28
+ ip link set dev $h2.111 type vlan egress-qos-map 0:1
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 111
+
+ mtu_restore $h2
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3
+ mtu_set $h3 10000
+
+ vlan_create $h3 111 v$h3 192.0.2.130/28
+}
+
+h3_destroy()
+{
+ vlan_destroy $h3 111
+
+ mtu_restore $h3
+ simple_if_fini $h3
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ mtu_set $swp1 10000
+
+ ip link set dev $swp2 up
+ mtu_set $swp2 10000
+
+ ip link set dev $swp3 up
+ mtu_set $swp3 10000
+
+ vlan_create $swp2 111
+ vlan_create $swp3 111
+
+ ethtool -s $swp3 speed 1000 autoneg off
+ tc qdisc replace dev $swp3 root handle 3: \
+ prio bands 8 priomap 7 7 7 7 7 7 7 7
+
+ ip link add name br1 type bridge vlan_filtering 0
+ ip link set dev br1 up
+ ip link set dev $swp1 master br1
+ ip link set dev $swp3 master br1
+
+ ip link add name br111 type bridge vlan_filtering 0
+ ip link set dev br111 up
+ ip link set dev $swp2.111 master br111
+ ip link set dev $swp3.111 master br111
+}
+
+switch_destroy()
+{
+ ip link del dev br111
+ ip link del dev br1
+
+ tc qdisc del dev $swp3 root handle 3:
+ ethtool -s $swp3 autoneg on
+
+ vlan_destroy $swp3 111
+ vlan_destroy $swp2 111
+
+ mtu_restore $swp3
+ ip link set dev $swp3 down
+
+ mtu_restore $swp2
+ ip link set dev $swp2 down
+
+ mtu_restore $swp1
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ h3mac=$(mac_get $h3)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h2 192.0.2.130
+}
+
+humanize()
+{
+ local speed=$1; shift
+
+ for unit in bps Kbps Mbps Gbps; do
+ if (($(echo "$speed < 1024" | bc))); then
+ break
+ fi
+
+ speed=$(echo "scale=1; $speed / 1024" | bc)
+ done
+
+ echo "$speed${unit}"
+}
+
+rate()
+{
+ local t0=$1; shift
+ local t1=$1; shift
+ local interval=$1; shift
+
+ echo $((8 * (t1 - t0) / interval))
+}
+
+check_rate()
+{
+ local rate=$1; shift
+ local min=$1; shift
+ local what=$1; shift
+
+ if ((rate > min)); then
+ return 0
+ fi
+
+ echo "$what $(humanize $ir) < $(humanize $min_ingress)" > /dev/stderr
+ return 1
+}
+
+measure_uc_rate()
+{
+ local what=$1; shift
+
+ local interval=10
+ local i
+ local ret=0
+
+ # Dips in performance might cause momentary ingress rate to drop below
+ # 1Gbps. That wouldn't saturate egress and MC would thus get through,
+ # seemingly winning bandwidth on account of UC. Demand at least 2Gbps
+ # average ingress rate to somewhat mitigate this.
+ local min_ingress=2147483648
+
+ mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+ -a own -b $h3mac -t udp -q &
+ sleep 1
+
+ for i in {5..0}; do
+ local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+ local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+ sleep $interval
+ local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+ local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+ local ir=$(rate $u0 $u1 $interval)
+ local er=$(rate $t0 $t1 $interval)
+
+ if check_rate $ir $min_ingress "$what ingress rate"; then
+ break
+ fi
+
+ # Fail the test if we can't get the throughput.
+ if ((i == 0)); then
+ ret=1
+ fi
+ done
+
+ # Suppress noise from killing mausezahn.
+ { kill %% && wait; } 2>/dev/null
+
+ echo $ir $er
+ exit $ret
+}
+
+test_mc_aware()
+{
+ RET=0
+
+ local -a uc_rate
+ uc_rate=($(measure_uc_rate "UC-only"))
+ check_err $? "Could not get high enough UC-only ingress rate"
+ local ucth1=${uc_rate[1]}
+
+ mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
+
+ local d0=$(date +%s)
+ local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
+ local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+ local -a uc_rate_2
+ uc_rate_2=($(measure_uc_rate "UC+MC"))
+ check_err $? "Could not get high enough UC+MC ingress rate"
+ local ucth2=${uc_rate_2[1]}
+
+ local d1=$(date +%s)
+ local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
+ local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+ local deg=$(bc <<< "
+ scale=2
+ ret = 100 * ($ucth1 - $ucth2) / $ucth1
+ if (ret > 0) { ret } else { 0 }
+ ")
+ check_err $(bc <<< "$deg > 10")
+
+ local interval=$((d1 - d0))
+ local mc_ir=$(rate $u0 $u1 $interval)
+ local mc_er=$(rate $t0 $t1 $interval)
+
+ # Suppress noise from killing mausezahn.
+ { kill %% && wait; } 2>/dev/null
+
+ log_test "UC performace under MC overload"
+
+ echo "UC-only throughput $(humanize $ucth1)"
+ echo "UC+MC throughput $(humanize $ucth2)"
+ echo "Degradation $deg %"
+ echo
+ echo "Full report:"
+ echo " UC only:"
+ echo " ingress UC throughput $(humanize ${uc_rate[0]})"
+ echo " egress UC throughput $(humanize ${uc_rate[1]})"
+ echo " UC+MC:"
+ echo " ingress UC throughput $(humanize ${uc_rate_2[0]})"
+ echo " egress UC throughput $(humanize ${uc_rate_2[1]})"
+ echo " ingress MC throughput $(humanize $mc_ir)"
+ echo " egress MC throughput $(humanize $mc_er)"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 5c34752e1cff..6210ba41c29e 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,6 +1,8 @@
-cr4_cpuid_sync_test
-platform_info_test
-set_sregs_test
-sync_regs_test
-vmx_tsc_adjust_test
-state_test
+/x86_64/cr4_cpuid_sync_test
+/x86_64/evmcs_test
+/x86_64/platform_info_test
+/x86_64/set_sregs_test
+/x86_64/sync_regs_test
+/x86_64/vmx_tsc_adjust_test
+/x86_64/state_test
+/dirty_log_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index ec32dad3c3f0..01a219229238 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -1,26 +1,30 @@
all:
-top_srcdir = ../../../../
+top_srcdir = ../../../..
UNAME_M := $(shell uname -m)
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
-LIBKVM_x86_64 = lib/x86.c lib/vmx.c
-
-TEST_GEN_PROGS_x86_64 = platform_info_test
-TEST_GEN_PROGS_x86_64 += set_sregs_test
-TEST_GEN_PROGS_x86_64 += sync_regs_test
-TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
-TEST_GEN_PROGS_x86_64 += state_test
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c
+LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c
+LIBKVM_aarch64 = lib/aarch64/processor.c
+
+TEST_GEN_PROGS_x86_64 = x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
+TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += dirty_log_test
+
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
-LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include
-CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
+LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
LDFLAGS += -pthread
# After inclusion, $(OUTPUT) is defined and
@@ -29,7 +33,7 @@ include ../lib.mk
STATIC_LIBS := $(OUTPUT)/libkvm.a
LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
-EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
+EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.*
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
@@ -41,3 +45,12 @@ $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
all: $(STATIC_LIBS)
$(TEST_GEN_PROGS): $(STATIC_LIBS)
$(STATIC_LIBS):| khdr
+
+cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
+cscope:
+ $(RM) cscope.*
+ (find $(include_paths) -name '*.h' \
+ -exec realpath --relative-base=$(PWD) {} \;; \
+ find . -name '*.c' \
+ -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files
+ cscope -b
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 0c2cdc105f96..d59820cc2d39 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -5,6 +5,8 @@
* Copyright (C) 2018, Red Hat, Inc.
*/
+#define _GNU_SOURCE /* for program_invocation_name */
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -15,76 +17,78 @@
#include "test_util.h"
#include "kvm_util.h"
+#include "processor.h"
+
+#define DEBUG printf
-#define DEBUG printf
+#define VCPU_ID 1
-#define VCPU_ID 1
/* The memory slot index to track dirty pages */
-#define TEST_MEM_SLOT_INDEX 1
-/*
- * GPA offset of the testing memory slot. Must be bigger than the
- * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES.
- */
-#define TEST_MEM_OFFSET (1ULL << 30) /* 1G */
-/* Size of the testing memory slot */
-#define TEST_MEM_PAGES (1ULL << 18) /* 1G for 4K pages */
+#define TEST_MEM_SLOT_INDEX 1
+
+/* Default guest test memory offset, 1G */
+#define DEFAULT_GUEST_TEST_MEM 0x40000000
+
/* How many pages to dirty for each guest loop */
-#define TEST_PAGES_PER_LOOP 1024
+#define TEST_PAGES_PER_LOOP 1024
+
/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
-#define TEST_HOST_LOOP_N 32
+#define TEST_HOST_LOOP_N 32
+
/* Interval for each host loop (ms) */
-#define TEST_HOST_LOOP_INTERVAL 10
+#define TEST_HOST_LOOP_INTERVAL 10
+
+/*
+ * Guest/Host shared variables. Ensure addr_gva2hva() and/or
+ * sync_global_to/from_guest() are used when accessing from
+ * the host. READ/WRITE_ONCE() should also be used with anything
+ * that may change.
+ */
+static uint64_t host_page_size;
+static uint64_t guest_page_size;
+static uint64_t guest_num_pages;
+static uint64_t random_array[TEST_PAGES_PER_LOOP];
+static uint64_t iteration;
/*
- * Guest variables. We use these variables to share data between host
- * and guest. There are two copies of the variables, one in host memory
- * (which is unused) and one in guest memory. When the host wants to
- * access these variables, it needs to call addr_gva2hva() to access the
- * guest copy.
+ * GPA offset of the testing memory slot. Must be bigger than
+ * DEFAULT_GUEST_PHY_PAGES.
*/
-uint64_t guest_random_array[TEST_PAGES_PER_LOOP];
-uint64_t guest_iteration;
-uint64_t guest_page_size;
+static uint64_t guest_test_mem = DEFAULT_GUEST_TEST_MEM;
/*
- * Writes to the first byte of a random page within the testing memory
- * region continuously.
+ * Continuously write to the first 8 bytes of a random pages within
+ * the testing memory region.
*/
-void guest_code(void)
+static void guest_code(void)
{
- int i = 0;
- uint64_t volatile *array = guest_random_array;
- uint64_t volatile *guest_addr;
+ int i;
while (true) {
for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
- /*
- * Write to the first 8 bytes of a random page
- * on the testing memory region.
- */
- guest_addr = (uint64_t *)
- (TEST_MEM_OFFSET +
- (array[i] % TEST_MEM_PAGES) * guest_page_size);
- *guest_addr = guest_iteration;
+ uint64_t addr = guest_test_mem;
+ addr += (READ_ONCE(random_array[i]) % guest_num_pages)
+ * guest_page_size;
+ addr &= ~(host_page_size - 1);
+ *(uint64_t *)addr = READ_ONCE(iteration);
}
+
/* Tell the host that we need more random numbers */
GUEST_SYNC(1);
}
}
-/*
- * Host variables. These variables should only be used by the host
- * rather than the guest.
- */
-bool host_quit;
+/* Host variables */
+static bool host_quit;
/* Points to the test VM memory region on which we track dirty logs */
-void *host_test_mem;
+static void *host_test_mem;
+static uint64_t host_num_pages;
/* For statistics only */
-uint64_t host_dirty_count;
-uint64_t host_clear_count;
-uint64_t host_track_next_count;
+static uint64_t host_dirty_count;
+static uint64_t host_clear_count;
+static uint64_t host_track_next_count;
/*
* We use this bitmap to track some pages that should have its dirty
@@ -93,40 +97,34 @@ uint64_t host_track_next_count;
* page bit is cleared in the latest bitmap, then the system must
* report that write in the next get dirty log call.
*/
-unsigned long *host_bmap_track;
+static unsigned long *host_bmap_track;
-void generate_random_array(uint64_t *guest_array, uint64_t size)
+static void generate_random_array(uint64_t *guest_array, uint64_t size)
{
uint64_t i;
- for (i = 0; i < size; i++) {
+ for (i = 0; i < size; i++)
guest_array[i] = random();
- }
}
-void *vcpu_worker(void *data)
+static void *vcpu_worker(void *data)
{
int ret;
- uint64_t loops, *guest_array, pages_count = 0;
struct kvm_vm *vm = data;
+ uint64_t *guest_array;
+ uint64_t pages_count = 0;
struct kvm_run *run;
- struct guest_args args;
+ struct ucall uc;
run = vcpu_state(vm, VCPU_ID);
- /* Retrieve the guest random array pointer and cache it */
- guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array);
-
- DEBUG("VCPU starts\n");
-
+ guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
while (!READ_ONCE(host_quit)) {
- /* Let the guest to dirty these random pages */
+ /* Let the guest dirty the random pages */
ret = _vcpu_run(vm, VCPU_ID);
- guest_args_read(vm, VCPU_ID, &args);
- if (run->exit_reason == KVM_EXIT_IO &&
- args.port == GUEST_PORT_SYNC) {
+ if (get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) {
pages_count += TEST_PAGES_PER_LOOP;
generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
} else {
@@ -137,18 +135,20 @@ void *vcpu_worker(void *data)
}
}
- DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count);
+ DEBUG("Dirtied %"PRIu64" pages\n", pages_count);
return NULL;
}
-void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration)
+static void vm_dirty_log_verify(unsigned long *bmap)
{
uint64_t page;
- uint64_t volatile *value_ptr;
+ uint64_t *value_ptr;
+ uint64_t step = host_page_size >= guest_page_size ? 1 :
+ guest_page_size / host_page_size;
- for (page = 0; page < TEST_MEM_PAGES; page++) {
- value_ptr = host_test_mem + page * getpagesize();
+ for (page = 0; page < host_num_pages; page += step) {
+ value_ptr = host_test_mem + page * host_page_size;
/* If this is a special page that we were tracking... */
if (test_and_clear_bit(page, host_bmap_track)) {
@@ -208,88 +208,117 @@ void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration)
}
}
-void help(char *name)
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
+ uint64_t extra_mem_pages, void *guest_code)
{
- puts("");
- printf("usage: %s [-i iterations] [-I interval] [-h]\n", name);
- puts("");
- printf(" -i: specify iteration counts (default: %"PRIu64")\n",
- TEST_HOST_LOOP_N);
- printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
- TEST_HOST_LOOP_INTERVAL);
- puts("");
- exit(0);
+ struct kvm_vm *vm;
+ uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
+
+ vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+#ifdef __x86_64__
+ vm_create_irqchip(vm);
+#endif
+ vm_vcpu_add_default(vm, vcpuid, guest_code);
+ return vm;
}
-int main(int argc, char *argv[])
+static void run_test(enum vm_guest_mode mode, unsigned long iterations,
+ unsigned long interval, bool top_offset)
{
+ unsigned int guest_pa_bits, guest_page_shift;
pthread_t vcpu_thread;
struct kvm_vm *vm;
- uint64_t volatile *psize, *iteration;
- unsigned long *bmap, iterations = TEST_HOST_LOOP_N,
- interval = TEST_HOST_LOOP_INTERVAL;
- int opt;
-
- while ((opt = getopt(argc, argv, "hi:I:")) != -1) {
- switch (opt) {
- case 'i':
- iterations = strtol(optarg, NULL, 10);
- break;
- case 'I':
- interval = strtol(optarg, NULL, 10);
- break;
- case 'h':
- default:
- help(argv[0]);
- break;
- }
+ uint64_t max_gfn;
+ unsigned long *bmap;
+
+ switch (mode) {
+ case VM_MODE_P52V48_4K:
+ guest_pa_bits = 52;
+ guest_page_shift = 12;
+ break;
+ case VM_MODE_P52V48_64K:
+ guest_pa_bits = 52;
+ guest_page_shift = 16;
+ break;
+ case VM_MODE_P40V48_4K:
+ guest_pa_bits = 40;
+ guest_page_shift = 12;
+ break;
+ case VM_MODE_P40V48_64K:
+ guest_pa_bits = 40;
+ guest_page_shift = 16;
+ break;
+ default:
+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
}
- TEST_ASSERT(iterations > 2, "Iteration must be bigger than zero\n");
- TEST_ASSERT(interval > 0, "Interval must be bigger than zero");
+ DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
- DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
- iterations, interval);
+ max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
+ guest_page_size = (1ul << guest_page_shift);
+ /* 1G of guest page sized pages */
+ guest_num_pages = (1ul << (30 - guest_page_shift));
+ host_page_size = getpagesize();
+ host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
+ !!((guest_num_pages * guest_page_size) % host_page_size);
- srandom(time(0));
+ if (top_offset) {
+ guest_test_mem = (max_gfn - guest_num_pages) * guest_page_size;
+ guest_test_mem &= ~(host_page_size - 1);
+ }
- bmap = bitmap_alloc(TEST_MEM_PAGES);
- host_bmap_track = bitmap_alloc(TEST_MEM_PAGES);
+ DEBUG("guest test mem offset: 0x%lx\n", guest_test_mem);
- vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code);
+ bmap = bitmap_alloc(host_num_pages);
+ host_bmap_track = bitmap_alloc(host_num_pages);
+
+ vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- TEST_MEM_OFFSET,
+ guest_test_mem,
TEST_MEM_SLOT_INDEX,
- TEST_MEM_PAGES,
+ guest_num_pages,
KVM_MEM_LOG_DIRTY_PAGES);
- /* Cache the HVA pointer of the region */
- host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET);
/* Do 1:1 mapping for the dirty track memory slot */
- virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET,
- TEST_MEM_PAGES * getpagesize(), 0);
+ virt_map(vm, guest_test_mem, guest_test_mem,
+ guest_num_pages * guest_page_size, 0);
+
+ /* Cache the HVA pointer of the region */
+ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_mem);
+#ifdef __x86_64__
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+#endif
+#ifdef __aarch64__
+ ucall_init(vm, UCALL_MMIO, NULL);
+#endif
- /* Tell the guest about the page size on the system */
- psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size);
- *psize = getpagesize();
+ /* Export the shared variables to the guest */
+ sync_global_to_guest(vm, host_page_size);
+ sync_global_to_guest(vm, guest_page_size);
+ sync_global_to_guest(vm, guest_test_mem);
+ sync_global_to_guest(vm, guest_num_pages);
/* Start the iterations */
- iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration);
- *iteration = 1;
+ iteration = 1;
+ sync_global_to_guest(vm, iteration);
+ host_quit = false;
+ host_dirty_count = 0;
+ host_clear_count = 0;
+ host_track_next_count = 0;
- /* Start dirtying pages */
pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
- while (*iteration < iterations) {
+ while (iteration < iterations) {
/* Give the vcpu thread some time to dirty some pages */
usleep(interval * 1000);
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
- vm_dirty_log_verify(bmap, *iteration);
- (*iteration)++;
+ vm_dirty_log_verify(bmap);
+ iteration++;
+ sync_global_to_guest(vm, iteration);
}
/* Tell the vcpu thread to quit */
@@ -302,7 +331,118 @@ int main(int argc, char *argv[])
free(bmap);
free(host_bmap_track);
+ ucall_uninit(vm);
kvm_vm_free(vm);
+}
+
+static struct vm_guest_modes {
+ enum vm_guest_mode mode;
+ bool supported;
+ bool enabled;
+} vm_guest_modes[NUM_VM_MODES] = {
+#if defined(__x86_64__)
+ { VM_MODE_P52V48_4K, 1, 1, },
+ { VM_MODE_P52V48_64K, 0, 0, },
+ { VM_MODE_P40V48_4K, 0, 0, },
+ { VM_MODE_P40V48_64K, 0, 0, },
+#elif defined(__aarch64__)
+ { VM_MODE_P52V48_4K, 0, 0, },
+ { VM_MODE_P52V48_64K, 0, 0, },
+ { VM_MODE_P40V48_4K, 1, 1, },
+ { VM_MODE_P40V48_64K, 1, 1, },
+#endif
+};
+
+static void help(char *name)
+{
+ int i;
+
+ puts("");
+ printf("usage: %s [-h] [-i iterations] [-I interval] "
+ "[-o offset] [-t] [-m mode]\n", name);
+ puts("");
+ printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+ TEST_HOST_LOOP_N);
+ printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
+ TEST_HOST_LOOP_INTERVAL);
+ printf(" -o: guest test memory offset (default: 0x%lx)\n",
+ DEFAULT_GUEST_TEST_MEM);
+ printf(" -t: map guest test memory at the top of the allowed "
+ "physical address range\n");
+ printf(" -m: specify the guest mode ID to test "
+ "(default: test all supported modes)\n"
+ " This option may be used multiple times.\n"
+ " Guest mode IDs:\n");
+ for (i = 0; i < NUM_VM_MODES; ++i) {
+ printf(" %d: %s%s\n",
+ vm_guest_modes[i].mode,
+ vm_guest_mode_string(vm_guest_modes[i].mode),
+ vm_guest_modes[i].supported ? " (supported)" : "");
+ }
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned long iterations = TEST_HOST_LOOP_N;
+ unsigned long interval = TEST_HOST_LOOP_INTERVAL;
+ bool mode_selected = false;
+ bool top_offset = false;
+ unsigned int mode;
+ int opt, i;
+
+ while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) {
+ switch (opt) {
+ case 'i':
+ iterations = strtol(optarg, NULL, 10);
+ break;
+ case 'I':
+ interval = strtol(optarg, NULL, 10);
+ break;
+ case 'o':
+ guest_test_mem = strtoull(optarg, NULL, 0);
+ break;
+ case 't':
+ top_offset = true;
+ break;
+ case 'm':
+ if (!mode_selected) {
+ for (i = 0; i < NUM_VM_MODES; ++i)
+ vm_guest_modes[i].enabled = 0;
+ mode_selected = true;
+ }
+ mode = strtoul(optarg, NULL, 10);
+ TEST_ASSERT(mode < NUM_VM_MODES,
+ "Guest mode ID %d too big", mode);
+ vm_guest_modes[mode].enabled = 1;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
+ TEST_ASSERT(interval > 0, "Interval must be greater than zero");
+ TEST_ASSERT(!top_offset || guest_test_mem == DEFAULT_GUEST_TEST_MEM,
+ "Cannot use both -o [offset] and -t at the same time");
+
+ DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
+ iterations, interval);
+
+ srandom(time(0));
+
+ for (i = 0; i < NUM_VM_MODES; ++i) {
+ if (!vm_guest_modes[i].enabled)
+ continue;
+ TEST_ASSERT(vm_guest_modes[i].supported,
+ "Guest mode ID %d (%s) not supported.",
+ vm_guest_modes[i].mode,
+ vm_guest_mode_string(vm_guest_modes[i].mode));
+ run_test(vm_guest_modes[i].mode, iterations, interval, top_offset);
+ }
return 0;
}
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
new file mode 100644
index 000000000000..9ef2ab1a0c08
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AArch64 processor specific defines
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+
+
+#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+#define CPACR_EL1 3, 0, 1, 0, 2
+#define TCR_EL1 3, 0, 2, 0, 2
+#define MAIR_EL1 3, 0, 10, 2, 0
+#define TTBR0_EL1 3, 0, 2, 0, 0
+#define SCTLR_EL1 3, 0, 1, 0, 0
+
+/*
+ * Default MAIR
+ * index attribute
+ * DEVICE_nGnRnE 0 0000:0000
+ * DEVICE_nGnRE 1 0000:0100
+ * DEVICE_GRE 2 0000:1100
+ * NORMAL_NC 3 0100:0100
+ * NORMAL 4 1111:1111
+ * NORMAL_WT 5 1011:1011
+ */
+#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \
+ (0x04ul << (1 * 8)) | \
+ (0x0cul << (2 * 8)) | \
+ (0x44ul << (3 * 8)) | \
+ (0xfful << (4 * 8)) | \
+ (0xbbul << (5 * 8)))
+
+static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr)
+{
+ struct kvm_one_reg reg;
+ reg.id = id;
+ reg.addr = (uint64_t)addr;
+ vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, &reg);
+}
+
+static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t val)
+{
+ struct kvm_one_reg reg;
+ reg.id = id;
+ reg.addr = (uint64_t)&val;
+ vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
+}
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h
new file mode 100644
index 000000000000..4059014d93ea
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/evmcs.h
@@ -0,0 +1,1098 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/vmx.h
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ */
+
+#ifndef SELFTEST_KVM_EVMCS_H
+#define SELFTEST_KVM_EVMCS_H
+
+#include <stdint.h>
+#include "vmx.h"
+
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+extern bool enable_evmcs;
+
+struct hv_vp_assist_page {
+ __u32 apic_assist;
+ __u32 reserved;
+ __u64 vtl_control[2];
+ __u64 nested_enlightenments_control[2];
+ __u32 enlighten_vmentry;
+ __u64 current_nested_vmcs;
+};
+
+struct hv_enlightened_vmcs {
+ u32 revision_id;
+ u32 abort;
+
+ u16 host_es_selector;
+ u16 host_cs_selector;
+ u16 host_ss_selector;
+ u16 host_ds_selector;
+ u16 host_fs_selector;
+ u16 host_gs_selector;
+ u16 host_tr_selector;
+
+ u64 host_ia32_pat;
+ u64 host_ia32_efer;
+
+ u64 host_cr0;
+ u64 host_cr3;
+ u64 host_cr4;
+
+ u64 host_ia32_sysenter_esp;
+ u64 host_ia32_sysenter_eip;
+ u64 host_rip;
+ u32 host_ia32_sysenter_cs;
+
+ u32 pin_based_vm_exec_control;
+ u32 vm_exit_controls;
+ u32 secondary_vm_exec_control;
+
+ u64 io_bitmap_a;
+ u64 io_bitmap_b;
+ u64 msr_bitmap;
+
+ u16 guest_es_selector;
+ u16 guest_cs_selector;
+ u16 guest_ss_selector;
+ u16 guest_ds_selector;
+ u16 guest_fs_selector;
+ u16 guest_gs_selector;
+ u16 guest_ldtr_selector;
+ u16 guest_tr_selector;
+
+ u32 guest_es_limit;
+ u32 guest_cs_limit;
+ u32 guest_ss_limit;
+ u32 guest_ds_limit;
+ u32 guest_fs_limit;
+ u32 guest_gs_limit;
+ u32 guest_ldtr_limit;
+ u32 guest_tr_limit;
+ u32 guest_gdtr_limit;
+ u32 guest_idtr_limit;
+
+ u32 guest_es_ar_bytes;
+ u32 guest_cs_ar_bytes;
+ u32 guest_ss_ar_bytes;
+ u32 guest_ds_ar_bytes;
+ u32 guest_fs_ar_bytes;
+ u32 guest_gs_ar_bytes;
+ u32 guest_ldtr_ar_bytes;
+ u32 guest_tr_ar_bytes;
+
+ u64 guest_es_base;
+ u64 guest_cs_base;
+ u64 guest_ss_base;
+ u64 guest_ds_base;
+ u64 guest_fs_base;
+ u64 guest_gs_base;
+ u64 guest_ldtr_base;
+ u64 guest_tr_base;
+ u64 guest_gdtr_base;
+ u64 guest_idtr_base;
+
+ u64 padding64_1[3];
+
+ u64 vm_exit_msr_store_addr;
+ u64 vm_exit_msr_load_addr;
+ u64 vm_entry_msr_load_addr;
+
+ u64 cr3_target_value0;
+ u64 cr3_target_value1;
+ u64 cr3_target_value2;
+ u64 cr3_target_value3;
+
+ u32 page_fault_error_code_mask;
+ u32 page_fault_error_code_match;
+
+ u32 cr3_target_count;
+ u32 vm_exit_msr_store_count;
+ u32 vm_exit_msr_load_count;
+ u32 vm_entry_msr_load_count;
+
+ u64 tsc_offset;
+ u64 virtual_apic_page_addr;
+ u64 vmcs_link_pointer;
+
+ u64 guest_ia32_debugctl;
+ u64 guest_ia32_pat;
+ u64 guest_ia32_efer;
+
+ u64 guest_pdptr0;
+ u64 guest_pdptr1;
+ u64 guest_pdptr2;
+ u64 guest_pdptr3;
+
+ u64 guest_pending_dbg_exceptions;
+ u64 guest_sysenter_esp;
+ u64 guest_sysenter_eip;
+
+ u32 guest_activity_state;
+ u32 guest_sysenter_cs;
+
+ u64 cr0_guest_host_mask;
+ u64 cr4_guest_host_mask;
+ u64 cr0_read_shadow;
+ u64 cr4_read_shadow;
+ u64 guest_cr0;
+ u64 guest_cr3;
+ u64 guest_cr4;
+ u64 guest_dr7;
+
+ u64 host_fs_base;
+ u64 host_gs_base;
+ u64 host_tr_base;
+ u64 host_gdtr_base;
+ u64 host_idtr_base;
+ u64 host_rsp;
+
+ u64 ept_pointer;
+
+ u16 virtual_processor_id;
+ u16 padding16[3];
+
+ u64 padding64_2[5];
+ u64 guest_physical_address;
+
+ u32 vm_instruction_error;
+ u32 vm_exit_reason;
+ u32 vm_exit_intr_info;
+ u32 vm_exit_intr_error_code;
+ u32 idt_vectoring_info_field;
+ u32 idt_vectoring_error_code;
+ u32 vm_exit_instruction_len;
+ u32 vmx_instruction_info;
+
+ u64 exit_qualification;
+ u64 exit_io_instruction_ecx;
+ u64 exit_io_instruction_esi;
+ u64 exit_io_instruction_edi;
+ u64 exit_io_instruction_eip;
+
+ u64 guest_linear_address;
+ u64 guest_rsp;
+ u64 guest_rflags;
+
+ u32 guest_interruptibility_info;
+ u32 cpu_based_vm_exec_control;
+ u32 exception_bitmap;
+ u32 vm_entry_controls;
+ u32 vm_entry_intr_info_field;
+ u32 vm_entry_exception_error_code;
+ u32 vm_entry_instruction_len;
+ u32 tpr_threshold;
+
+ u64 guest_rip;
+
+ u32 hv_clean_fields;
+ u32 hv_padding_32;
+ u32 hv_synthetic_controls;
+ struct {
+ u32 nested_flush_hypercall:1;
+ u32 msr_bitmap:1;
+ u32 reserved:30;
+ } hv_enlightenments_control;
+ u32 hv_vp_id;
+
+ u64 hv_vm_id;
+ u64 partition_assist_page;
+ u64 padding64_4[4];
+ u64 guest_bndcfgs;
+ u64 padding64_5[7];
+ u64 xss_exit_bitmap;
+ u64 padding64_6[7];
+};
+
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
+ (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+struct hv_enlightened_vmcs *current_evmcs;
+struct hv_vp_assist_page *current_vp_assist;
+
+static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+ u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+ HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+ wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+ current_vp_assist = vp_assist;
+
+ enable_evmcs = true;
+
+ return 0;
+}
+
+static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
+{
+ current_vp_assist->current_nested_vmcs = vmcs_pa;
+ current_vp_assist->enlighten_vmentry = 1;
+
+ current_evmcs = vmcs;
+
+ return 0;
+}
+
+static inline int evmcs_vmptrst(uint64_t *value)
+{
+ *value = current_vp_assist->current_nested_vmcs &
+ ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+ return 0;
+}
+
+static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
+{
+ switch (encoding) {
+ case GUEST_RIP:
+ *value = current_evmcs->guest_rip;
+ break;
+ case GUEST_RSP:
+ *value = current_evmcs->guest_rsp;
+ break;
+ case GUEST_RFLAGS:
+ *value = current_evmcs->guest_rflags;
+ break;
+ case HOST_IA32_PAT:
+ *value = current_evmcs->host_ia32_pat;
+ break;
+ case HOST_IA32_EFER:
+ *value = current_evmcs->host_ia32_efer;
+ break;
+ case HOST_CR0:
+ *value = current_evmcs->host_cr0;
+ break;
+ case HOST_CR3:
+ *value = current_evmcs->host_cr3;
+ break;
+ case HOST_CR4:
+ *value = current_evmcs->host_cr4;
+ break;
+ case HOST_IA32_SYSENTER_ESP:
+ *value = current_evmcs->host_ia32_sysenter_esp;
+ break;
+ case HOST_IA32_SYSENTER_EIP:
+ *value = current_evmcs->host_ia32_sysenter_eip;
+ break;
+ case HOST_RIP:
+ *value = current_evmcs->host_rip;
+ break;
+ case IO_BITMAP_A:
+ *value = current_evmcs->io_bitmap_a;
+ break;
+ case IO_BITMAP_B:
+ *value = current_evmcs->io_bitmap_b;
+ break;
+ case MSR_BITMAP:
+ *value = current_evmcs->msr_bitmap;
+ break;
+ case GUEST_ES_BASE:
+ *value = current_evmcs->guest_es_base;
+ break;
+ case GUEST_CS_BASE:
+ *value = current_evmcs->guest_cs_base;
+ break;
+ case GUEST_SS_BASE:
+ *value = current_evmcs->guest_ss_base;
+ break;
+ case GUEST_DS_BASE:
+ *value = current_evmcs->guest_ds_base;
+ break;
+ case GUEST_FS_BASE:
+ *value = current_evmcs->guest_fs_base;
+ break;
+ case GUEST_GS_BASE:
+ *value = current_evmcs->guest_gs_base;
+ break;
+ case GUEST_LDTR_BASE:
+ *value = current_evmcs->guest_ldtr_base;
+ break;
+ case GUEST_TR_BASE:
+ *value = current_evmcs->guest_tr_base;
+ break;
+ case GUEST_GDTR_BASE:
+ *value = current_evmcs->guest_gdtr_base;
+ break;
+ case GUEST_IDTR_BASE:
+ *value = current_evmcs->guest_idtr_base;
+ break;
+ case TSC_OFFSET:
+ *value = current_evmcs->tsc_offset;
+ break;
+ case VIRTUAL_APIC_PAGE_ADDR:
+ *value = current_evmcs->virtual_apic_page_addr;
+ break;
+ case VMCS_LINK_POINTER:
+ *value = current_evmcs->vmcs_link_pointer;
+ break;
+ case GUEST_IA32_DEBUGCTL:
+ *value = current_evmcs->guest_ia32_debugctl;
+ break;
+ case GUEST_IA32_PAT:
+ *value = current_evmcs->guest_ia32_pat;
+ break;
+ case GUEST_IA32_EFER:
+ *value = current_evmcs->guest_ia32_efer;
+ break;
+ case GUEST_PDPTR0:
+ *value = current_evmcs->guest_pdptr0;
+ break;
+ case GUEST_PDPTR1:
+ *value = current_evmcs->guest_pdptr1;
+ break;
+ case GUEST_PDPTR2:
+ *value = current_evmcs->guest_pdptr2;
+ break;
+ case GUEST_PDPTR3:
+ *value = current_evmcs->guest_pdptr3;
+ break;
+ case GUEST_PENDING_DBG_EXCEPTIONS:
+ *value = current_evmcs->guest_pending_dbg_exceptions;
+ break;
+ case GUEST_SYSENTER_ESP:
+ *value = current_evmcs->guest_sysenter_esp;
+ break;
+ case GUEST_SYSENTER_EIP:
+ *value = current_evmcs->guest_sysenter_eip;
+ break;
+ case CR0_GUEST_HOST_MASK:
+ *value = current_evmcs->cr0_guest_host_mask;
+ break;
+ case CR4_GUEST_HOST_MASK:
+ *value = current_evmcs->cr4_guest_host_mask;
+ break;
+ case CR0_READ_SHADOW:
+ *value = current_evmcs->cr0_read_shadow;
+ break;
+ case CR4_READ_SHADOW:
+ *value = current_evmcs->cr4_read_shadow;
+ break;
+ case GUEST_CR0:
+ *value = current_evmcs->guest_cr0;
+ break;
+ case GUEST_CR3:
+ *value = current_evmcs->guest_cr3;
+ break;
+ case GUEST_CR4:
+ *value = current_evmcs->guest_cr4;
+ break;
+ case GUEST_DR7:
+ *value = current_evmcs->guest_dr7;
+ break;
+ case HOST_FS_BASE:
+ *value = current_evmcs->host_fs_base;
+ break;
+ case HOST_GS_BASE:
+ *value = current_evmcs->host_gs_base;
+ break;
+ case HOST_TR_BASE:
+ *value = current_evmcs->host_tr_base;
+ break;
+ case HOST_GDTR_BASE:
+ *value = current_evmcs->host_gdtr_base;
+ break;
+ case HOST_IDTR_BASE:
+ *value = current_evmcs->host_idtr_base;
+ break;
+ case HOST_RSP:
+ *value = current_evmcs->host_rsp;
+ break;
+ case EPT_POINTER:
+ *value = current_evmcs->ept_pointer;
+ break;
+ case GUEST_BNDCFGS:
+ *value = current_evmcs->guest_bndcfgs;
+ break;
+ case XSS_EXIT_BITMAP:
+ *value = current_evmcs->xss_exit_bitmap;
+ break;
+ case GUEST_PHYSICAL_ADDRESS:
+ *value = current_evmcs->guest_physical_address;
+ break;
+ case EXIT_QUALIFICATION:
+ *value = current_evmcs->exit_qualification;
+ break;
+ case GUEST_LINEAR_ADDRESS:
+ *value = current_evmcs->guest_linear_address;
+ break;
+ case VM_EXIT_MSR_STORE_ADDR:
+ *value = current_evmcs->vm_exit_msr_store_addr;
+ break;
+ case VM_EXIT_MSR_LOAD_ADDR:
+ *value = current_evmcs->vm_exit_msr_load_addr;
+ break;
+ case VM_ENTRY_MSR_LOAD_ADDR:
+ *value = current_evmcs->vm_entry_msr_load_addr;
+ break;
+ case CR3_TARGET_VALUE0:
+ *value = current_evmcs->cr3_target_value0;
+ break;
+ case CR3_TARGET_VALUE1:
+ *value = current_evmcs->cr3_target_value1;
+ break;
+ case CR3_TARGET_VALUE2:
+ *value = current_evmcs->cr3_target_value2;
+ break;
+ case CR3_TARGET_VALUE3:
+ *value = current_evmcs->cr3_target_value3;
+ break;
+ case TPR_THRESHOLD:
+ *value = current_evmcs->tpr_threshold;
+ break;
+ case GUEST_INTERRUPTIBILITY_INFO:
+ *value = current_evmcs->guest_interruptibility_info;
+ break;
+ case CPU_BASED_VM_EXEC_CONTROL:
+ *value = current_evmcs->cpu_based_vm_exec_control;
+ break;
+ case EXCEPTION_BITMAP:
+ *value = current_evmcs->exception_bitmap;
+ break;
+ case VM_ENTRY_CONTROLS:
+ *value = current_evmcs->vm_entry_controls;
+ break;
+ case VM_ENTRY_INTR_INFO_FIELD:
+ *value = current_evmcs->vm_entry_intr_info_field;
+ break;
+ case VM_ENTRY_EXCEPTION_ERROR_CODE:
+ *value = current_evmcs->vm_entry_exception_error_code;
+ break;
+ case VM_ENTRY_INSTRUCTION_LEN:
+ *value = current_evmcs->vm_entry_instruction_len;
+ break;
+ case HOST_IA32_SYSENTER_CS:
+ *value = current_evmcs->host_ia32_sysenter_cs;
+ break;
+ case PIN_BASED_VM_EXEC_CONTROL:
+ *value = current_evmcs->pin_based_vm_exec_control;
+ break;
+ case VM_EXIT_CONTROLS:
+ *value = current_evmcs->vm_exit_controls;
+ break;
+ case SECONDARY_VM_EXEC_CONTROL:
+ *value = current_evmcs->secondary_vm_exec_control;
+ break;
+ case GUEST_ES_LIMIT:
+ *value = current_evmcs->guest_es_limit;
+ break;
+ case GUEST_CS_LIMIT:
+ *value = current_evmcs->guest_cs_limit;
+ break;
+ case GUEST_SS_LIMIT:
+ *value = current_evmcs->guest_ss_limit;
+ break;
+ case GUEST_DS_LIMIT:
+ *value = current_evmcs->guest_ds_limit;
+ break;
+ case GUEST_FS_LIMIT:
+ *value = current_evmcs->guest_fs_limit;
+ break;
+ case GUEST_GS_LIMIT:
+ *value = current_evmcs->guest_gs_limit;
+ break;
+ case GUEST_LDTR_LIMIT:
+ *value = current_evmcs->guest_ldtr_limit;
+ break;
+ case GUEST_TR_LIMIT:
+ *value = current_evmcs->guest_tr_limit;
+ break;
+ case GUEST_GDTR_LIMIT:
+ *value = current_evmcs->guest_gdtr_limit;
+ break;
+ case GUEST_IDTR_LIMIT:
+ *value = current_evmcs->guest_idtr_limit;
+ break;
+ case GUEST_ES_AR_BYTES:
+ *value = current_evmcs->guest_es_ar_bytes;
+ break;
+ case GUEST_CS_AR_BYTES:
+ *value = current_evmcs->guest_cs_ar_bytes;
+ break;
+ case GUEST_SS_AR_BYTES:
+ *value = current_evmcs->guest_ss_ar_bytes;
+ break;
+ case GUEST_DS_AR_BYTES:
+ *value = current_evmcs->guest_ds_ar_bytes;
+ break;
+ case GUEST_FS_AR_BYTES:
+ *value = current_evmcs->guest_fs_ar_bytes;
+ break;
+ case GUEST_GS_AR_BYTES:
+ *value = current_evmcs->guest_gs_ar_bytes;
+ break;
+ case GUEST_LDTR_AR_BYTES:
+ *value = current_evmcs->guest_ldtr_ar_bytes;
+ break;
+ case GUEST_TR_AR_BYTES:
+ *value = current_evmcs->guest_tr_ar_bytes;
+ break;
+ case GUEST_ACTIVITY_STATE:
+ *value = current_evmcs->guest_activity_state;
+ break;
+ case GUEST_SYSENTER_CS:
+ *value = current_evmcs->guest_sysenter_cs;
+ break;
+ case VM_INSTRUCTION_ERROR:
+ *value = current_evmcs->vm_instruction_error;
+ break;
+ case VM_EXIT_REASON:
+ *value = current_evmcs->vm_exit_reason;
+ break;
+ case VM_EXIT_INTR_INFO:
+ *value = current_evmcs->vm_exit_intr_info;
+ break;
+ case VM_EXIT_INTR_ERROR_CODE:
+ *value = current_evmcs->vm_exit_intr_error_code;
+ break;
+ case IDT_VECTORING_INFO_FIELD:
+ *value = current_evmcs->idt_vectoring_info_field;
+ break;
+ case IDT_VECTORING_ERROR_CODE:
+ *value = current_evmcs->idt_vectoring_error_code;
+ break;
+ case VM_EXIT_INSTRUCTION_LEN:
+ *value = current_evmcs->vm_exit_instruction_len;
+ break;
+ case VMX_INSTRUCTION_INFO:
+ *value = current_evmcs->vmx_instruction_info;
+ break;
+ case PAGE_FAULT_ERROR_CODE_MASK:
+ *value = current_evmcs->page_fault_error_code_mask;
+ break;
+ case PAGE_FAULT_ERROR_CODE_MATCH:
+ *value = current_evmcs->page_fault_error_code_match;
+ break;
+ case CR3_TARGET_COUNT:
+ *value = current_evmcs->cr3_target_count;
+ break;
+ case VM_EXIT_MSR_STORE_COUNT:
+ *value = current_evmcs->vm_exit_msr_store_count;
+ break;
+ case VM_EXIT_MSR_LOAD_COUNT:
+ *value = current_evmcs->vm_exit_msr_load_count;
+ break;
+ case VM_ENTRY_MSR_LOAD_COUNT:
+ *value = current_evmcs->vm_entry_msr_load_count;
+ break;
+ case HOST_ES_SELECTOR:
+ *value = current_evmcs->host_es_selector;
+ break;
+ case HOST_CS_SELECTOR:
+ *value = current_evmcs->host_cs_selector;
+ break;
+ case HOST_SS_SELECTOR:
+ *value = current_evmcs->host_ss_selector;
+ break;
+ case HOST_DS_SELECTOR:
+ *value = current_evmcs->host_ds_selector;
+ break;
+ case HOST_FS_SELECTOR:
+ *value = current_evmcs->host_fs_selector;
+ break;
+ case HOST_GS_SELECTOR:
+ *value = current_evmcs->host_gs_selector;
+ break;
+ case HOST_TR_SELECTOR:
+ *value = current_evmcs->host_tr_selector;
+ break;
+ case GUEST_ES_SELECTOR:
+ *value = current_evmcs->guest_es_selector;
+ break;
+ case GUEST_CS_SELECTOR:
+ *value = current_evmcs->guest_cs_selector;
+ break;
+ case GUEST_SS_SELECTOR:
+ *value = current_evmcs->guest_ss_selector;
+ break;
+ case GUEST_DS_SELECTOR:
+ *value = current_evmcs->guest_ds_selector;
+ break;
+ case GUEST_FS_SELECTOR:
+ *value = current_evmcs->guest_fs_selector;
+ break;
+ case GUEST_GS_SELECTOR:
+ *value = current_evmcs->guest_gs_selector;
+ break;
+ case GUEST_LDTR_SELECTOR:
+ *value = current_evmcs->guest_ldtr_selector;
+ break;
+ case GUEST_TR_SELECTOR:
+ *value = current_evmcs->guest_tr_selector;
+ break;
+ case VIRTUAL_PROCESSOR_ID:
+ *value = current_evmcs->virtual_processor_id;
+ break;
+ default: return 1;
+ }
+
+ return 0;
+}
+
+static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
+{
+ switch (encoding) {
+ case GUEST_RIP:
+ current_evmcs->guest_rip = value;
+ break;
+ case GUEST_RSP:
+ current_evmcs->guest_rsp = value;
+ break;
+ case GUEST_RFLAGS:
+ current_evmcs->guest_rflags = value;
+ break;
+ case HOST_IA32_PAT:
+ current_evmcs->host_ia32_pat = value;
+ break;
+ case HOST_IA32_EFER:
+ current_evmcs->host_ia32_efer = value;
+ break;
+ case HOST_CR0:
+ current_evmcs->host_cr0 = value;
+ break;
+ case HOST_CR3:
+ current_evmcs->host_cr3 = value;
+ break;
+ case HOST_CR4:
+ current_evmcs->host_cr4 = value;
+ break;
+ case HOST_IA32_SYSENTER_ESP:
+ current_evmcs->host_ia32_sysenter_esp = value;
+ break;
+ case HOST_IA32_SYSENTER_EIP:
+ current_evmcs->host_ia32_sysenter_eip = value;
+ break;
+ case HOST_RIP:
+ current_evmcs->host_rip = value;
+ break;
+ case IO_BITMAP_A:
+ current_evmcs->io_bitmap_a = value;
+ break;
+ case IO_BITMAP_B:
+ current_evmcs->io_bitmap_b = value;
+ break;
+ case MSR_BITMAP:
+ current_evmcs->msr_bitmap = value;
+ break;
+ case GUEST_ES_BASE:
+ current_evmcs->guest_es_base = value;
+ break;
+ case GUEST_CS_BASE:
+ current_evmcs->guest_cs_base = value;
+ break;
+ case GUEST_SS_BASE:
+ current_evmcs->guest_ss_base = value;
+ break;
+ case GUEST_DS_BASE:
+ current_evmcs->guest_ds_base = value;
+ break;
+ case GUEST_FS_BASE:
+ current_evmcs->guest_fs_base = value;
+ break;
+ case GUEST_GS_BASE:
+ current_evmcs->guest_gs_base = value;
+ break;
+ case GUEST_LDTR_BASE:
+ current_evmcs->guest_ldtr_base = value;
+ break;
+ case GUEST_TR_BASE:
+ current_evmcs->guest_tr_base = value;
+ break;
+ case GUEST_GDTR_BASE:
+ current_evmcs->guest_gdtr_base = value;
+ break;
+ case GUEST_IDTR_BASE:
+ current_evmcs->guest_idtr_base = value;
+ break;
+ case TSC_OFFSET:
+ current_evmcs->tsc_offset = value;
+ break;
+ case VIRTUAL_APIC_PAGE_ADDR:
+ current_evmcs->virtual_apic_page_addr = value;
+ break;
+ case VMCS_LINK_POINTER:
+ current_evmcs->vmcs_link_pointer = value;
+ break;
+ case GUEST_IA32_DEBUGCTL:
+ current_evmcs->guest_ia32_debugctl = value;
+ break;
+ case GUEST_IA32_PAT:
+ current_evmcs->guest_ia32_pat = value;
+ break;
+ case GUEST_IA32_EFER:
+ current_evmcs->guest_ia32_efer = value;
+ break;
+ case GUEST_PDPTR0:
+ current_evmcs->guest_pdptr0 = value;
+ break;
+ case GUEST_PDPTR1:
+ current_evmcs->guest_pdptr1 = value;
+ break;
+ case GUEST_PDPTR2:
+ current_evmcs->guest_pdptr2 = value;
+ break;
+ case GUEST_PDPTR3:
+ current_evmcs->guest_pdptr3 = value;
+ break;
+ case GUEST_PENDING_DBG_EXCEPTIONS:
+ current_evmcs->guest_pending_dbg_exceptions = value;
+ break;
+ case GUEST_SYSENTER_ESP:
+ current_evmcs->guest_sysenter_esp = value;
+ break;
+ case GUEST_SYSENTER_EIP:
+ current_evmcs->guest_sysenter_eip = value;
+ break;
+ case CR0_GUEST_HOST_MASK:
+ current_evmcs->cr0_guest_host_mask = value;
+ break;
+ case CR4_GUEST_HOST_MASK:
+ current_evmcs->cr4_guest_host_mask = value;
+ break;
+ case CR0_READ_SHADOW:
+ current_evmcs->cr0_read_shadow = value;
+ break;
+ case CR4_READ_SHADOW:
+ current_evmcs->cr4_read_shadow = value;
+ break;
+ case GUEST_CR0:
+ current_evmcs->guest_cr0 = value;
+ break;
+ case GUEST_CR3:
+ current_evmcs->guest_cr3 = value;
+ break;
+ case GUEST_CR4:
+ current_evmcs->guest_cr4 = value;
+ break;
+ case GUEST_DR7:
+ current_evmcs->guest_dr7 = value;
+ break;
+ case HOST_FS_BASE:
+ current_evmcs->host_fs_base = value;
+ break;
+ case HOST_GS_BASE:
+ current_evmcs->host_gs_base = value;
+ break;
+ case HOST_TR_BASE:
+ current_evmcs->host_tr_base = value;
+ break;
+ case HOST_GDTR_BASE:
+ current_evmcs->host_gdtr_base = value;
+ break;
+ case HOST_IDTR_BASE:
+ current_evmcs->host_idtr_base = value;
+ break;
+ case HOST_RSP:
+ current_evmcs->host_rsp = value;
+ break;
+ case EPT_POINTER:
+ current_evmcs->ept_pointer = value;
+ break;
+ case GUEST_BNDCFGS:
+ current_evmcs->guest_bndcfgs = value;
+ break;
+ case XSS_EXIT_BITMAP:
+ current_evmcs->xss_exit_bitmap = value;
+ break;
+ case GUEST_PHYSICAL_ADDRESS:
+ current_evmcs->guest_physical_address = value;
+ break;
+ case EXIT_QUALIFICATION:
+ current_evmcs->exit_qualification = value;
+ break;
+ case GUEST_LINEAR_ADDRESS:
+ current_evmcs->guest_linear_address = value;
+ break;
+ case VM_EXIT_MSR_STORE_ADDR:
+ current_evmcs->vm_exit_msr_store_addr = value;
+ break;
+ case VM_EXIT_MSR_LOAD_ADDR:
+ current_evmcs->vm_exit_msr_load_addr = value;
+ break;
+ case VM_ENTRY_MSR_LOAD_ADDR:
+ current_evmcs->vm_entry_msr_load_addr = value;
+ break;
+ case CR3_TARGET_VALUE0:
+ current_evmcs->cr3_target_value0 = value;
+ break;
+ case CR3_TARGET_VALUE1:
+ current_evmcs->cr3_target_value1 = value;
+ break;
+ case CR3_TARGET_VALUE2:
+ current_evmcs->cr3_target_value2 = value;
+ break;
+ case CR3_TARGET_VALUE3:
+ current_evmcs->cr3_target_value3 = value;
+ break;
+ case TPR_THRESHOLD:
+ current_evmcs->tpr_threshold = value;
+ break;
+ case GUEST_INTERRUPTIBILITY_INFO:
+ current_evmcs->guest_interruptibility_info = value;
+ break;
+ case CPU_BASED_VM_EXEC_CONTROL:
+ current_evmcs->cpu_based_vm_exec_control = value;
+ break;
+ case EXCEPTION_BITMAP:
+ current_evmcs->exception_bitmap = value;
+ break;
+ case VM_ENTRY_CONTROLS:
+ current_evmcs->vm_entry_controls = value;
+ break;
+ case VM_ENTRY_INTR_INFO_FIELD:
+ current_evmcs->vm_entry_intr_info_field = value;
+ break;
+ case VM_ENTRY_EXCEPTION_ERROR_CODE:
+ current_evmcs->vm_entry_exception_error_code = value;
+ break;
+ case VM_ENTRY_INSTRUCTION_LEN:
+ current_evmcs->vm_entry_instruction_len = value;
+ break;
+ case HOST_IA32_SYSENTER_CS:
+ current_evmcs->host_ia32_sysenter_cs = value;
+ break;
+ case PIN_BASED_VM_EXEC_CONTROL:
+ current_evmcs->pin_based_vm_exec_control = value;
+ break;
+ case VM_EXIT_CONTROLS:
+ current_evmcs->vm_exit_controls = value;
+ break;
+ case SECONDARY_VM_EXEC_CONTROL:
+ current_evmcs->secondary_vm_exec_control = value;
+ break;
+ case GUEST_ES_LIMIT:
+ current_evmcs->guest_es_limit = value;
+ break;
+ case GUEST_CS_LIMIT:
+ current_evmcs->guest_cs_limit = value;
+ break;
+ case GUEST_SS_LIMIT:
+ current_evmcs->guest_ss_limit = value;
+ break;
+ case GUEST_DS_LIMIT:
+ current_evmcs->guest_ds_limit = value;
+ break;
+ case GUEST_FS_LIMIT:
+ current_evmcs->guest_fs_limit = value;
+ break;
+ case GUEST_GS_LIMIT:
+ current_evmcs->guest_gs_limit = value;
+ break;
+ case GUEST_LDTR_LIMIT:
+ current_evmcs->guest_ldtr_limit = value;
+ break;
+ case GUEST_TR_LIMIT:
+ current_evmcs->guest_tr_limit = value;
+ break;
+ case GUEST_GDTR_LIMIT:
+ current_evmcs->guest_gdtr_limit = value;
+ break;
+ case GUEST_IDTR_LIMIT:
+ current_evmcs->guest_idtr_limit = value;
+ break;
+ case GUEST_ES_AR_BYTES:
+ current_evmcs->guest_es_ar_bytes = value;
+ break;
+ case GUEST_CS_AR_BYTES:
+ current_evmcs->guest_cs_ar_bytes = value;
+ break;
+ case GUEST_SS_AR_BYTES:
+ current_evmcs->guest_ss_ar_bytes = value;
+ break;
+ case GUEST_DS_AR_BYTES:
+ current_evmcs->guest_ds_ar_bytes = value;
+ break;
+ case GUEST_FS_AR_BYTES:
+ current_evmcs->guest_fs_ar_bytes = value;
+ break;
+ case GUEST_GS_AR_BYTES:
+ current_evmcs->guest_gs_ar_bytes = value;
+ break;
+ case GUEST_LDTR_AR_BYTES:
+ current_evmcs->guest_ldtr_ar_bytes = value;
+ break;
+ case GUEST_TR_AR_BYTES:
+ current_evmcs->guest_tr_ar_bytes = value;
+ break;
+ case GUEST_ACTIVITY_STATE:
+ current_evmcs->guest_activity_state = value;
+ break;
+ case GUEST_SYSENTER_CS:
+ current_evmcs->guest_sysenter_cs = value;
+ break;
+ case VM_INSTRUCTION_ERROR:
+ current_evmcs->vm_instruction_error = value;
+ break;
+ case VM_EXIT_REASON:
+ current_evmcs->vm_exit_reason = value;
+ break;
+ case VM_EXIT_INTR_INFO:
+ current_evmcs->vm_exit_intr_info = value;
+ break;
+ case VM_EXIT_INTR_ERROR_CODE:
+ current_evmcs->vm_exit_intr_error_code = value;
+ break;
+ case IDT_VECTORING_INFO_FIELD:
+ current_evmcs->idt_vectoring_info_field = value;
+ break;
+ case IDT_VECTORING_ERROR_CODE:
+ current_evmcs->idt_vectoring_error_code = value;
+ break;
+ case VM_EXIT_INSTRUCTION_LEN:
+ current_evmcs->vm_exit_instruction_len = value;
+ break;
+ case VMX_INSTRUCTION_INFO:
+ current_evmcs->vmx_instruction_info = value;
+ break;
+ case PAGE_FAULT_ERROR_CODE_MASK:
+ current_evmcs->page_fault_error_code_mask = value;
+ break;
+ case PAGE_FAULT_ERROR_CODE_MATCH:
+ current_evmcs->page_fault_error_code_match = value;
+ break;
+ case CR3_TARGET_COUNT:
+ current_evmcs->cr3_target_count = value;
+ break;
+ case VM_EXIT_MSR_STORE_COUNT:
+ current_evmcs->vm_exit_msr_store_count = value;
+ break;
+ case VM_EXIT_MSR_LOAD_COUNT:
+ current_evmcs->vm_exit_msr_load_count = value;
+ break;
+ case VM_ENTRY_MSR_LOAD_COUNT:
+ current_evmcs->vm_entry_msr_load_count = value;
+ break;
+ case HOST_ES_SELECTOR:
+ current_evmcs->host_es_selector = value;
+ break;
+ case HOST_CS_SELECTOR:
+ current_evmcs->host_cs_selector = value;
+ break;
+ case HOST_SS_SELECTOR:
+ current_evmcs->host_ss_selector = value;
+ break;
+ case HOST_DS_SELECTOR:
+ current_evmcs->host_ds_selector = value;
+ break;
+ case HOST_FS_SELECTOR:
+ current_evmcs->host_fs_selector = value;
+ break;
+ case HOST_GS_SELECTOR:
+ current_evmcs->host_gs_selector = value;
+ break;
+ case HOST_TR_SELECTOR:
+ current_evmcs->host_tr_selector = value;
+ break;
+ case GUEST_ES_SELECTOR:
+ current_evmcs->guest_es_selector = value;
+ break;
+ case GUEST_CS_SELECTOR:
+ current_evmcs->guest_cs_selector = value;
+ break;
+ case GUEST_SS_SELECTOR:
+ current_evmcs->guest_ss_selector = value;
+ break;
+ case GUEST_DS_SELECTOR:
+ current_evmcs->guest_ds_selector = value;
+ break;
+ case GUEST_FS_SELECTOR:
+ current_evmcs->guest_fs_selector = value;
+ break;
+ case GUEST_GS_SELECTOR:
+ current_evmcs->guest_gs_selector = value;
+ break;
+ case GUEST_LDTR_SELECTOR:
+ current_evmcs->guest_ldtr_selector = value;
+ break;
+ case GUEST_TR_SELECTOR:
+ current_evmcs->guest_tr_selector = value;
+ break;
+ case VIRTUAL_PROCESSOR_ID:
+ current_evmcs->virtual_processor_id = value;
+ break;
+ default: return 1;
+ }
+
+ return 0;
+}
+
+static inline int evmcs_vmlaunch(void)
+{
+ int ret;
+
+ current_evmcs->hv_clean_fields = 0;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "mov %%rsp, (%[host_rsp]);"
+ "lea 1f(%%rip), %%rax;"
+ "mov %%rax, (%[host_rip]);"
+ "vmlaunch;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"
+ ((uint64_t)&current_evmcs->host_rsp),
+ [host_rip]"r"
+ ((uint64_t)&current_evmcs->host_rip)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int evmcs_vmresume(void)
+{
+ int ret;
+
+ current_evmcs->hv_clean_fields = 0;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "mov %%rsp, (%[host_rsp]);"
+ "lea 1f(%%rip), %%rax;"
+ "mov %%rax, (%[host_rip]);"
+ "vmresume;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"
+ ((uint64_t)&current_evmcs->host_rsp),
+ [host_rip]"r"
+ ((uint64_t)&current_evmcs->host_rip)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+#endif /* !SELFTEST_KVM_EVMCS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 3acf9a91704c..a4e59e3b4826 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -7,7 +7,7 @@
*
*/
#ifndef SELFTEST_KVM_UTIL_H
-#define SELFTEST_KVM_UTIL_H 1
+#define SELFTEST_KVM_UTIL_H
#include "test_util.h"
@@ -17,12 +17,6 @@
#include "sparsebit.h"
-/*
- * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
- * created. Only applies to VMs using EPT.
- */
-#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
-
/* Callers of kvm_util only have an incomplete/opaque description of the
* structure kvm_util is using to maintain the state of a VM.
@@ -33,16 +27,23 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
/* Minimum allocated guest virtual and physical addresses */
-#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_UTIL_MIN_VADDR 0x2000
#define DEFAULT_GUEST_PHY_PAGES 512
#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
-#define DEFAULT_STACK_PGS 5
+#define DEFAULT_STACK_PGS 5
enum vm_guest_mode {
- VM_MODE_FLAT48PG,
+ VM_MODE_P52V48_4K,
+ VM_MODE_P52V48_64K,
+ VM_MODE_P40V48_4K,
+ VM_MODE_P40V48_64K,
+ NUM_VM_MODES,
};
+#define vm_guest_mode_string(m) vm_guest_mode_string[m]
+extern const char * const vm_guest_mode_string[];
+
enum vm_mem_backing_src_type {
VM_MEM_SRC_ANONYMOUS,
VM_MEM_SRC_ANONYMOUS_THP,
@@ -58,15 +59,15 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm);
void kvm_vm_release(struct kvm_vm *vmp);
void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
-int kvm_memcmp_hva_gva(void *hva,
- struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
+ size_t len);
void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot);
+ uint32_t data_memslot, uint32_t pgd_memslot);
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-void vcpu_dump(FILE *stream, struct kvm_vm *vm,
- uint32_t vcpuid, uint8_t indent);
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
+ uint8_t indent);
void vm_create_irqchip(struct kvm_vm *vm);
@@ -75,13 +76,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
uint64_t guest_paddr, uint32_t slot, uint64_t npages,
uint32_t flags);
-void vcpu_ioctl(struct kvm_vm *vm,
- uint32_t vcpuid, unsigned long ioctl, void *arg);
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+ void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
+ int gdt_memslot);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot);
+ uint32_t data_memslot, uint32_t pgd_memslot);
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
size_t size, uint32_t pgd_memslot);
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
@@ -93,56 +95,35 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state);
-void vcpu_regs_get(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_regs *regs);
-void vcpu_regs_set(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_regs *regs);
+ struct kvm_mp_state *mp_state);
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
-void vcpu_sregs_get(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_sregs *sregs);
-void vcpu_sregs_set(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_sregs *sregs);
-int _vcpu_sregs_set(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
+ struct kvm_vcpu_events *events);
void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value);
+ struct kvm_vcpu_events *events);
const char *exit_reason_str(unsigned int exit_reason);
void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot);
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
- vm_paddr_t paddr_min, uint32_t memslot);
-
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-void vcpu_set_cpuid(
- struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
-
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
-
-static inline struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_entry(uint32_t function)
-{
- return kvm_get_supported_cpuid_index(function, 0);
-}
+ uint32_t pgd_memslot);
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+ uint32_t memslot);
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot);
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
void *guest_code);
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
-typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
- vm_paddr_t vmxon_paddr,
- vm_vaddr_t vmcs_vaddr,
- vm_paddr_t vmcs_paddr);
-
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
uint64_t end);
@@ -152,43 +133,49 @@ allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
-#define GUEST_PORT_SYNC 0x1000
-#define GUEST_PORT_ABORT 0x1001
-#define GUEST_PORT_DONE 0x1002
-
-static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
-{
- __asm__ __volatile__("in %[port], %%al"
- :
- : [port]"d"(port), "D"(arg0), "S"(arg1)
- : "rax");
-}
-
-/*
- * Allows to pass three arguments to the host: port is 16bit wide,
- * arg0 & arg1 are 64bit wide
- */
-#define GUEST_SYNC_ARGS(_port, _arg0, _arg1) \
- __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
-
-#define GUEST_ASSERT(_condition) do { \
- if (!(_condition)) \
- GUEST_SYNC_ARGS(GUEST_PORT_ABORT, \
- "Failed guest assert: " \
- #_condition, __LINE__); \
- } while (0)
-
-#define GUEST_SYNC(stage) GUEST_SYNC_ARGS(GUEST_PORT_SYNC, "hello", stage)
+#define sync_global_to_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(_p, &(g), sizeof(g)); \
+})
+
+#define sync_global_from_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(&(g), _p, sizeof(g)); \
+})
+
+/* ucall implementation types */
+typedef enum {
+ UCALL_PIO,
+ UCALL_MMIO,
+} ucall_type_t;
+
+/* Common ucalls */
+enum {
+ UCALL_NONE,
+ UCALL_SYNC,
+ UCALL_ABORT,
+ UCALL_DONE,
+};
-#define GUEST_DONE() GUEST_SYNC_ARGS(GUEST_PORT_DONE, 0, 0)
+#define UCALL_MAX_ARGS 6
-struct guest_args {
- uint64_t arg0;
- uint64_t arg1;
- uint16_t port;
-} __attribute__ ((packed));
+struct ucall {
+ uint64_t cmd;
+ uint64_t args[UCALL_MAX_ARGS];
+};
-void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
- struct guest_args *args);
+void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg);
+void ucall_uninit(struct kvm_vm *vm);
+void ucall(uint64_t cmd, int nargs, ...);
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
+
+#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_DONE() ucall(UCALL_DONE, 0)
+#define GUEST_ASSERT(_condition) do { \
+ if (!(_condition)) \
+ ucall(UCALL_ABORT, 2, \
+ "Failed guest assert: " \
+ #_condition, __LINE__); \
+} while (0)
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
index 54cfeb6568d3..31e030915c1f 100644
--- a/tools/testing/selftests/kvm/include/sparsebit.h
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -15,8 +15,8 @@
* even in the case where most bits are set.
*/
-#ifndef _TEST_SPARSEBIT_H_
-#define _TEST_SPARSEBIT_H_
+#ifndef SELFTEST_KVM_SPARSEBIT_H
+#define SELFTEST_KVM_SPARSEBIT_H
#include <stdbool.h>
#include <stdint.h>
@@ -72,4 +72,4 @@ void sparsebit_validate_internal(struct sparsebit *sbit);
}
#endif
-#endif /* _TEST_SPARSEBIT_H_ */
+#endif /* SELFTEST_KVM_SPARSEBIT_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 73c3933436ec..c7dafe8bd02c 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -7,8 +7,8 @@
*
*/
-#ifndef TEST_UTIL_H
-#define TEST_UTIL_H 1
+#ifndef SELFTEST_KVM_TEST_UTIL_H
+#define SELFTEST_KVM_TEST_UTIL_H
#include <stdlib.h>
#include <stdarg.h>
@@ -41,4 +41,4 @@ void test_assert(bool exp, const char *exp_str,
#a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
} while (0)
-#endif /* TEST_UTIL_H */
+#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 42c3596815b8..e2884c2b81ff 100644
--- a/tools/testing/selftests/kvm/include/x86.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -1,5 +1,5 @@
/*
- * tools/testing/selftests/kvm/include/x86.h
+ * tools/testing/selftests/kvm/include/x86_64/processor.h
*
* Copyright (C) 2018, Google LLC.
*
@@ -7,8 +7,8 @@
*
*/
-#ifndef SELFTEST_KVM_X86_H
-#define SELFTEST_KVM_X86_H
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
#include <assert.h>
#include <stdint.h>
@@ -305,7 +305,25 @@ static inline unsigned long get_xmm(int n)
struct kvm_x86_state;
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state);
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_x86_state *state);
+
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+
+static inline struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_entry(uint32_t function)
+{
+ return kvm_get_supported_cpuid_index(function, 0);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value);
/*
* Basic CPU control in CR0
@@ -1044,4 +1062,4 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
#define MSR_VM_IGNNE 0xc0010115
#define MSR_VM_HSAVE_PA 0xc0010117
-#endif /* !SELFTEST_KVM_X86_H */
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index b9ffe1024d3a..c9bd935b939c 100644
--- a/tools/testing/selftests/kvm/include/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -1,5 +1,5 @@
/*
- * tools/testing/selftests/kvm/include/vmx.h
+ * tools/testing/selftests/kvm/include/x86_64/vmx.h
*
* Copyright (C) 2018, Google LLC.
*
@@ -11,7 +11,7 @@
#define SELFTEST_KVM_VMX_H
#include <stdint.h>
-#include "x86.h"
+#include "processor.h"
#define CPUID_VMX_BIT 5
@@ -339,6 +339,8 @@ struct vmx_msr_entry {
uint64_t value;
} __attribute__ ((aligned(16)));
+#include "evmcs.h"
+
static inline int vmxon(uint64_t phys)
{
uint8_t ret;
@@ -372,6 +374,9 @@ static inline int vmptrld(uint64_t vmcs_pa)
{
uint8_t ret;
+ if (enable_evmcs)
+ return -1;
+
__asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
: [ret]"=rm"(ret)
: [pa]"m"(vmcs_pa)
@@ -385,6 +390,9 @@ static inline int vmptrst(uint64_t *value)
uint64_t tmp;
uint8_t ret;
+ if (enable_evmcs)
+ return evmcs_vmptrst(value);
+
__asm__ __volatile__("vmptrst %[value]; setna %[ret]"
: [value]"=m"(tmp), [ret]"=rm"(ret)
: : "cc", "memory");
@@ -411,6 +419,9 @@ static inline int vmlaunch(void)
{
int ret;
+ if (enable_evmcs)
+ return evmcs_vmlaunch();
+
__asm__ __volatile__("push %%rbp;"
"push %%rcx;"
"push %%rdx;"
@@ -443,6 +454,9 @@ static inline int vmresume(void)
{
int ret;
+ if (enable_evmcs)
+ return evmcs_vmresume();
+
__asm__ __volatile__("push %%rbp;"
"push %%rcx;"
"push %%rdx;"
@@ -482,6 +496,9 @@ static inline int vmread(uint64_t encoding, uint64_t *value)
uint64_t tmp;
uint8_t ret;
+ if (enable_evmcs)
+ return evmcs_vmread(encoding, value);
+
__asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
: [value]"=rm"(tmp), [ret]"=rm"(ret)
: [encoding]"r"(encoding)
@@ -506,6 +523,9 @@ static inline int vmwrite(uint64_t encoding, uint64_t value)
{
uint8_t ret;
+ if (enable_evmcs)
+ return evmcs_vmwrite(encoding, value);
+
__asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
: [ret]"=rm"(ret)
: [value]"rm"(value), [encoding]"r"(encoding)
@@ -543,10 +563,19 @@ struct vmx_pages {
void *vmwrite_hva;
uint64_t vmwrite_gpa;
void *vmwrite;
+
+ void *vp_assist_hva;
+ uint64_t vp_assist_gpa;
+ void *vp_assist;
+
+ void *enlightened_vmcs_hva;
+ uint64_t enlightened_vmcs_gpa;
+ void *enlightened_vmcs;
};
struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
+bool load_vmcs(struct vmx_pages *vmx);
-#endif /* !SELFTEST_KVM_VMX_H */
+#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
new file mode 100644
index 000000000000..b6022e2f116e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AArch64 code
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+ return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
+
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
+ uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+ TEST_ASSERT(vm->pgtable_levels == 4,
+ "Mode %d does not have 4 page table levels", vm->mode);
+
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
+ uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+ TEST_ASSERT(vm->pgtable_levels >= 3,
+ "Mode %d does not have >= 3 page table levels", vm->mode);
+
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+ return (gva >> vm->page_shift) & mask;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift;
+ return entry & mask;
+}
+
+static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
+{
+ unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ return 1 << (vm->va_bits - shift);
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return 1 << (vm->page_shift - 3);
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+ int rc;
+
+ if (!vm->pgd_created) {
+ vm_paddr_t paddr = vm_phy_pages_alloc(vm,
+ page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+ vm->pgd = paddr;
+ vm->pgd_created = true;
+ }
+}
+
+void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint32_t pgd_memslot, uint64_t flags)
+{
+ uint8_t attr_idx = flags & 7;
+ uint64_t *ptep;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+ if (!*ptep) {
+ *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+ *ptep |= 3;
+ }
+
+ switch (vm->pgtable_levels) {
+ case 4:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
+ if (!*ptep) {
+ *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+ *ptep |= 3;
+ }
+ /* fall through */
+ case 3:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
+ if (!*ptep) {
+ *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+ *ptep |= 3;
+ }
+ /* fall through */
+ case 2:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
+ break;
+ default:
+ TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+ }
+
+ *ptep = paddr | 3;
+ *ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
+}
+
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint32_t pgd_memslot)
+{
+ uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
+
+ _virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
+}
+
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+
+ switch (vm->pgtable_levels) {
+ case 4:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ /* fall through */
+ case 3:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ /* fall through */
+ case 2:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ break;
+ default:
+ TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+ }
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+
+unmapped_gva:
+ TEST_ASSERT(false, "No mapping for vm virtual address, "
+ "gva: 0x%lx", gva);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+#ifdef DEBUG_VM
+ static const char * const type[] = { "", "pud", "pmd", "pte" };
+ uint64_t pte, *ptep;
+
+ if (level == 4)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (!*ptep)
+ continue;
+ printf("%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
+ }
+#endif
+}
+
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level = 4 - (vm->pgtable_levels - 1);
+ uint64_t pgd, *ptep;
+
+ if (!vm->pgd_created)
+ return;
+
+ for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
+ ptep = addr_gpa2hva(vm, pgd);
+ if (!*ptep)
+ continue;
+ printf("%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
+ }
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+ void *guest_code)
+{
+ uint64_t ptrs_per_4k_pte = 512;
+ uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
+ struct kvm_vm *vm;
+
+ vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+ return vm;
+}
+
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+ size_t stack_size = vm->page_size == 4096 ?
+ DEFAULT_STACK_PGS * vm->page_size :
+ vm->page_size;
+ uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+
+ vm_vcpu_add(vm, vcpuid, 0, 0);
+
+ set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+ set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+{
+ struct kvm_vcpu_init init;
+ uint64_t sctlr_el1, tcr_el1;
+
+ memset(&init, 0, sizeof(init));
+ init.target = KVM_ARM_TARGET_GENERIC_V8;
+ vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, &init);
+
+ /*
+ * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
+ * registers, which the variable argument list macros do.
+ */
+ set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20);
+
+ get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1);
+ get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1);
+
+ switch (vm->mode) {
+ case VM_MODE_P52V48_4K:
+ tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ break;
+ case VM_MODE_P52V48_64K:
+ tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ break;
+ case VM_MODE_P40V48_4K:
+ tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+ tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+ break;
+ case VM_MODE_P40V48_64K:
+ tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+ tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+ break;
+ default:
+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
+ /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
+ tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
+ tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+
+ set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1);
+ set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1);
+ set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1);
+ set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd);
+}
+
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+ uint64_t pstate, pc;
+
+ get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate);
+ get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc);
+
+ fprintf(stream, "%*spstate: 0x%.16llx pc: 0x%.16llx\n",
+ indent, "", pstate, pc);
+
+}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index cd01144d27c8..6398efe67885 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -13,7 +13,7 @@
#include <execinfo.h>
#include <sys/syscall.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* Dumps the current stack trace to stderr. */
static void __attribute__((noinline)) test_dump_stack(void);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 6fd8c089cafc..8c06da4f03db 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -16,10 +16,8 @@
#include <sys/stat.h>
#include <linux/kernel.h>
-#define KVM_DEV_PATH "/dev/kvm"
-
#define KVM_UTIL_PGS_PER_HUGEPG 512
-#define KVM_UTIL_MIN_PADDR 0x2000
+#define KVM_UTIL_MIN_PFN 2
/* Aligns x up to the next multiple of size. Size must be a power of 2. */
static void *align(void *x, size_t size)
@@ -30,7 +28,8 @@ static void *align(void *x, size_t size)
return (void *) (((size_t) x + mask) & ~mask);
}
-/* Capability
+/*
+ * Capability
*
* Input Args:
* cap - Capability
@@ -92,16 +91,23 @@ static void vm_open(struct kvm_vm *vm, int perm)
if (vm->kvm_fd < 0)
exit(KSFT_SKIP);
- /* Create VM. */
vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL);
TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
"rc: %i errno: %i", vm->fd, errno);
}
-/* VM Create
+const char * const vm_guest_mode_string[] = {
+ "PA-bits:52, VA-bits:48, 4K pages",
+ "PA-bits:52, VA-bits:48, 64K pages",
+ "PA-bits:40, VA-bits:48, 4K pages",
+ "PA-bits:40, VA-bits:48, 64K pages",
+};
+
+/*
+ * VM Create
*
* Input Args:
- * mode - VM Mode (e.g. VM_MODE_FLAT48PG)
+ * mode - VM Mode (e.g. VM_MODE_P52V48_4K)
* phy_pages - Physical memory pages
* perm - permission
*
@@ -110,7 +116,7 @@ static void vm_open(struct kvm_vm *vm, int perm)
* Return:
* Pointer to opaque structure that describes the created VM.
*
- * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
* When phy_pages is non-zero, a memory region of phy_pages physical pages
* is created and mapped starting at guest physical address 0. The file
* descriptor to control the created VM is created with the permissions
@@ -121,7 +127,6 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
struct kvm_vm *vm;
int kvm_fd;
- /* Allocate memory. */
vm = calloc(1, sizeof(*vm));
TEST_ASSERT(vm != NULL, "Insufficent Memory");
@@ -130,26 +135,48 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
/* Setup mode specific traits. */
switch (vm->mode) {
- case VM_MODE_FLAT48PG:
+ case VM_MODE_P52V48_4K:
+ vm->pgtable_levels = 4;
vm->page_size = 0x1000;
vm->page_shift = 12;
-
- /* Limit to 48-bit canonical virtual addresses. */
- vm->vpages_valid = sparsebit_alloc();
- sparsebit_set_num(vm->vpages_valid,
- 0, (1ULL << (48 - 1)) >> vm->page_shift);
- sparsebit_set_num(vm->vpages_valid,
- (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
- (1ULL << (48 - 1)) >> vm->page_shift);
-
- /* Limit physical addresses to 52-bits. */
- vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
+ vm->va_bits = 48;
+ break;
+ case VM_MODE_P52V48_64K:
+ vm->pgtable_levels = 3;
+ vm->pa_bits = 52;
+ vm->page_size = 0x10000;
+ vm->page_shift = 16;
+ vm->va_bits = 48;
+ break;
+ case VM_MODE_P40V48_4K:
+ vm->pgtable_levels = 4;
+ vm->pa_bits = 40;
+ vm->va_bits = 48;
+ vm->page_size = 0x1000;
+ vm->page_shift = 12;
+ break;
+ case VM_MODE_P40V48_64K:
+ vm->pgtable_levels = 3;
+ vm->pa_bits = 40;
+ vm->va_bits = 48;
+ vm->page_size = 0x10000;
+ vm->page_shift = 16;
break;
-
default:
TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
}
+ /* Limit to VA-bit canonical virtual addresses. */
+ vm->vpages_valid = sparsebit_alloc();
+ sparsebit_set_num(vm->vpages_valid,
+ 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+ sparsebit_set_num(vm->vpages_valid,
+ (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
+ (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+
+ /* Limit physical addresses to PA-bits. */
+ vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+
/* Allocate and setup memory for guest. */
vm->vpages_mapped = sparsebit_alloc();
if (phy_pages != 0)
@@ -159,7 +186,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
return vm;
}
-/* VM Restart
+/*
+ * VM Restart
*
* Input Args:
* vm - VM that has been released before
@@ -186,7 +214,8 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
" guest_phys_addr: 0x%lx size: 0x%lx",
- ret, errno, region->region.slot, region->region.flags,
+ ret, errno, region->region.slot,
+ region->region.flags,
region->region.guest_phys_addr,
region->region.memory_size);
}
@@ -202,7 +231,8 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
strerror(-ret));
}
-/* Userspace Memory Region Find
+/*
+ * Userspace Memory Region Find
*
* Input Args:
* vm - Virtual Machine
@@ -220,8 +250,8 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
* of the regions is returned. Null is returned only when no overlapping
* region exists.
*/
-static struct userspace_mem_region *userspace_mem_region_find(
- struct kvm_vm *vm, uint64_t start, uint64_t end)
+static struct userspace_mem_region *
+userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
struct userspace_mem_region *region;
@@ -237,7 +267,8 @@ static struct userspace_mem_region *userspace_mem_region_find(
return NULL;
}
-/* KVM Userspace Memory Region Find
+/*
+ * KVM Userspace Memory Region Find
*
* Input Args:
* vm - Virtual Machine
@@ -265,7 +296,8 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
return &region->region;
}
-/* VCPU Find
+/*
+ * VCPU Find
*
* Input Args:
* vm - Virtual Machine
@@ -280,8 +312,7 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
* returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
* for the specified vcpuid.
*/
-struct vcpu *vcpu_find(struct kvm_vm *vm,
- uint32_t vcpuid)
+struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpup;
@@ -293,7 +324,8 @@ struct vcpu *vcpu_find(struct kvm_vm *vm,
return NULL;
}
-/* VM VCPU Remove
+/*
+ * VM VCPU Remove
*
* Input Args:
* vm - Virtual Machine
@@ -330,11 +362,9 @@ void kvm_vm_release(struct kvm_vm *vmp)
{
int ret;
- /* Free VCPUs. */
while (vmp->vcpu_head)
vm_vcpu_rm(vmp, vmp->vcpu_head->id);
- /* Close file descriptor for the VM. */
ret = close(vmp->fd);
TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
" vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
@@ -344,7 +374,8 @@ void kvm_vm_release(struct kvm_vm *vmp)
" vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
}
-/* Destroys and frees the VM pointed to by vmp.
+/*
+ * Destroys and frees the VM pointed to by vmp.
*/
void kvm_vm_free(struct kvm_vm *vmp)
{
@@ -383,7 +414,8 @@ void kvm_vm_free(struct kvm_vm *vmp)
free(vmp);
}
-/* Memory Compare, host virtual to guest virtual
+/*
+ * Memory Compare, host virtual to guest virtual
*
* Input Args:
* hva - Starting host virtual address
@@ -405,23 +437,25 @@ void kvm_vm_free(struct kvm_vm *vmp)
* a length of len, to the guest bytes starting at the guest virtual
* address given by gva.
*/
-int kvm_memcmp_hva_gva(void *hva,
- struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
size_t amt;
- /* Compare a batch of bytes until either a match is found
+ /*
+ * Compare a batch of bytes until either a match is found
* or all the bytes have been compared.
*/
for (uintptr_t offset = 0; offset < len; offset += amt) {
uintptr_t ptr1 = (uintptr_t)hva + offset;
- /* Determine host address for guest virtual address
+ /*
+ * Determine host address for guest virtual address
* at offset.
*/
uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
- /* Determine amount to compare on this pass.
+ /*
+ * Determine amount to compare on this pass.
* Don't allow the comparsion to cross a page boundary.
*/
amt = len - offset;
@@ -433,7 +467,8 @@ int kvm_memcmp_hva_gva(void *hva,
assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
- /* Perform the comparison. If there is a difference
+ /*
+ * Perform the comparison. If there is a difference
* return that result to the caller, otherwise need
* to continue on looking for a mismatch.
*/
@@ -442,109 +477,15 @@ int kvm_memcmp_hva_gva(void *hva,
return ret;
}
- /* No mismatch found. Let the caller know the two memory
+ /*
+ * No mismatch found. Let the caller know the two memory
* areas are equal.
*/
return 0;
}
-/* Allocate an instance of struct kvm_cpuid2
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return: A pointer to the allocated struct. The caller is responsible
- * for freeing this struct.
- *
- * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
- * array to be decided at allocation time, allocation is slightly
- * complicated. This function uses a reasonable default length for
- * the array and performs the appropriate allocation.
- */
-static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
-{
- struct kvm_cpuid2 *cpuid;
- int nent = 100;
- size_t size;
-
- size = sizeof(*cpuid);
- size += nent * sizeof(struct kvm_cpuid_entry2);
- cpuid = malloc(size);
- if (!cpuid) {
- perror("malloc");
- abort();
- }
-
- cpuid->nent = nent;
-
- return cpuid;
-}
-
-/* KVM Supported CPUID Get
- *
- * Input Args: None
- *
- * Output Args:
- *
- * Return: The supported KVM CPUID
- *
- * Get the guest CPUID supported by KVM.
- */
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
-{
- static struct kvm_cpuid2 *cpuid;
- int ret;
- int kvm_fd;
-
- if (cpuid)
- return cpuid;
-
- cpuid = allocate_kvm_cpuid2();
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
-
- ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
- TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
- ret, errno);
-
- close(kvm_fd);
- return cpuid;
-}
-
-/* Locate a cpuid entry.
- *
- * Input Args:
- * cpuid: The cpuid.
- * function: The function of the cpuid entry to find.
- *
- * Output Args: None
- *
- * Return: A pointer to the cpuid entry. Never returns NULL.
- */
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
-{
- struct kvm_cpuid2 *cpuid;
- struct kvm_cpuid_entry2 *entry = NULL;
- int i;
-
- cpuid = kvm_get_supported_cpuid();
- for (i = 0; i < cpuid->nent; i++) {
- if (cpuid->entries[i].function == function &&
- cpuid->entries[i].index == index) {
- entry = &cpuid->entries[i];
- break;
- }
- }
-
- TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
- function, index);
- return entry;
-}
-
-/* VM Userspace Memory Region Add
+/*
+ * VM Userspace Memory Region Add
*
* Input Args:
* vm - Virtual Machine
@@ -586,7 +527,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
" vm->max_gfn: 0x%lx vm->page_size: 0x%x",
guest_paddr, npages, vm->max_gfn, vm->page_size);
- /* Confirm a mem region with an overlapping address doesn't
+ /*
+ * Confirm a mem region with an overlapping address doesn't
* already exist.
*/
region = (struct userspace_mem_region *) userspace_mem_region_find(
@@ -677,7 +619,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
vm->userspace_mem_region_head = region;
}
-/* Memslot to region
+/*
+ * Memslot to region
*
* Input Args:
* vm - Virtual Machine
@@ -691,8 +634,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
* on error (e.g. currently no memory region using memslot as a KVM
* memory slot ID).
*/
-static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
- uint32_t memslot)
+static struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;
@@ -712,7 +655,8 @@ static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
return region;
}
-/* VM Memory Region Flags Set
+/*
+ * VM Memory Region Flags Set
*
* Input Args:
* vm - Virtual Machine
@@ -730,7 +674,6 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
int ret;
struct userspace_mem_region *region;
- /* Locate memory region. */
region = memslot2region(vm, slot);
region->region.flags = flags;
@@ -742,7 +685,8 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
ret, errno, slot, flags);
}
-/* VCPU mmap Size
+/*
+ * VCPU mmap Size
*
* Input Args: None
*
@@ -772,7 +716,8 @@ static int vcpu_mmap_sz(void)
return ret;
}
-/* VM VCPU Add
+/*
+ * VM VCPU Add
*
* Input Args:
* vm - Virtual Machine
@@ -785,7 +730,8 @@ static int vcpu_mmap_sz(void)
* Creates and adds to the VM specified by vm and virtual CPU with
* the ID given by vcpuid.
*/
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot)
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
+ int gdt_memslot)
{
struct vcpu *vcpu;
@@ -823,7 +769,8 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_me
vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot);
}
-/* VM Virtual Address Unused Gap
+/*
+ * VM Virtual Address Unused Gap
*
* Input Args:
* vm - Virtual Machine
@@ -843,14 +790,14 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_me
* sz unallocated bytes >= vaddr_min is available.
*/
static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
- vm_vaddr_t vaddr_min)
+ vm_vaddr_t vaddr_min)
{
uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
/* Determine lowest permitted virtual page index. */
uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
if ((pgidx_start * vm->page_size) < vaddr_min)
- goto no_va_found;
+ goto no_va_found;
/* Loop over section with enough valid virtual page indexes. */
if (!sparsebit_is_set_num(vm->vpages_valid,
@@ -909,7 +856,8 @@ va_found:
return pgidx_start * vm->page_size;
}
-/* VM Virtual Address Allocate
+/*
+ * VM Virtual Address Allocate
*
* Input Args:
* vm - Virtual Machine
@@ -930,13 +878,14 @@ va_found:
* a page.
*/
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot)
+ uint32_t data_memslot, uint32_t pgd_memslot)
{
uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
virt_pgd_alloc(vm, pgd_memslot);
- /* Find an unused range of virtual page addresses of at least
+ /*
+ * Find an unused range of virtual page addresses of at least
* pages in length.
*/
vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
@@ -946,7 +895,8 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
pages--, vaddr += vm->page_size) {
vm_paddr_t paddr;
- paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
+ paddr = vm_phy_page_alloc(vm,
+ KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
virt_pg_map(vm, vaddr, paddr, pgd_memslot);
@@ -990,7 +940,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
}
}
-/* Address VM Physical to Host Virtual
+/*
+ * Address VM Physical to Host Virtual
*
* Input Args:
* vm - Virtual Machine
@@ -1022,7 +973,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
return NULL;
}
-/* Address Host Virtual to VM Physical
+/*
+ * Address Host Virtual to VM Physical
*
* Input Args:
* vm - Virtual Machine
@@ -1056,7 +1008,8 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
return -1;
}
-/* VM Create IRQ Chip
+/*
+ * VM Create IRQ Chip
*
* Input Args:
* vm - Virtual Machine
@@ -1078,7 +1031,8 @@ void vm_create_irqchip(struct kvm_vm *vm)
vm->has_irqchip = true;
}
-/* VM VCPU State
+/*
+ * VM VCPU State
*
* Input Args:
* vm - Virtual Machine
@@ -1100,7 +1054,8 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
return vcpu->state;
}
-/* VM VCPU Run
+/*
+ * VM VCPU Run
*
* Input Args:
* vm - Virtual Machine
@@ -1126,13 +1081,14 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
int rc;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
- do {
+ do {
rc = ioctl(vcpu->fd, KVM_RUN, NULL);
} while (rc == -1 && errno == EINTR);
return rc;
}
-/* VM VCPU Set MP State
+/*
+ * VM VCPU Set MP State
*
* Input Args:
* vm - Virtual Machine
@@ -1147,7 +1103,7 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
* by mp_state.
*/
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state)
+ struct kvm_mp_state *mp_state)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
@@ -1159,7 +1115,8 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
"rc: %i errno: %i", ret, errno);
}
-/* VM VCPU Regs Get
+/*
+ * VM VCPU Regs Get
*
* Input Args:
* vm - Virtual Machine
@@ -1173,21 +1130,20 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
* Obtains the current register state for the VCPU specified by vcpuid
* and stores it at the location given by regs.
*/
-void vcpu_regs_get(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_regs *regs)
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
- /* Get the regs. */
ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
ret, errno);
}
-/* VM VCPU Regs Set
+/*
+ * VM VCPU Regs Set
*
* Input Args:
* vm - Virtual Machine
@@ -1201,165 +1157,46 @@ void vcpu_regs_get(struct kvm_vm *vm,
* Sets the regs of the VCPU specified by vcpuid to the values
* given by regs.
*/
-void vcpu_regs_set(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_regs *regs)
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
- /* Set the regs. */
ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
ret, errno);
}
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events)
+ struct kvm_vcpu_events *events)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
- /* Get the regs. */
ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
ret, errno);
}
void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events)
+ struct kvm_vcpu_events *events)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
- /* Set the regs. */
ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
ret, errno);
}
-/* VCPU Get MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- *
- * Output Args: None
- *
- * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
- *
- * Get value of MSR for VCPU.
- */
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- struct {
- struct kvm_msrs header;
- struct kvm_msr_entry entry;
- } buffer = {};
- int r;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
- buffer.header.nmsrs = 1;
- buffer.entry.index = msr_index;
- r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
- TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
-
- return buffer.entry.data;
-}
-
-/* VCPU Set MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- * msr_value - New value of MSR
- *
- * Output Args: None
- *
- * Return: On success, nothing. On failure a TEST_ASSERT is produced.
- *
- * Set value of MSR for VCPU.
- */
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- struct {
- struct kvm_msrs header;
- struct kvm_msr_entry entry;
- } buffer = {};
- int r;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
- memset(&buffer, 0, sizeof(buffer));
- buffer.header.nmsrs = 1;
- buffer.entry.index = msr_index;
- buffer.entry.data = msr_value;
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
- TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
-}
-
-/* VM VCPU Args Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * num - number of arguments
- * ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first num function input arguments to the values
- * given as variable args. Each of the variable args is expected to
- * be of type uint64_t.
- */
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
-{
- va_list ap;
- struct kvm_regs regs;
-
- TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
- " num: %u\n",
- num);
-
- va_start(ap, num);
- vcpu_regs_get(vm, vcpuid, &regs);
-
- if (num >= 1)
- regs.rdi = va_arg(ap, uint64_t);
-
- if (num >= 2)
- regs.rsi = va_arg(ap, uint64_t);
-
- if (num >= 3)
- regs.rdx = va_arg(ap, uint64_t);
-
- if (num >= 4)
- regs.rcx = va_arg(ap, uint64_t);
-
- if (num >= 5)
- regs.r8 = va_arg(ap, uint64_t);
-
- if (num >= 6)
- regs.r9 = va_arg(ap, uint64_t);
-
- vcpu_regs_set(vm, vcpuid, &regs);
- va_end(ap);
-}
-
-/* VM VCPU System Regs Get
+/*
+ * VM VCPU System Regs Get
*
* Input Args:
* vm - Virtual Machine
@@ -1373,22 +1210,20 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
* Obtains the current system register state for the VCPU specified by
* vcpuid and stores it at the location given by sregs.
*/
-void vcpu_sregs_get(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_sregs *sregs)
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
- /* Get the regs. */
- /* Get the regs. */
ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
ret, errno);
}
-/* VM VCPU System Regs Set
+/*
+ * VM VCPU System Regs Set
*
* Input Args:
* vm - Virtual Machine
@@ -1402,27 +1237,25 @@ void vcpu_sregs_get(struct kvm_vm *vm,
* Sets the system regs of the VCPU specified by vcpuid to the values
* given by sregs.
*/
-void vcpu_sregs_set(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_sregs *sregs)
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
"rc: %i errno: %i", ret, errno);
}
-int _vcpu_sregs_set(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_sregs *sregs)
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
- /* Get the regs. */
return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
}
-/* VCPU Ioctl
+/*
+ * VCPU Ioctl
*
* Input Args:
* vm - Virtual Machine
@@ -1434,8 +1267,8 @@ int _vcpu_sregs_set(struct kvm_vm *vm,
*
* Issues an arbitrary ioctl on a VCPU fd.
*/
-void vcpu_ioctl(struct kvm_vm *vm,
- uint32_t vcpuid, unsigned long cmd, void *arg)
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
+ unsigned long cmd, void *arg)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
@@ -1447,7 +1280,8 @@ void vcpu_ioctl(struct kvm_vm *vm,
cmd, ret, errno, strerror(errno));
}
-/* VM Ioctl
+/*
+ * VM Ioctl
*
* Input Args:
* vm - Virtual Machine
@@ -1467,7 +1301,8 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
cmd, ret, errno, strerror(errno));
}
-/* VM Dump
+/*
+ * VM Dump
*
* Input Args:
* vm - Virtual Machine
@@ -1514,38 +1349,6 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
vcpu_dump(stream, vm, vcpu->id, indent + 2);
}
-/* VM VCPU Dump
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * indent - Left margin indent amount
- *
- * Output Args:
- * stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by vcpuid, within the VM
- * given by vm, to the FILE stream given by stream.
- */
-void vcpu_dump(FILE *stream, struct kvm_vm *vm,
- uint32_t vcpuid, uint8_t indent)
-{
- struct kvm_regs regs;
- struct kvm_sregs sregs;
-
- fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
-
- fprintf(stream, "%*sregs:\n", indent + 2, "");
- vcpu_regs_get(vm, vcpuid, &regs);
- regs_dump(stream, &regs, indent + 4);
-
- fprintf(stream, "%*ssregs:\n", indent + 2, "");
- vcpu_sregs_get(vm, vcpuid, &sregs);
- sregs_dump(stream, &sregs, indent + 4);
-}
-
/* Known KVM exit reasons */
static struct exit_reason {
unsigned int reason;
@@ -1576,7 +1379,8 @@ static struct exit_reason {
#endif
};
-/* Exit Reason String
+/*
+ * Exit Reason String
*
* Input Args:
* exit_reason - Exit reason
@@ -1602,10 +1406,12 @@ const char *exit_reason_str(unsigned int exit_reason)
return "Unknown";
}
-/* Physical Page Allocate
+/*
+ * Physical Contiguous Page Allocator
*
* Input Args:
* vm - Virtual Machine
+ * num - number of pages
* paddr_min - Physical address minimum
* memslot - Memory region to allocate page from
*
@@ -1614,47 +1420,59 @@ const char *exit_reason_str(unsigned int exit_reason)
* Return:
* Starting physical address
*
- * Within the VM specified by vm, locates an available physical page
- * at or above paddr_min. If found, the page is marked as in use
- * and its address is returned. A TEST_ASSERT failure occurs if no
- * page is available at or above paddr_min.
+ * Within the VM specified by vm, locates a range of available physical
+ * pages at or above paddr_min. If found, the pages are marked as in use
+ * and thier base address is returned. A TEST_ASSERT failure occurs if
+ * not enough pages are available at or above paddr_min.
*/
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
- vm_paddr_t paddr_min, uint32_t memslot)
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot)
{
struct userspace_mem_region *region;
- sparsebit_idx_t pg;
+ sparsebit_idx_t pg, base;
+
+ TEST_ASSERT(num > 0, "Must allocate at least one page");
TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
"not divisible by page size.\n"
" paddr_min: 0x%lx page_size: 0x%x",
paddr_min, vm->page_size);
- /* Locate memory region. */
region = memslot2region(vm, memslot);
+ base = pg = paddr_min >> vm->page_shift;
- /* Locate next available physical page at or above paddr_min. */
- pg = paddr_min >> vm->page_shift;
-
- if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
- pg = sparsebit_next_set(region->unused_phy_pages, pg);
- if (pg == 0) {
- fprintf(stderr, "No guest physical page available, "
- "paddr_min: 0x%lx page_size: 0x%x memslot: %u",
- paddr_min, vm->page_size, memslot);
- fputs("---- vm dump ----\n", stderr);
- vm_dump(stderr, vm, 2);
- abort();
+ do {
+ for (; pg < base + num; ++pg) {
+ if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+ base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
+ break;
+ }
}
+ } while (pg && pg != base + num);
+
+ if (pg == 0) {
+ fprintf(stderr, "No guest physical page available, "
+ "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
+ paddr_min, vm->page_size, memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ abort();
}
- /* Specify page as in use and return its address. */
- sparsebit_clear(region->unused_phy_pages, pg);
+ for (pg = base; pg < base + num; ++pg)
+ sparsebit_clear(region->unused_phy_pages, pg);
+
+ return base * vm->page_size;
+}
- return pg * vm->page_size;
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+ uint32_t memslot)
+{
+ return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}
-/* Address Guest Virtual to Host Virtual
+/*
+ * Address Guest Virtual to Host Virtual
*
* Input Args:
* vm - Virtual Machine
@@ -1669,17 +1487,3 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}
-
-void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
- struct guest_args *args)
-{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
- struct kvm_regs regs;
-
- memset(&regs, 0, sizeof(regs));
- vcpu_regs_get(vm, vcpu_id, &regs);
-
- args->port = run->io.port;
- args->arg0 = regs.rdi;
- args->arg1 = regs.rsi;
-}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index 542ed606b338..52701db0f253 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -1,28 +1,29 @@
/*
- * tools/testing/selftests/kvm/lib/kvm_util.c
+ * tools/testing/selftests/kvm/lib/kvm_util_internal.h
*
* Copyright (C) 2018, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*/
-#ifndef KVM_UTIL_INTERNAL_H
-#define KVM_UTIL_INTERNAL_H 1
+#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
+#define SELFTEST_KVM_UTIL_INTERNAL_H
#include "sparsebit.h"
+#define KVM_DEV_PATH "/dev/kvm"
+
#ifndef BITS_PER_BYTE
-#define BITS_PER_BYTE 8
+#define BITS_PER_BYTE 8
#endif
#ifndef BITS_PER_LONG
-#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
#endif
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
-#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
-/* Concrete definition of struct kvm_vm. */
struct userspace_mem_region {
struct userspace_mem_region *next, *prev;
struct kvm_userspace_memory_region region;
@@ -45,14 +46,16 @@ struct kvm_vm {
int mode;
int kvm_fd;
int fd;
+ unsigned int pgtable_levels;
unsigned int page_size;
unsigned int page_shift;
+ unsigned int pa_bits;
+ unsigned int va_bits;
uint64_t max_gfn;
struct vcpu *vcpu_head;
struct userspace_mem_region *userspace_mem_region_head;
struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped;
-
bool has_irqchip;
bool pgd_created;
vm_paddr_t pgd;
@@ -60,13 +63,11 @@ struct kvm_vm {
vm_vaddr_t tss;
};
-struct vcpu *vcpu_find(struct kvm_vm *vm,
- uint32_t vcpuid);
-void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot);
+struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot,
+ int gdt_memslot);
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-void regs_dump(FILE *stream, struct kvm_regs *regs,
- uint8_t indent);
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
- uint8_t indent);
+void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
-#endif
+#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */
diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c
new file mode 100644
index 000000000000..4777f9bb5194
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/ucall.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+static ucall_type_t ucall_type;
+static vm_vaddr_t *ucall_exit_mmio_addr;
+
+static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
+ return false;
+
+ virt_pg_map(vm, gpa, gpa, 0);
+
+ ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
+ sync_global_to_guest(vm, ucall_exit_mmio_addr);
+
+ return true;
+}
+
+void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg)
+{
+ ucall_type = type;
+ sync_global_to_guest(vm, ucall_type);
+
+ if (type == UCALL_PIO)
+ return;
+
+ if (type == UCALL_MMIO) {
+ vm_paddr_t gpa, start, end, step;
+ bool ret;
+
+ if (arg) {
+ gpa = (vm_paddr_t)arg;
+ ret = ucall_mmio_init(vm, gpa);
+ TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
+ return;
+ }
+
+ /*
+ * Find an address within the allowed virtual address space,
+ * that does _not_ have a KVM memory region associated with it.
+ * Identity mapping an address like this allows the guest to
+ * access it, but as KVM doesn't know what to do with it, it
+ * will assume it's something userspace handles and exit with
+ * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
+ * Here we start with a guess that the addresses around two
+ * thirds of the VA space are unmapped and then work both down
+ * and up from there in 1/6 VA space sized steps.
+ */
+ start = 1ul << (vm->va_bits * 2 / 3);
+ end = 1ul << vm->va_bits;
+ step = 1ul << (vm->va_bits / 6);
+ for (gpa = start; gpa >= 0; gpa -= step) {
+ if (ucall_mmio_init(vm, gpa & ~(vm->page_size - 1)))
+ return;
+ }
+ for (gpa = start + step; gpa < end; gpa += step) {
+ if (ucall_mmio_init(vm, gpa & ~(vm->page_size - 1)))
+ return;
+ }
+ TEST_ASSERT(false, "Can't find a ucall mmio address");
+ }
+}
+
+void ucall_uninit(struct kvm_vm *vm)
+{
+ ucall_type = 0;
+ sync_global_to_guest(vm, ucall_type);
+ ucall_exit_mmio_addr = 0;
+ sync_global_to_guest(vm, ucall_exit_mmio_addr);
+}
+
+static void ucall_pio_exit(struct ucall *uc)
+{
+#ifdef __x86_64__
+ asm volatile("in %[port], %%al"
+ : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax");
+#endif
+}
+
+static void ucall_mmio_exit(struct ucall *uc)
+{
+ *ucall_exit_mmio_addr = (vm_vaddr_t)uc;
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+ struct ucall uc = {
+ .cmd = cmd,
+ };
+ va_list va;
+ int i;
+
+ nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+ uc.args[i] = va_arg(va, uint64_t);
+ va_end(va);
+
+ switch (ucall_type) {
+ case UCALL_PIO:
+ ucall_pio_exit(&uc);
+ break;
+ case UCALL_MMIO:
+ ucall_mmio_exit(&uc);
+ break;
+ };
+}
+
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+ struct kvm_run *run = vcpu_state(vm, vcpu_id);
+
+ memset(uc, 0, sizeof(*uc));
+
+#ifdef __x86_64__
+ if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO &&
+ run->io.port == UCALL_PIO_PORT) {
+ struct kvm_regs regs;
+ vcpu_regs_get(vm, vcpu_id, &regs);
+ memcpy(uc, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(*uc));
+ return uc->cmd;
+ }
+#endif
+ if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO &&
+ run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
+ vm_vaddr_t gva;
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
+ "Unexpected ucall exit mmio address access");
+ gva = *(vm_vaddr_t *)run->mmio.data;
+ memcpy(uc, addr_gva2hva(vm, gva), sizeof(*uc));
+ }
+
+ return uc->cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index a3122f1949a8..f28127f4a3af 100644
--- a/tools/testing/selftests/kvm/lib/x86.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1,5 +1,5 @@
/*
- * tools/testing/selftests/kvm/lib/x86.c
+ * tools/testing/selftests/kvm/lib/x86_64/processor.c
*
* Copyright (C) 2018, Google LLC.
*
@@ -10,8 +10,8 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "kvm_util_internal.h"
-#include "x86.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
@@ -231,7 +231,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
{
int rc;
- TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
/* If needed, create page map l4 table. */
@@ -264,7 +264,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint16_t index[4];
struct pageMapL4Entry *pml4e;
- TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -551,7 +551,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
struct pageTableEntry *pte;
void *hva;
- TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
index[0] = (gva >> 12) & 0x1ffu;
@@ -624,9 +624,9 @@ void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
switch (vm->mode) {
- case VM_MODE_FLAT48PG:
+ case VM_MODE_P52V48_4K:
sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
- sregs.cr4 |= X86_CR4_PAE;
+ sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt);
@@ -672,6 +672,102 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
vcpu_set_mp_state(vm, vcpuid, &mp_state);
}
+/* Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct. The caller is responsible
+ * for freeing this struct.
+ *
+ * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
+ * array to be decided at allocation time, allocation is slightly
+ * complicated. This function uses a reasonable default length for
+ * the array and performs the appropriate allocation.
+ */
+static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+ struct kvm_cpuid2 *cpuid;
+ int nent = 100;
+ size_t size;
+
+ size = sizeof(*cpuid);
+ size += nent * sizeof(struct kvm_cpuid_entry2);
+ cpuid = malloc(size);
+ if (!cpuid) {
+ perror("malloc");
+ abort();
+ }
+
+ cpuid->nent = nent;
+
+ return cpuid;
+}
+
+/* KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *
+ * Return: The supported KVM CPUID
+ *
+ * Get the guest CPUID supported by KVM.
+ */
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+ static struct kvm_cpuid2 *cpuid;
+ int ret;
+ int kvm_fd;
+
+ if (cpuid)
+ return cpuid;
+
+ cpuid = allocate_kvm_cpuid2();
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+ TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+ ret, errno);
+
+ close(kvm_fd);
+ return cpuid;
+}
+
+/* Locate a cpuid entry.
+ *
+ * Input Args:
+ * cpuid: The cpuid.
+ * function: The function of the cpuid entry to find.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the cpuid entry. Never returns NULL.
+ */
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+{
+ struct kvm_cpuid2 *cpuid;
+ struct kvm_cpuid_entry2 *entry = NULL;
+ int i;
+
+ cpuid = kvm_get_supported_cpuid();
+ for (i = 0; i < cpuid->nent; i++) {
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index) {
+ entry = &cpuid->entries[i];
+ break;
+ }
+ }
+
+ TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+ function, index);
+ return entry;
+}
+
/* VM VCPU CPUID Set
*
* Input Args:
@@ -698,6 +794,7 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
rc, errno);
}
+
/* Create a VM with reasonable defaults
*
* Input Args:
@@ -726,7 +823,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
/* Create VM */
- vm = vm_create(VM_MODE_FLAT48PG,
+ vm = vm_create(VM_MODE_P52V48_4K,
DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
O_RDWR);
@@ -742,6 +839,154 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
return vm;
}
+/* VCPU Get MSR
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * msr_index - Index of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
+ *
+ * Get value of MSR for VCPU.
+ */
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
+ " rc: %i errno: %i", r, errno);
+
+ return buffer.entry.data;
+}
+
+/* VCPU Set MSR
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * msr_index - Index of MSR
+ * msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ *
+ * Set value of MSR for VCPU.
+ */
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ memset(&buffer, 0, sizeof(buffer));
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ buffer.entry.data = msr_value;
+ r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
+ " rc: %i errno: %i", r, errno);
+}
+
+/* VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first num function input arguments to the values
+ * given as variable args. Each of the variable args is expected to
+ * be of type uint64_t.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+ " num: %u\n",
+ num);
+
+ va_start(ap, num);
+ vcpu_regs_get(vm, vcpuid, &regs);
+
+ if (num >= 1)
+ regs.rdi = va_arg(ap, uint64_t);
+
+ if (num >= 2)
+ regs.rsi = va_arg(ap, uint64_t);
+
+ if (num >= 3)
+ regs.rdx = va_arg(ap, uint64_t);
+
+ if (num >= 4)
+ regs.rcx = va_arg(ap, uint64_t);
+
+ if (num >= 5)
+ regs.r8 = va_arg(ap, uint64_t);
+
+ if (num >= 6)
+ regs.r9 = va_arg(ap, uint64_t);
+
+ vcpu_regs_set(vm, vcpuid, &regs);
+ va_end(ap);
+}
+
+/*
+ * VM VCPU Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by vcpuid, within the VM
+ * given by vm, to the FILE stream given by stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+
+ fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+
+ fprintf(stream, "%*sregs:\n", indent + 2, "");
+ vcpu_regs_get(vm, vcpuid, &regs);
+ regs_dump(stream, &regs, indent + 4);
+
+ fprintf(stream, "%*ssregs:\n", indent + 2, "");
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+ sregs_dump(stream, &sregs, indent + 4);
+}
+
struct kvm_x86_state {
struct kvm_vcpu_events events;
struct kvm_mp_state mp_state;
diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index b987c3c970eb..771ba6bf751c 100644
--- a/tools/testing/selftests/kvm/lib/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -1,5 +1,5 @@
/*
- * tools/testing/selftests/kvm/lib/x86.c
+ * tools/testing/selftests/kvm/lib/x86_64/vmx.c
*
* Copyright (C) 2018, Google LLC.
*
@@ -10,9 +10,11 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
#include "vmx.h"
+bool enable_evmcs;
+
/* Allocate memory regions for nested VMX tests.
*
* Input Args:
@@ -62,6 +64,20 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
+ /* Setup of a region of guest memory for the VP Assist page. */
+ vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
+ 0x10000, 0, 0);
+ vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
+ vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
+
+ /* Setup of a region of guest memory for the enlightened VMCS. */
+ vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
+ 0x10000, 0, 0);
+ vmx->enlightened_vmcs_hva =
+ addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
+ vmx->enlightened_vmcs_gpa =
+ addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs);
+
*p_vmx_gva = vmx_gva;
return vmx;
}
@@ -107,18 +123,31 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx)
if (vmxon(vmx->vmxon_gpa))
return false;
- /* Load a VMCS. */
- *(uint32_t *)(vmx->vmcs) = vmcs_revision();
- if (vmclear(vmx->vmcs_gpa))
- return false;
-
- if (vmptrld(vmx->vmcs_gpa))
- return false;
+ return true;
+}
- /* Setup shadow VMCS, do not load it yet. */
- *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
- if (vmclear(vmx->shadow_vmcs_gpa))
- return false;
+bool load_vmcs(struct vmx_pages *vmx)
+{
+ if (!enable_evmcs) {
+ /* Load a VMCS. */
+ *(uint32_t *)(vmx->vmcs) = vmcs_revision();
+ if (vmclear(vmx->vmcs_gpa))
+ return false;
+
+ if (vmptrld(vmx->vmcs_gpa))
+ return false;
+
+ /* Setup shadow VMCS, do not load it yet. */
+ *(uint32_t *)(vmx->shadow_vmcs) =
+ vmcs_revision() | 0x80000000ul;
+ if (vmclear(vmx->shadow_vmcs_gpa))
+ return false;
+ } else {
+ if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa,
+ vmx->enlightened_vmcs))
+ return false;
+ current_evmcs->revision_id = vmcs_revision();
+ }
return true;
}
diff --git a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 11ec358bf969..d503a51fad30 100644
--- a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -17,7 +17,7 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
#define X86_FEATURE_XSAVE (1<<26)
#define X86_FEATURE_OSXSAVE (1<<27)
@@ -67,6 +67,7 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
struct kvm_sregs sregs;
struct kvm_cpuid_entry2 *entry;
+ struct ucall uc;
int rc;
entry = kvm_get_supported_cpuid_entry(1);
@@ -87,21 +88,20 @@ int main(int argc, char *argv[])
rc = _vcpu_run(vm, VCPU_ID);
if (run->exit_reason == KVM_EXIT_IO) {
- switch (run->io.port) {
- case GUEST_PORT_SYNC:
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
/* emulate hypervisor clearing CR4.OSXSAVE */
vcpu_sregs_get(vm, VCPU_ID, &sregs);
sregs.cr4 &= ~X86_CR4_OSXSAVE;
vcpu_sregs_set(vm, VCPU_ID, &sregs);
break;
- case GUEST_PORT_ABORT:
+ case UCALL_ABORT:
TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
break;
- case GUEST_PORT_DONE:
+ case UCALL_DONE:
goto done;
default:
- TEST_ASSERT(false, "Unknown port 0x%x.",
- run->io.port);
+ TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
}
}
}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
new file mode 100644
index 000000000000..92c2cfd1b182
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for Enlightened VMCS, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+
+#define VCPU_ID 5
+
+static bool have_nested_state;
+
+void l2_guest_code(void)
+{
+ GUEST_SYNC(6);
+
+ GUEST_SYNC(7);
+
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_SYNC(3);
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+
+ GUEST_SYNC(4);
+ GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(5);
+ GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+ GUEST_SYNC(8);
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_SYNC(9);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ GUEST_SYNC(1);
+ GUEST_SYNC(2);
+
+ if (vmx_pages)
+ l1_guest_code(vmx_pages);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct vmx_pages *vmx_pages = NULL;
+ vm_vaddr_t vmx_pages_gva = 0;
+
+ struct kvm_regs regs1, regs2;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+ uint16_t evmcs_ver;
+ struct kvm_enable_cap enable_evmcs_cap = {
+ .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+ .args[0] = (unsigned long)&evmcs_ver
+ };
+
+ struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ if (!kvm_check_cap(KVM_CAP_NESTED_STATE) ||
+ !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+ printf("capabilities not available, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+ for (stage = 1;; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID, 0, 0);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_load_state(vm, VCPU_ID, state);
+ run = vcpu_state(vm, VCPU_ID);
+ free(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index 3764e7121265..eb3e7a838cb4 100644
--- a/tools/testing/selftests/kvm/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -19,7 +19,7 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
#define VCPU_ID 0
#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
@@ -48,7 +48,7 @@ static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
static void test_msr_platform_info_enabled(struct kvm_vm *vm)
{
struct kvm_run *run = vcpu_state(vm, VCPU_ID);
- struct guest_args args;
+ struct ucall uc;
set_msr_platform_info_enabled(vm, true);
vcpu_run(vm, VCPU_ID);
@@ -56,11 +56,11 @@ static void test_msr_platform_info_enabled(struct kvm_vm *vm)
"Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- guest_args_read(vm, VCPU_ID, &args);
- TEST_ASSERT(args.port == GUEST_PORT_SYNC,
- "Received IO from port other than PORT_HOST_SYNC: %u\n",
- run->io.port);
- TEST_ASSERT((args.arg1 & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
+ get_ucall(vm, VCPU_ID, &uc);
+ TEST_ASSERT(uc.cmd == UCALL_SYNC,
+ "Received ucall other than UCALL_SYNC: %u\n",
+ ucall);
+ TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 881419d5746e..35640e8e95bc 100644
--- a/tools/testing/selftests/kvm/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -22,7 +22,7 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
#define VCPU_ID 5
diff --git a/tools/testing/selftests/kvm/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 900e3e9dfb9f..03da41f0f736 100644
--- a/tools/testing/selftests/kvm/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -17,7 +17,7 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
#include "vmx.h"
#define VCPU_ID 5
@@ -26,20 +26,20 @@ static bool have_nested_state;
void l2_guest_code(void)
{
- GUEST_SYNC(5);
+ GUEST_SYNC(6);
/* Exit to L1 */
vmcall();
/* L1 has now set up a shadow VMCS for us. */
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
- GUEST_SYNC(9);
+ GUEST_SYNC(10);
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
- GUEST_SYNC(10);
+ GUEST_SYNC(11);
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
- GUEST_SYNC(11);
+ GUEST_SYNC(12);
/* Done, exit to L1 and never come back. */
vmcall();
@@ -52,15 +52,17 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(vmx_pages->vmcs_gpa);
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_SYNC(3);
+ GUEST_ASSERT(load_vmcs(vmx_pages));
GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
- GUEST_SYNC(3);
+ GUEST_SYNC(4);
GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
prepare_vmcs(vmx_pages, l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
- GUEST_SYNC(4);
+ GUEST_SYNC(5);
GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
GUEST_ASSERT(!vmlaunch());
GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
@@ -72,7 +74,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(!vmresume());
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- GUEST_SYNC(6);
+ GUEST_SYNC(7);
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
GUEST_ASSERT(!vmresume());
@@ -85,12 +87,12 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
GUEST_ASSERT(vmlaunch());
- GUEST_SYNC(7);
+ GUEST_SYNC(8);
GUEST_ASSERT(vmlaunch());
GUEST_ASSERT(vmresume());
vmwrite(GUEST_RIP, 0xc0ffee);
- GUEST_SYNC(8);
+ GUEST_SYNC(9);
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
@@ -101,7 +103,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
GUEST_ASSERT(vmlaunch());
GUEST_ASSERT(vmresume());
- GUEST_SYNC(12);
+ GUEST_SYNC(13);
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
GUEST_ASSERT(vmlaunch());
GUEST_ASSERT(vmresume());
@@ -127,6 +129,7 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
struct kvm_run *run;
struct kvm_x86_state *state;
+ struct ucall uc;
int stage;
struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
@@ -155,23 +158,23 @@ int main(int argc, char *argv[])
memset(&regs1, 0, sizeof(regs1));
vcpu_regs_get(vm, VCPU_ID, &regs1);
- switch (run->io.port) {
- case GUEST_PORT_ABORT:
- TEST_ASSERT(false, "%s at %s:%d", (const char *) regs1.rdi,
- __FILE__, regs1.rsi);
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
/* NOT REACHED */
- case GUEST_PORT_SYNC:
+ case UCALL_SYNC:
break;
- case GUEST_PORT_DONE:
+ case UCALL_DONE:
goto done;
default:
- TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
+ TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
}
- /* PORT_SYNC is handled here. */
- TEST_ASSERT(!strcmp((const char *)regs1.rdi, "hello") &&
- regs1.rsi == stage, "Unexpected register values vmexit #%lx, got %lx",
- stage, (ulong) regs1.rsi);
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+ stage, (ulong)uc.args[1]);
state = vcpu_save_state(vm, VCPU_ID);
kvm_vm_release(vm);
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index 213343e5dff9..c8478ce9ea77 100644
--- a/tools/testing/selftests/kvm/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -19,7 +19,7 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
#define VCPU_ID 5
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index 49bcc68b0235..18fa64db0d7a 100644
--- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -1,5 +1,5 @@
/*
- * gtests/tests/vmx_tsc_adjust_test.c
+ * vmx_tsc_adjust_test
*
* Copyright (C) 2018, Google LLC.
*
@@ -22,13 +22,13 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
#include "vmx.h"
#include <string.h>
#include <sys/ioctl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef MSR_IA32_TSC_ADJUST
#define MSR_IA32_TSC_ADJUST 0x3b
@@ -94,6 +94,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
/* Prepare the VMCS for L2 execution. */
prepare_vmcs(vmx_pages, l2_guest_code,
@@ -146,26 +147,25 @@ int main(int argc, char *argv[])
for (;;) {
volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
- struct guest_args args;
+ struct ucall uc;
vcpu_run(vm, VCPU_ID);
- guest_args_read(vm, VCPU_ID, &args);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (args.port) {
- case GUEST_PORT_ABORT:
- TEST_ASSERT(false, "%s", (const char *) args.arg0);
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "%s", (const char *)uc.args[0]);
/* NOT REACHED */
- case GUEST_PORT_SYNC:
- report(args.arg1);
+ case UCALL_SYNC:
+ report(uc.args[1]);
break;
- case GUEST_PORT_DONE:
+ case UCALL_DONE:
goto done;
default:
- TEST_ASSERT(false, "Unknown port 0x%x.", args.port);
+ TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
}
}
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 78b24cf76f40..8cf22b3c2563 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -14,3 +14,4 @@ udpgso_bench_rx
udpgso_bench_tx
tcp_inq
tls
+ip_defrag
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 919aa2ac00af..256d82d5fa87 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,13 +5,13 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
-TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
+TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index 3991ad1a368d..864f865eee55 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -167,8 +167,8 @@ setup()
# add vrf table
ip li add ${VRF} type vrf table ${VRF_TABLE}
ip li set ${VRF} up
- ip ro add table ${VRF_TABLE} unreachable default
- ip -6 ro add table ${VRF_TABLE} unreachable default
+ ip ro add table ${VRF_TABLE} unreachable default metric 8192
+ ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192
# create test interfaces
ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
@@ -185,20 +185,20 @@ setup()
for n in 1 3 5 7; do
ip li set ${NETIFS[p${n}]} up
ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
- ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+ ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
done
# move peer interfaces to namespace and add addresses
for n in 2 4 6 8; do
ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
- ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+ ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
done
- set +e
+ ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
+ ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
- # let DAD complete - assume default of 1 probe
- sleep 1
+ set +e
}
cleanup()
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 0f45633bd634..802b4af18729 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,11 +9,11 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics"
VERBOSE=0
PAUSE_ON_FAIL=no
PAUSE=no
-IP="ip -netns testns"
+IP="ip -netns ns1"
log_test()
{
@@ -47,8 +47,10 @@ log_test()
setup()
{
set -e
- ip netns add testns
+ ip netns add ns1
$IP link set dev lo up
+ ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
$IP link add dummy0 type dummy
$IP link set dev dummy0 up
@@ -61,7 +63,8 @@ setup()
cleanup()
{
$IP link del dev dummy0 &> /dev/null
- ip netns del testns
+ ip netns del ns1
+ ip netns del ns2 &> /dev/null
}
get_linklocal()
@@ -639,11 +642,14 @@ add_initial_route6()
check_route6()
{
- local pfx="2001:db8:104::/64"
+ local pfx
local expected="$1"
local out
local rc=0
+ set -- $expected
+ pfx=$1
+
out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
[ "${out}" = "${expected}" ] && return 0
@@ -690,28 +696,33 @@ route_setup()
[ "${VERBOSE}" = "1" ] && set -x
set -e
- $IP li add red up type vrf table 101
+ ip netns add ns2
+ ip -netns ns2 link set dev lo up
+ ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
+
$IP li add veth1 type veth peer name veth2
$IP li add veth3 type veth peer name veth4
$IP li set veth1 up
$IP li set veth3 up
- $IP li set veth2 vrf red up
- $IP li set veth4 vrf red up
- $IP li add dummy1 type dummy
- $IP li set dummy1 vrf red up
-
- $IP -6 addr add 2001:db8:101::1/64 dev veth1
- $IP -6 addr add 2001:db8:101::2/64 dev veth2
- $IP -6 addr add 2001:db8:103::1/64 dev veth3
- $IP -6 addr add 2001:db8:103::2/64 dev veth4
- $IP -6 addr add 2001:db8:104::1/64 dev dummy1
+ $IP li set veth2 netns ns2 up
+ $IP li set veth4 netns ns2 up
+ ip -netns ns2 li add dummy1 type dummy
+ ip -netns ns2 li set dummy1 up
+ $IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad
+ $IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad
$IP addr add 172.16.101.1/24 dev veth1
- $IP addr add 172.16.101.2/24 dev veth2
$IP addr add 172.16.103.1/24 dev veth3
- $IP addr add 172.16.103.2/24 dev veth4
- $IP addr add 172.16.104.1/24 dev dummy1
+
+ ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad
+ ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad
+ ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad
+
+ ip -netns ns2 addr add 172.16.101.2/24 dev veth2
+ ip -netns ns2 addr add 172.16.103.2/24 dev veth4
+ ip -netns ns2 addr add 172.16.104.1/24 dev dummy1
set +ex
}
@@ -944,7 +955,7 @@ ipv6_addr_metric_test()
log_test $rc 0 "Modify metric of address"
# verify prefix route removed on down
- run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+ run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
run_cmd "$IP li set dev dummy2 down"
rc=$?
if [ $rc -eq 0 ]; then
@@ -967,6 +978,77 @@ ipv6_addr_metric_test()
cleanup
}
+ipv6_route_metrics_test()
+{
+ local rc
+
+ echo
+ echo "IPv6 routes with metrics"
+
+ route_setup
+
+ #
+ # single path with metrics
+ #
+ run_cmd "$IP -6 ro add 2001:db8:111::/64 via 2001:db8:101::2 mtu 1400"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:111::/64 via 2001:db8:101::2 dev veth1 metric 1024 mtu 1400"
+ rc=$?
+ fi
+ log_test $rc 0 "Single path route with mtu metric"
+
+
+ #
+ # multipath via separate routes with metrics
+ #
+ run_cmd "$IP -6 ro add 2001:db8:112::/64 via 2001:db8:101::2 mtu 1400"
+ run_cmd "$IP -6 ro append 2001:db8:112::/64 via 2001:db8:103::2"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:112::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ rc=$?
+ fi
+ log_test $rc 0 "Multipath route via 2 single routes with mtu metric on first"
+
+ # second route is coalesced to first to make a multipath route.
+ # MTU of the second path is hidden from display!
+ run_cmd "$IP -6 ro add 2001:db8:113::/64 via 2001:db8:101::2"
+ run_cmd "$IP -6 ro append 2001:db8:113::/64 via 2001:db8:103::2 mtu 1400"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:113::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ rc=$?
+ fi
+ log_test $rc 0 "Multipath route via 2 single routes with mtu metric on 2nd"
+
+ run_cmd "$IP -6 ro del 2001:db8:113::/64 via 2001:db8:101::2"
+ if [ $? -eq 0 ]; then
+ check_route6 "2001:db8:113::/64 via 2001:db8:103::2 dev veth3 metric 1024 mtu 1400"
+ log_test $? 0 " MTU of second leg"
+ fi
+
+ #
+ # multipath with metrics
+ #
+ run_cmd "$IP -6 ro add 2001:db8:115::/64 mtu 1400 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:115::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ rc=$?
+ fi
+ log_test $rc 0 "Multipath route with mtu metric"
+
+ $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300
+ run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1"
+ log_test $? 0 "Using route with mtu metric"
+
+ run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo"
+ log_test $? 2 "Invalid metric (fails metric_convert)"
+
+ route_cleanup
+}
+
# add route for a prefix, flushing any existing routes first
# expected to be the first step of a test
add_route()
@@ -1005,11 +1087,15 @@ add_initial_route()
check_route()
{
- local pfx="172.16.104.0/24"
+ local pfx
local expected="$1"
local out
local rc=0
+ set -- $expected
+ pfx=$1
+ [ "${pfx}" = "unreachable" ] && pfx=$2
+
out=$($IP ro ls match ${pfx})
[ "${out}" = "${expected}" ] && return 0
@@ -1319,6 +1405,43 @@ ipv4_addr_metric_test()
cleanup
}
+ipv4_route_metrics_test()
+{
+ local rc
+
+ echo
+ echo "IPv4 route add / append tests"
+
+ route_setup
+
+ run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 mtu 1400"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.111.0/24 via 172.16.101.2 dev veth1 mtu 1400"
+ rc=$?
+ fi
+ log_test $rc 0 "Single path route with mtu metric"
+
+
+ run_cmd "$IP ro add 172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ rc=$?
+ fi
+ log_test $rc 0 "Multipath route with mtu metric"
+
+ $IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300
+ run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1"
+ log_test $? 0 "Using route with mtu metric"
+
+ run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo"
+ log_test $? 2 "Invalid metric (fails metric_convert)"
+
+ route_cleanup
+}
+
+
################################################################################
# usage
@@ -1385,6 +1508,8 @@ do
ipv4_route_test|ipv4_rt) ipv4_route_test;;
ipv6_addr_metric) ipv6_addr_metric_test;;
ipv4_addr_metric) ipv4_addr_metric_test;;
+ ipv6_route_metrics) ipv6_route_metrics_test;;
+ ipv4_route_metrics) ipv4_route_metrics_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac
diff --git a/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh
new file mode 100755
index 000000000000..1f8ef0eff862
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="sticky"
+NUM_NETIFS=4
+TEST_MAC=de:ad:be:ef:13:37
+source lib.sh
+
+switch_create()
+{
+ ip link add dev br0 type bridge
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $h1 up
+ ip link set dev $swp1 up
+ ip link set dev $h2 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $h2 down
+ ip link set dev $swp1 down
+ ip link set dev $h1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+ h2=${NETIFS[p3]}
+ swp2=${NETIFS[p4]}
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+ switch_destroy
+}
+
+sticky()
+{
+ bridge fdb add $TEST_MAC dev $swp1 master static sticky
+ check_err $? "Could not add fdb entry"
+ bridge fdb del $TEST_MAC dev $swp1 vlan 1 master static sticky
+ $MZ $h2 -c 1 -a $TEST_MAC -t arp "request" -q
+ bridge -j fdb show br br0 brport $swp1\
+ | jq -e ".[] | select(.mac == \"$TEST_MAC\")" &> /dev/null
+ check_err $? "Did not find FDB record when should"
+
+ log_test "Sticky fdb entry"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index ca53b539aa2d..85d253546684 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -251,7 +251,7 @@ lldpad_app_wait_set()
{
local dev=$1; shift
- while lldptool -t -i $dev -V APP -c app | grep -q pending; do
+ while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do
echo "$dev: waiting for lldpad to push pending APP updates"
sleep 5
done
@@ -494,6 +494,14 @@ tc_rule_stats_get()
| jq '.[1].options.actions[].stats.packets'
}
+ethtool_stats_get()
+{
+ local dev=$1; shift
+ local stat=$1; shift
+
+ ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
+}
+
mac_get()
{
local if_name=$1
@@ -541,6 +549,23 @@ forwarding_restore()
sysctl_restore net.ipv4.conf.all.forwarding
}
+declare -A MTU_ORIG
+mtu_set()
+{
+ local dev=$1; shift
+ local mtu=$1; shift
+
+ MTU_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].mtu')
+ ip link set dev $dev mtu $mtu
+}
+
+mtu_restore()
+{
+ local dev=$1; shift
+
+ ip link set dev $dev mtu ${MTU_ORIG["$dev"]}
+}
+
tc_offload_check()
{
local num_netifs=${1:-$NUM_NETIFS}
diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c
new file mode 100644
index 000000000000..61ae2782388e
--- /dev/null
+++ b/tools/testing/selftests/net/ip_defrag.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+static bool cfg_do_ipv4;
+static bool cfg_do_ipv6;
+static bool cfg_verbose;
+static bool cfg_overlap;
+static unsigned short cfg_port = 9000;
+
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+
+#define IP4_HLEN (sizeof(struct iphdr))
+#define IP6_HLEN (sizeof(struct ip6_hdr))
+#define UDP_HLEN (sizeof(struct udphdr))
+
+/* IPv6 fragment header lenth. */
+#define FRAG_HLEN 8
+
+static int payload_len;
+static int max_frag_len;
+
+#define MSG_LEN_MAX 60000 /* Max UDP payload length. */
+
+#define IP4_MF (1u << 13) /* IPv4 MF flag. */
+#define IP6_MF (1) /* IPv6 MF flag. */
+
+#define CSUM_MANGLED_0 (0xffff)
+
+static uint8_t udp_payload[MSG_LEN_MAX];
+static uint8_t ip_frame[IP_MAXPACKET];
+static uint32_t ip_id = 0xabcd;
+static int msg_counter;
+static int frag_counter;
+static unsigned int seed;
+
+/* Receive a UDP packet. Validate it matches udp_payload. */
+static void recv_validate_udp(int fd_udp)
+{
+ ssize_t ret;
+ static uint8_t recv_buff[MSG_LEN_MAX];
+
+ ret = recv(fd_udp, recv_buff, payload_len, 0);
+ msg_counter++;
+
+ if (cfg_overlap) {
+ if (ret != -1)
+ error(1, 0, "recv: expected timeout; got %d",
+ (int)ret);
+ if (errno != ETIMEDOUT && errno != EAGAIN)
+ error(1, errno, "recv: expected timeout: %d",
+ errno);
+ return; /* OK */
+ }
+
+ if (ret == -1)
+ error(1, errno, "recv: payload_len = %d max_frag_len = %d",
+ payload_len, max_frag_len);
+ if (ret != payload_len)
+ error(1, 0, "recv: wrong size: %d vs %d", (int)ret, payload_len);
+ if (memcmp(udp_payload, recv_buff, payload_len))
+ error(1, 0, "recv: wrong data");
+}
+
+static uint32_t raw_checksum(uint8_t *buf, int len, uint32_t sum)
+{
+ int i;
+
+ for (i = 0; i < (len & ~1U); i += 2) {
+ sum += (u_int16_t)ntohs(*((u_int16_t *)(buf + i)));
+ if (sum > 0xffff)
+ sum -= 0xffff;
+ }
+
+ if (i < len) {
+ sum += buf[i] << 8;
+ if (sum > 0xffff)
+ sum -= 0xffff;
+ }
+
+ return sum;
+}
+
+static uint16_t udp_checksum(struct ip *iphdr, struct udphdr *udphdr)
+{
+ uint32_t sum = 0;
+ uint16_t res;
+
+ sum = raw_checksum((uint8_t *)&iphdr->ip_src, 2 * sizeof(iphdr->ip_src),
+ IPPROTO_UDP + (uint32_t)(UDP_HLEN + payload_len));
+ sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum);
+ sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum);
+ res = 0xffff & ~sum;
+ if (res)
+ return htons(res);
+ else
+ return CSUM_MANGLED_0;
+}
+
+static uint16_t udp6_checksum(struct ip6_hdr *iphdr, struct udphdr *udphdr)
+{
+ uint32_t sum = 0;
+ uint16_t res;
+
+ sum = raw_checksum((uint8_t *)&iphdr->ip6_src, 2 * sizeof(iphdr->ip6_src),
+ IPPROTO_UDP);
+ sum = raw_checksum((uint8_t *)&udphdr->len, sizeof(udphdr->len), sum);
+ sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum);
+ sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum);
+ res = 0xffff & ~sum;
+ if (res)
+ return htons(res);
+ else
+ return CSUM_MANGLED_0;
+}
+
+static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen,
+ int offset, bool ipv6)
+{
+ int frag_len;
+ int res;
+ int payload_offset = offset > 0 ? offset - UDP_HLEN : 0;
+ uint8_t *frag_start = ipv6 ? ip_frame + IP6_HLEN + FRAG_HLEN :
+ ip_frame + IP4_HLEN;
+
+ if (offset == 0) {
+ struct udphdr udphdr;
+ udphdr.source = htons(cfg_port + 1);
+ udphdr.dest = htons(cfg_port);
+ udphdr.len = htons(UDP_HLEN + payload_len);
+ udphdr.check = 0;
+ if (ipv6)
+ udphdr.check = udp6_checksum((struct ip6_hdr *)ip_frame, &udphdr);
+ else
+ udphdr.check = udp_checksum((struct ip *)ip_frame, &udphdr);
+ memcpy(frag_start, &udphdr, UDP_HLEN);
+ }
+
+ if (ipv6) {
+ struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame;
+ struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+ if (payload_len - payload_offset <= max_frag_len && offset > 0) {
+ /* This is the last fragment. */
+ frag_len = FRAG_HLEN + payload_len - payload_offset;
+ fraghdr->ip6f_offlg = htons(offset);
+ } else {
+ frag_len = FRAG_HLEN + max_frag_len;
+ fraghdr->ip6f_offlg = htons(offset | IP6_MF);
+ }
+ ip6hdr->ip6_plen = htons(frag_len);
+ if (offset == 0)
+ memcpy(frag_start + UDP_HLEN, udp_payload,
+ frag_len - FRAG_HLEN - UDP_HLEN);
+ else
+ memcpy(frag_start, udp_payload + payload_offset,
+ frag_len - FRAG_HLEN);
+ frag_len += IP6_HLEN;
+ } else {
+ struct ip *iphdr = (struct ip *)ip_frame;
+ if (payload_len - payload_offset <= max_frag_len && offset > 0) {
+ /* This is the last fragment. */
+ frag_len = IP4_HLEN + payload_len - payload_offset;
+ iphdr->ip_off = htons(offset / 8);
+ } else {
+ frag_len = IP4_HLEN + max_frag_len;
+ iphdr->ip_off = htons(offset / 8 | IP4_MF);
+ }
+ iphdr->ip_len = htons(frag_len);
+ if (offset == 0)
+ memcpy(frag_start + UDP_HLEN, udp_payload,
+ frag_len - IP4_HLEN - UDP_HLEN);
+ else
+ memcpy(frag_start, udp_payload + payload_offset,
+ frag_len - IP4_HLEN);
+ }
+
+ res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
+ if (res < 0)
+ error(1, errno, "send_fragment");
+ if (res != frag_len)
+ error(1, 0, "send_fragment: %d vs %d", res, frag_len);
+
+ frag_counter++;
+}
+
+static void send_udp_frags(int fd_raw, struct sockaddr *addr,
+ socklen_t alen, bool ipv6)
+{
+ struct ip *iphdr = (struct ip *)ip_frame;
+ struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame;
+ int res;
+ int offset;
+ int frag_len;
+
+ /* Send the UDP datagram using raw IP fragments: the 0th fragment
+ * has the UDP header; other fragments are pieces of udp_payload
+ * split in chunks of frag_len size.
+ *
+ * Odd fragments (1st, 3rd, 5th, etc.) are sent out first, then
+ * even fragments (0th, 2nd, etc.) are sent out.
+ */
+ if (ipv6) {
+ struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+ ((struct sockaddr_in6 *)addr)->sin6_port = 0;
+ memset(ip6hdr, 0, sizeof(*ip6hdr));
+ ip6hdr->ip6_flow = htonl(6<<28); /* Version. */
+ ip6hdr->ip6_nxt = IPPROTO_FRAGMENT;
+ ip6hdr->ip6_hops = 255;
+ ip6hdr->ip6_src = addr6;
+ ip6hdr->ip6_dst = addr6;
+ fraghdr->ip6f_nxt = IPPROTO_UDP;
+ fraghdr->ip6f_reserved = 0;
+ fraghdr->ip6f_ident = htonl(ip_id++);
+ } else {
+ memset(iphdr, 0, sizeof(*iphdr));
+ iphdr->ip_hl = 5;
+ iphdr->ip_v = 4;
+ iphdr->ip_tos = 0;
+ iphdr->ip_id = htons(ip_id++);
+ iphdr->ip_ttl = 0x40;
+ iphdr->ip_p = IPPROTO_UDP;
+ iphdr->ip_src.s_addr = htonl(INADDR_LOOPBACK);
+ iphdr->ip_dst = addr4;
+ iphdr->ip_sum = 0;
+ }
+
+ /* Odd fragments. */
+ offset = max_frag_len;
+ while (offset < (UDP_HLEN + payload_len)) {
+ send_fragment(fd_raw, addr, alen, offset, ipv6);
+ offset += 2 * max_frag_len;
+ }
+
+ if (cfg_overlap) {
+ /* Send an extra random fragment. */
+ offset = rand() % (UDP_HLEN + payload_len - 1);
+ /* sendto() returns EINVAL if offset + frag_len is too small. */
+ if (ipv6) {
+ struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+ frag_len = max_frag_len + rand() % 256;
+ /* In IPv6 if !!(frag_len % 8), the fragment is dropped. */
+ frag_len &= ~0x7;
+ fraghdr->ip6f_offlg = htons(offset / 8 | IP6_MF);
+ ip6hdr->ip6_plen = htons(frag_len);
+ frag_len += IP6_HLEN;
+ } else {
+ frag_len = IP4_HLEN + UDP_HLEN + rand() % 256;
+ iphdr->ip_off = htons(offset / 8 | IP4_MF);
+ iphdr->ip_len = htons(frag_len);
+ }
+ res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
+ if (res < 0)
+ error(1, errno, "sendto overlap");
+ if (res != frag_len)
+ error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len);
+ frag_counter++;
+ }
+
+ /* Event fragments. */
+ offset = 0;
+ while (offset < (UDP_HLEN + payload_len)) {
+ send_fragment(fd_raw, addr, alen, offset, ipv6);
+ offset += 2 * max_frag_len;
+ }
+}
+
+static void run_test(struct sockaddr *addr, socklen_t alen, bool ipv6)
+{
+ int fd_tx_raw, fd_rx_udp;
+ struct timeval tv = { .tv_sec = 0, .tv_usec = 10 * 1000 };
+ int idx;
+ int min_frag_len = ipv6 ? 1280 : 8;
+
+ /* Initialize the payload. */
+ for (idx = 0; idx < MSG_LEN_MAX; ++idx)
+ udp_payload[idx] = idx % 256;
+
+ /* Open sockets. */
+ fd_tx_raw = socket(addr->sa_family, SOCK_RAW, IPPROTO_RAW);
+ if (fd_tx_raw == -1)
+ error(1, errno, "socket tx_raw");
+
+ fd_rx_udp = socket(addr->sa_family, SOCK_DGRAM, 0);
+ if (fd_rx_udp == -1)
+ error(1, errno, "socket rx_udp");
+ if (bind(fd_rx_udp, addr, alen))
+ error(1, errno, "bind");
+ /* Fail fast. */
+ if (setsockopt(fd_rx_udp, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ for (payload_len = min_frag_len; payload_len < MSG_LEN_MAX;
+ payload_len += (rand() % 4096)) {
+ if (cfg_verbose)
+ printf("payload_len: %d\n", payload_len);
+ max_frag_len = min_frag_len;
+ do {
+ send_udp_frags(fd_tx_raw, addr, alen, ipv6);
+ recv_validate_udp(fd_rx_udp);
+ max_frag_len += 8 * (rand() % 8);
+ } while (max_frag_len < (1500 - FRAG_HLEN) && max_frag_len <= payload_len);
+ }
+
+ /* Cleanup. */
+ if (close(fd_tx_raw))
+ error(1, errno, "close tx_raw");
+ if (close(fd_rx_udp))
+ error(1, errno, "close rx_udp");
+
+ if (cfg_verbose)
+ printf("processed %d messages, %d fragments\n",
+ msg_counter, frag_counter);
+
+ fprintf(stderr, "PASS\n");
+}
+
+
+static void run_test_v4(void)
+{
+ struct sockaddr_in addr = {0};
+
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(cfg_port);
+ addr.sin_addr = addr4;
+
+ run_test((void *)&addr, sizeof(addr), false /* !ipv6 */);
+}
+
+static void run_test_v6(void)
+{
+ struct sockaddr_in6 addr = {0};
+
+ addr.sin6_family = AF_INET6;
+ addr.sin6_port = htons(cfg_port);
+ addr.sin6_addr = addr6;
+
+ run_test((void *)&addr, sizeof(addr), true /* ipv6 */);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "46ov")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_do_ipv4 = true;
+ break;
+ case '6':
+ cfg_do_ipv6 = true;
+ break;
+ case 'o':
+ cfg_overlap = true;
+ break;
+ case 'v':
+ cfg_verbose = true;
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+ seed = time(NULL);
+ srand(seed);
+ /* Print the seed to track/reproduce potential failures. */
+ printf("seed = %d\n", seed);
+
+ if (cfg_do_ipv4)
+ run_test_v4();
+ if (cfg_do_ipv6)
+ run_test_v6();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh
new file mode 100755
index 000000000000..f34672796044
--- /dev/null
+++ b/tools/testing/selftests/net/ip_defrag.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a couple of IP defragmentation tests.
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+ ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_high_thresh=9000000 >/dev/null 2>&1
+ ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_low_thresh=7000000 >/dev/null 2>&1
+ ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_high_thresh=9000000 >/dev/null 2>&1
+ ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_low_thresh=7000000 >/dev/null 2>&1
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+echo "ipv4 defrag"
+ip netns exec "${NETNS}" ./ip_defrag -4
+
+
+echo "ipv4 defrag with overlaps"
+ip netns exec "${NETNS}" ./ip_defrag -4o
+
+echo "ipv6 defrag"
+ip netns exec "${NETNS}" ./ip_defrag -6
+
+echo "ipv6 defrag with overlaps"
+ip netns exec "${NETNS}" ./ip_defrag -6o
+
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 0ab9423d009f..a369d616b390 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -6,6 +6,26 @@
#
# Tests currently implemented:
#
+# - pmtu_ipv4
+# Set up two namespaces, A and B, with two paths between them over routers
+# R1 and R2 (also implemented with namespaces), with different MTUs:
+#
+# segment a_r1 segment b_r1 a_r1: 2000
+# .--------------R1--------------. a_r2: 1500
+# A B a_r3: 2000
+# '--------------R2--------------' a_r4: 1400
+# segment a_r2 segment b_r2
+#
+# Check that PMTU exceptions with the correct PMTU are created. Then
+# decrease and increase the MTU of the local link for one of the paths,
+# A to R1, checking that route exception PMTU changes accordingly over
+# this path. Also check that locked exceptions are created when an ICMP
+# message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
+# received
+#
+# - pmtu_ipv6
+# Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
+#
# - pmtu_vti4_exception
# Set up vti tunnel on top of veth, with xfrm states and policies, in two
# namespaces with matching endpoints. Check that route exception is not
@@ -50,6 +70,8 @@ ksft_skip=4
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
tests="
+ pmtu_ipv4_exception ipv4: PMTU exceptions
+ pmtu_ipv6_exception ipv6: PMTU exceptions
pmtu_vti6_exception vti6: PMTU exceptions
pmtu_vti4_exception vti4: PMTU exceptions
pmtu_vti4_default_mtu vti4: default MTU assignment
@@ -60,8 +82,45 @@ tests="
NS_A="ns-$(mktemp -u XXXXXX)"
NS_B="ns-$(mktemp -u XXXXXX)"
+NS_R1="ns-$(mktemp -u XXXXXX)"
+NS_R2="ns-$(mktemp -u XXXXXX)"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"
+ns_r1="ip netns exec ${NS_R1}"
+ns_r2="ip netns exec ${NS_R2}"
+
+# Addressing and routing for tests with routers: four network segments, with
+# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
+# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
+# Addresses are:
+# - IPv4: PREFIX4.SEGMENT.ID (/24)
+# - IPv6: PREFIX6:SEGMENT::ID (/64)
+prefix4="192.168"
+prefix6="fd00"
+a_r1=1
+a_r2=2
+b_r1=3
+b_r2=4
+# ns peer segment
+routing_addrs="
+ A R1 ${a_r1}
+ A R2 ${a_r2}
+ B R1 ${b_r1}
+ B R2 ${b_r2}
+"
+# Traffic from A to B goes through R1 by default, and through R2, if destined to
+# B's address on the b_r2 segment.
+# Traffic from B to A goes through R1.
+# ns destination gateway
+routes="
+ A default ${prefix4}.${a_r1}.2
+ A ${prefix4}.${b_r2}.1 ${prefix4}.${a_r2}.2
+ B default ${prefix4}.${b_r1}.2
+
+ A default ${prefix6}:${a_r1}::2
+ A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2
+ B default ${prefix6}:${b_r1}::2
+"
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
@@ -83,6 +142,7 @@ dummy6_mask="64"
cleanup_done=1
err_buf=
+tcpdump_pids=
err() {
err_buf="${err_buf}${1}
@@ -94,9 +154,15 @@ err_flush() {
err_buf=
}
+# Find the auto-generated name for this namespace
+nsname() {
+ eval echo \$NS_$1
+}
+
setup_namespaces() {
- ip netns add ${NS_A} || return 1
- ip netns add ${NS_B}
+ for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+ ip netns add ${n} || return 1
+ done
}
setup_veth() {
@@ -167,6 +233,49 @@ setup_xfrm6() {
setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
}
+setup_routing() {
+ for i in ${NS_R1} ${NS_R2}; do
+ ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
+ ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
+ done
+
+ for i in ${routing_addrs}; do
+ [ "${ns}" = "" ] && ns="${i}" && continue
+ [ "${peer}" = "" ] && peer="${i}" && continue
+ [ "${segment}" = "" ] && segment="${i}"
+
+ ns_name="$(nsname ${ns})"
+ peer_name="$(nsname ${peer})"
+ if="veth_${ns}-${peer}"
+ ifpeer="veth_${peer}-${ns}"
+
+ # Create veth links
+ ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
+ ip -n ${peer_name} link set dev ${ifpeer} up
+
+ # Add addresses
+ ip -n ${ns_name} addr add ${prefix4}.${segment}.1/24 dev ${if}
+ ip -n ${ns_name} addr add ${prefix6}:${segment}::1/64 dev ${if}
+
+ ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24 dev ${ifpeer}
+ ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}
+
+ ns=""; peer=""; segment=""
+ done
+
+ for i in ${routes}; do
+ [ "${ns}" = "" ] && ns="${i}" && continue
+ [ "${addr}" = "" ] && addr="${i}" && continue
+ [ "${gw}" = "" ] && gw="${i}"
+
+ ns_name="$(nsname ${ns})"
+
+ ip -n ${ns_name} route add ${addr} via ${gw}
+
+ ns=""; addr=""; gw=""
+ done
+}
+
setup() {
[ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
@@ -176,10 +285,28 @@ setup() {
done
}
+trace() {
+ [ $tracing -eq 0 ] && return
+
+ for arg do
+ [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
+ ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
+ tcpdump_pids="${tcpdump_pids} $!"
+ ns_cmd=
+ done
+ sleep 1
+}
+
cleanup() {
+ for pid in ${tcpdump_pids}; do
+ kill ${pid}
+ done
+ tcpdump_pids=
+
[ ${cleanup_done} -eq 1 ] && return
- ip netns del ${NS_A} 2> /dev/null
- ip netns del ${NS_B} 2> /dev/null
+ for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+ ip netns del ${n} 2> /dev/null
+ done
cleanup_done=1
}
@@ -196,7 +323,9 @@ mtu_parse() {
next=0
for i in ${input}; do
+ [ ${next} -eq 1 -a "${i}" = "lock" ] && next=2 && continue
[ ${next} -eq 1 ] && echo "${i}" && return
+ [ ${next} -eq 2 ] && echo "lock ${i}" && return
[ "${i}" = "mtu" ] && next=1
done
}
@@ -229,8 +358,117 @@ route_get_dst_pmtu_from_exception() {
mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
}
+check_pmtu_value() {
+ expected="${1}"
+ value="${2}"
+ event="${3}"
+
+ [ "${expected}" = "any" ] && [ -n "${value}" ] && return 0
+ [ "${value}" = "${expected}" ] && return 0
+ [ -z "${value}" ] && err " PMTU exception wasn't created after ${event}" && return 1
+ [ -z "${expected}" ] && err " PMTU exception shouldn't exist after ${event}" && return 1
+ err " found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
+ return 1
+}
+
+test_pmtu_ipvX() {
+ family=${1}
+
+ setup namespaces routing || return 2
+ trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
+ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
+ "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
+
+ if [ ${family} -eq 4 ]; then
+ ping=ping
+ dst1="${prefix4}.${b_r1}.1"
+ dst2="${prefix4}.${b_r2}.1"
+ else
+ ping=${ping6}
+ dst1="${prefix6}:${b_r1}::1"
+ dst2="${prefix6}:${b_r2}::1"
+ fi
+
+ # Set up initial MTU values
+ mtu "${ns_a}" veth_A-R1 2000
+ mtu "${ns_r1}" veth_R1-A 2000
+ mtu "${ns_r1}" veth_R1-B 1400
+ mtu "${ns_b}" veth_B-R1 1400
+
+ mtu "${ns_a}" veth_A-R2 2000
+ mtu "${ns_r2}" veth_R2-A 2000
+ mtu "${ns_r2}" veth_R2-B 1500
+ mtu "${ns_b}" veth_B-R2 1500
+
+ # Create route exceptions
+ ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst1} > /dev/null
+ ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst2} > /dev/null
+
+ # Check that exceptions have been created with the correct PMTU
+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+ check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+
+ # Decrease local MTU below PMTU, check for PMTU decrease in route exception
+ mtu "${ns_a}" veth_A-R1 1300
+ mtu "${ns_r1}" veth_R1-A 1300
+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+ check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
+ # Second exception shouldn't be modified
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
+
+ # Increase MTU, check for PMTU increase in route exception
+ mtu "${ns_a}" veth_A-R1 1700
+ mtu "${ns_r1}" veth_R1-A 1700
+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+ check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
+ # Second exception shouldn't be modified
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
+
+ # Skip PMTU locking tests for IPv6
+ [ $family -eq 6 ] && return 0
+
+ # Decrease remote MTU on path via R2, get new exception
+ mtu "${ns_r2}" veth_R2-B 400
+ mtu "${ns_b}" veth_B-R2 400
+ ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
+
+ # Decrease local MTU below PMTU
+ mtu "${ns_a}" veth_A-R2 500
+ mtu "${ns_r2}" veth_R2-A 500
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1
+
+ # Increase local MTU
+ mtu "${ns_a}" veth_A-R2 1500
+ mtu "${ns_r2}" veth_R2-A 1500
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1
+
+ # Get new exception
+ ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
+}
+
+test_pmtu_ipv4_exception() {
+ test_pmtu_ipvX 4
+}
+
+test_pmtu_ipv6_exception() {
+ test_pmtu_ipvX 6
+}
+
test_pmtu_vti4_exception() {
setup namespaces veth vti4 xfrm4 || return 2
+ trace "${ns_a}" veth_a "${ns_b}" veth_b \
+ "${ns_a}" vti4_a "${ns_b}" vti4_b
veth_mtu=1500
vti_mtu=$((veth_mtu - 20))
@@ -248,28 +486,19 @@ test_pmtu_vti4_exception() {
# exception is created
${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
- if [ "${pmtu}" != "" ]; then
- err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
- return 1
- fi
+ check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
# Now exceed link layer MTU by one byte, check that exception is created
+ # with the right PMTU value
${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
- if [ "${pmtu}" = "" ]; then
- err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
- return 1
- fi
-
- # ...with the right PMTU value
- if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
- err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
- return 1
- fi
+ check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
test_pmtu_vti6_exception() {
setup namespaces veth vti6 xfrm6 || return 2
+ trace "${ns_a}" veth_a "${ns_b}" veth_b \
+ "${ns_a}" vti6_a "${ns_b}" vti6_b
fail=0
# Create route exception by exceeding link layer MTU
@@ -280,25 +509,18 @@ test_pmtu_vti6_exception() {
${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
# Check that exception was created
- if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
- err " tunnel exceeding link layer MTU didn't create route exception"
- return 1
- fi
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+ check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
# Decrease tunnel MTU, check for PMTU decrease in route exception
mtu "${ns_a}" vti6_a 3000
-
- if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
- err " decreasing tunnel MTU didn't decrease route exception PMTU"
- fail=1
- fi
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+ check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
# Increase tunnel MTU, check for PMTU increase in route exception
mtu "${ns_a}" vti6_a 9000
- if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
- err " increasing tunnel MTU didn't increase route exception PMTU"
- fail=1
- fi
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+ check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
return ${fail}
}
@@ -445,15 +667,56 @@ test_pmtu_vti6_link_change_mtu() {
return ${fail}
}
-trap cleanup EXIT
+usage() {
+ echo
+ echo "$0 [OPTIONS] [TEST]..."
+ echo "If no TEST argument is given, all tests will be run."
+ echo
+ echo "Options"
+ echo " --trace: capture traffic to TEST_INTERFACE.pcap"
+ echo
+ echo "Available tests${tests}"
+ exit 1
+}
exitcode=0
desc=0
IFS="
"
+
+tracing=0
+for arg do
+ if [ "${arg}" != "${arg#--*}" ]; then
+ opt="${arg#--}"
+ if [ "${opt}" = "trace" ]; then
+ if which tcpdump > /dev/null 2>&1; then
+ tracing=1
+ else
+ echo "=== tcpdump not available, tracing disabled"
+ fi
+ else
+ usage
+ fi
+ else
+ # Check first that all requested tests are available before
+ # running any
+ command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
+ fi
+done
+
+trap cleanup EXIT
+
for t in ${tests}; do
[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
+ run_this=1
+ for arg do
+ [ "${arg}" != "${arg#--*}" ] && continue
+ [ "${arg}" = "${name}" ] && run_this=1 && break
+ run_this=0
+ done
+ [ $run_this -eq 0 ] && continue
+
(
unset IFS
eval test_${name}
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 8fdfeafaf8c0..fac68d710f35 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -121,11 +121,11 @@ TEST_F(tls, send_then_sendfile)
buf = (char *)malloc(st.st_size);
EXPECT_EQ(send(self->fd, test_str, to_send, 0), to_send);
- EXPECT_EQ(recv(self->cfd, recv_buf, to_send, 0), to_send);
+ EXPECT_EQ(recv(self->cfd, recv_buf, to_send, MSG_WAITALL), to_send);
EXPECT_EQ(memcmp(test_str, recv_buf, to_send), 0);
EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
- EXPECT_EQ(recv(self->cfd, buf, st.st_size, 0), st.st_size);
+ EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size);
}
TEST_F(tls, recv_max)
@@ -160,7 +160,7 @@ TEST_F(tls, msg_more)
EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
- EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_DONTWAIT),
+ EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_WAITALL),
send_len * 2);
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
@@ -180,7 +180,7 @@ TEST_F(tls, sendmsg_single)
msg.msg_iov = &vec;
msg.msg_iovlen = 1;
EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
- EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len);
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
@@ -288,7 +288,7 @@ TEST_F(tls, splice_from_pipe)
ASSERT_GE(pipe(p), 0);
EXPECT_GE(write(p[1], mem_send, send_len), 0);
EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), 0);
- EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+ EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
@@ -306,7 +306,7 @@ TEST_F(tls, splice_from_pipe2)
EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0);
EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0);
EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0);
- EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+ EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
@@ -322,13 +322,13 @@ TEST_F(tls, send_and_splice)
ASSERT_GE(pipe(p), 0);
EXPECT_EQ(send(self->fd, test_str, send_len2, 0), send_len2);
- EXPECT_NE(recv(self->cfd, buf, send_len2, 0), -1);
+ EXPECT_EQ(recv(self->cfd, buf, send_len2, MSG_WAITALL), send_len2);
EXPECT_EQ(memcmp(test_str, buf, send_len2), 0);
EXPECT_GE(write(p[1], mem_send, send_len), send_len);
EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), send_len);
- EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+ EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
@@ -436,7 +436,7 @@ TEST_F(tls, multiple_send_single_recv)
EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
memset(recv_mem, 0, total_len);
- EXPECT_EQ(recv(self->cfd, recv_mem, total_len, 0), total_len);
+ EXPECT_EQ(recv(self->cfd, recv_mem, total_len, MSG_WAITALL), total_len);
EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0);
@@ -516,17 +516,17 @@ TEST_F(tls, recv_peek_multiple_records)
len = strlen(test_str_second) + 1;
EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
- len = sizeof(buf);
+ len = strlen(test_str_first);
memset(buf, 0, len);
- EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
/* MSG_PEEK can only peek into the current record. */
- len = strlen(test_str_first) + 1;
+ len = strlen(test_str_first);
EXPECT_EQ(memcmp(test_str_first, buf, len), 0);
- len = sizeof(buf);
+ len = strlen(test_str) + 1;
memset(buf, 0, len);
- EXPECT_NE(recv(self->cfd, buf, len, 0), -1);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_WAITALL), len);
/* Non-MSG_PEEK will advance strparser (and therefore record)
* however.
@@ -543,6 +543,28 @@ TEST_F(tls, recv_peek_multiple_records)
len = strlen(test_str_second) + 1;
EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+ len = strlen(test_str) + 1;
+ memset(buf, 0, len);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+ len = strlen(test_str) + 1;
+ EXPECT_EQ(memcmp(test_str, buf, len), 0);
+}
+
+TEST_F(tls, recv_peek_large_buf_mult_recs)
+{
+ char const *test_str = "test_read_peek_mult_recs";
+ char const *test_str_first = "test_read_peek";
+ char const *test_str_second = "_mult_recs";
+ int len;
+ char buf[64];
+
+ len = strlen(test_str_first);
+ EXPECT_EQ(send(self->fd, test_str_first, len, 0), len);
+
+ len = strlen(test_str_second) + 1;
+ EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
len = sizeof(buf);
memset(buf, 0, len);
EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1);
@@ -551,6 +573,7 @@ TEST_F(tls, recv_peek_multiple_records)
EXPECT_EQ(memcmp(test_str, buf, len), 0);
}
+
TEST_F(tls, pollin)
{
char const *test_str = "test_poll";
@@ -564,7 +587,7 @@ TEST_F(tls, pollin)
EXPECT_EQ(poll(&fd, 1, 20), 1);
EXPECT_EQ(fd.revents & POLLIN, 1);
- EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len);
/* Test timing out */
EXPECT_EQ(poll(&fd, 1, 20), 0);
}
@@ -582,7 +605,7 @@ TEST_F(tls, poll_wait)
/* Set timeout to inf. secs */
EXPECT_EQ(poll(&fd, 1, -1), 1);
EXPECT_EQ(fd.revents & POLLIN, 1);
- EXPECT_EQ(recv(self->cfd, recv_mem, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, recv_mem, send_len, MSG_WAITALL), send_len);
}
TEST_F(tls, blocking)
@@ -728,7 +751,7 @@ TEST_F(tls, control_msg)
EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
vec.iov_base = buf;
- EXPECT_EQ(recvmsg(self->cfd, &msg, 0), send_len);
+ EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len);
cmsg = CMSG_FIRSTHDR(&msg);
EXPECT_NE(cmsg, NULL);
EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 201b598558b9..b3ad909aefbc 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -28,7 +28,8 @@ SUB_DIRS = alignment \
tm \
vphn \
math \
- ptrace
+ ptrace \
+ security
endif
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 7f348c059bc2..52b4710469d2 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -17,6 +17,7 @@
: "memory")
#define mb() asm volatile("sync" : : : "memory");
+#define barrier() asm volatile("" : : : "memory");
#define SPRN_MMCR2 769
#define SPRN_MMCRA 770
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index c58c370828b4..49621822d7c3 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -11,6 +11,7 @@
#include <stdint.h>
#include <stdbool.h>
#include <linux/auxvec.h>
+#include <linux/perf_event.h>
#include "reg.h"
/* Avoid headaches with PRI?64 - just use %ll? always */
@@ -31,6 +32,15 @@ void *get_auxv_entry(int type);
int pick_online_cpu(void);
+int read_debugfs_file(char *debugfs_file, int *result);
+int write_debugfs_file(char *debugfs_file, int result);
+void set_dscr(unsigned long val);
+int perf_event_open_counter(unsigned int type,
+ unsigned long config, int group_fd);
+int perf_event_enable(int fd);
+int perf_event_disable(int fd);
+int perf_event_reset(int fd);
+
static inline bool have_hwcap(unsigned long ftr)
{
return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
@@ -80,4 +90,12 @@ do { \
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#endif
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
#endif /* _SELFTESTS_POWERPC_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 7d7c42ed6de9..ba919308fe30 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -2,4 +2,5 @@ hugetlb_vs_thp_test
subpage_prot
tempfile
prot_sao
-segv_errors \ No newline at end of file
+segv_errors
+wild_bctr \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 33ced6e0ad25..43d68420e363 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,7 @@
noarg:
$(MAKE) -C ../
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
@@ -12,6 +12,8 @@ $(TEST_GEN_PROGS): ../harness.c
$(OUTPUT)/prot_sao: ../utils.c
+$(OUTPUT)/wild_bctr: CFLAGS += -m64
+
$(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1
diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c
new file mode 100644
index 000000000000..1b0e9e9a2ddc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Michael Ellerman, IBM Corp.
+ *
+ * Test that an out-of-bounds branch to counter behaves as expected.
+ */
+
+#include <setjmp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+#define BAD_NIP 0x788c545a18000000ull
+
+static struct pt_regs signal_regs;
+static jmp_buf setjmp_env;
+
+static void save_regs(ucontext_t *ctxt)
+{
+ struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+ memcpy(&signal_regs, regs, sizeof(signal_regs));
+}
+
+static void segv_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+ save_regs(ctxt_v);
+ longjmp(setjmp_env, 1);
+}
+
+static void usr2_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+ save_regs(ctxt_v);
+}
+
+static int ok(void)
+{
+ printf("Everything is OK in here.\n");
+ return 0;
+}
+
+#define REG_POISON 0x5a5aUL
+#define POISONED_REG(n) ((REG_POISON << 48) | ((n) << 32) | (REG_POISON << 16) | (n))
+
+static inline void poison_regs(void)
+{
+ #define POISON_REG(n) \
+ "lis " __stringify(n) "," __stringify(REG_POISON) ";" \
+ "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";" \
+ "sldi " __stringify(n) "," __stringify(n) ", 32 ;" \
+ "oris " __stringify(n) "," __stringify(n) "," __stringify(REG_POISON) ";" \
+ "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";"
+
+ asm (POISON_REG(15)
+ POISON_REG(16)
+ POISON_REG(17)
+ POISON_REG(18)
+ POISON_REG(19)
+ POISON_REG(20)
+ POISON_REG(21)
+ POISON_REG(22)
+ POISON_REG(23)
+ POISON_REG(24)
+ POISON_REG(25)
+ POISON_REG(26)
+ POISON_REG(27)
+ POISON_REG(28)
+ POISON_REG(29)
+ : // inputs
+ : // outputs
+ : "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
+ "26", "27", "28", "29"
+ );
+ #undef POISON_REG
+}
+
+static int check_regs(void)
+{
+ unsigned long i;
+
+ for (i = 15; i <= 29; i++)
+ FAIL_IF(signal_regs.gpr[i] != POISONED_REG(i));
+
+ printf("Regs OK\n");
+ return 0;
+}
+
+static void dump_regs(void)
+{
+ for (int i = 0; i < 32; i += 4) {
+ printf("r%02d 0x%016lx r%02d 0x%016lx " \
+ "r%02d 0x%016lx r%02d 0x%016lx\n",
+ i, signal_regs.gpr[i],
+ i+1, signal_regs.gpr[i+1],
+ i+2, signal_regs.gpr[i+2],
+ i+3, signal_regs.gpr[i+3]);
+ }
+}
+
+int test_wild_bctr(void)
+{
+ int (*func_ptr)(void);
+ struct sigaction segv = {
+ .sa_sigaction = segv_handler,
+ .sa_flags = SA_SIGINFO
+ };
+ struct sigaction usr2 = {
+ .sa_sigaction = usr2_handler,
+ .sa_flags = SA_SIGINFO
+ };
+
+ FAIL_IF(sigaction(SIGSEGV, &segv, NULL));
+ FAIL_IF(sigaction(SIGUSR2, &usr2, NULL));
+
+ bzero(&signal_regs, sizeof(signal_regs));
+
+ if (setjmp(setjmp_env) == 0) {
+ func_ptr = ok;
+ func_ptr();
+
+ kill(getpid(), SIGUSR2);
+ printf("Regs before:\n");
+ dump_regs();
+ bzero(&signal_regs, sizeof(signal_regs));
+
+ poison_regs();
+
+ func_ptr = (int (*)(void))BAD_NIP;
+ func_ptr();
+
+ FAIL_IF(1); /* we didn't segv? */
+ }
+
+ FAIL_IF(signal_regs.nip != BAD_NIP);
+
+ printf("All good - took SEGV as expected branching to 0x%llx\n", BAD_NIP);
+
+ dump_regs();
+ FAIL_IF(check_regs());
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_wild_bctr, "wild_bctr");
+}
diff --git a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
index ed3239bbfae2..ee1e9ca22f0d 100644
--- a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
+++ b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
@@ -65,14 +65,6 @@ static int unprotect_region(void)
extern char __start___ex_table[];
extern char __stop___ex_table[];
-#if defined(__powerpc64__)
-#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
-#elif defined(__powerpc__)
-#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
-#else
-#error implement UCONTEXT_NIA
-#endif
-
struct extbl_entry {
int insn;
int fixup;
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 923d531265f8..9b35ca8e8f13 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -2,7 +2,7 @@
TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
- perf-hwbreak
+ perf-hwbreak ptrace-syscall
top_srcdir = ../../../../..
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c b/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c
new file mode 100644
index 000000000000..3353210dcdbd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A ptrace test for testing PTRACE_SYSEMU, PTRACE_SETREGS and
+ * PTRACE_GETREG. This test basically create a child process that executes
+ * syscalls and the parent process check if it is being traced appropriated.
+ *
+ * This test is heavily based on tools/testing/selftests/x86/ptrace_syscall.c
+ * test, and it was adapted to run on Powerpc by
+ * Breno Leitao <leitao@debian.org>
+ */
+#define _GNU_SOURCE
+
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include "utils.h"
+
+/* Bitness-agnostic defines for user_regs_struct fields. */
+#define user_syscall_nr gpr[0]
+#define user_arg0 gpr[3]
+#define user_arg1 gpr[4]
+#define user_arg2 gpr[5]
+#define user_arg3 gpr[6]
+#define user_arg4 gpr[7]
+#define user_arg5 gpr[8]
+#define user_ip nip
+
+#define PTRACE_SYSEMU 0x1d
+
+static int nerrs;
+
+static void wait_trap(pid_t chld)
+{
+ siginfo_t si;
+
+ if (waitid(P_PID, chld, &si, WEXITED|WSTOPPED) != 0)
+ err(1, "waitid");
+ if (si.si_pid != chld)
+ errx(1, "got unexpected pid in event\n");
+ if (si.si_code != CLD_TRAPPED)
+ errx(1, "got unexpected event type %d\n", si.si_code);
+}
+
+static void test_ptrace_syscall_restart(void)
+{
+ int status;
+ struct pt_regs regs;
+ pid_t chld;
+
+ printf("[RUN]\tptrace-induced syscall restart\n");
+
+ chld = fork();
+ if (chld < 0)
+ err(1, "fork");
+
+ /*
+ * Child process is running 4 syscalls after ptrace.
+ *
+ * 1) getpid()
+ * 2) gettid()
+ * 3) tgkill() -> Send SIGSTOP
+ * 4) gettid() -> Where the tests will happen essentially
+ */
+ if (chld == 0) {
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tChild will make one syscall\n");
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
+ _exit(0);
+ }
+ /* Parent process below */
+
+ /* Wait for SIGSTOP sent by tgkill above. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ printf("[RUN]\tSYSEMU\n");
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ /*
+ * Ptrace trapped prior to executing the syscall, thus r3 still has
+ * the syscall number instead of the sys_gettid() result
+ */
+ if (regs.user_syscall_nr != SYS_gettid ||
+ regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+ regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+ regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+ printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+ (unsigned long)regs.user_syscall_nr,
+ (unsigned long)regs.user_arg0,
+ (unsigned long)regs.user_arg1,
+ (unsigned long)regs.user_arg2,
+ (unsigned long)regs.user_arg3,
+ (unsigned long)regs.user_arg4,
+ (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tInitial nr and args are correct\n"); }
+
+ printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n",
+ (unsigned long)regs.user_ip);
+
+ /*
+ * Rewind to retry the same syscall again. This will basically test
+ * the rewind process together with PTRACE_SETREGS and PTRACE_GETREGS.
+ */
+ regs.user_ip -= 4;
+ if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_gettid ||
+ regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+ regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+ regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+ printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+ (unsigned long)regs.user_syscall_nr,
+ (unsigned long)regs.user_arg0,
+ (unsigned long)regs.user_arg1,
+ (unsigned long)regs.user_arg2,
+ (unsigned long)regs.user_arg3,
+ (unsigned long)regs.user_arg4,
+ (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tRestarted nr and args are correct\n");
+ }
+
+ printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n",
+ (unsigned long)regs.user_ip);
+
+ /*
+ * Inject a new syscall (getpid) in the same place the previous
+ * syscall (gettid), rewind and re-execute.
+ */
+ regs.user_syscall_nr = SYS_getpid;
+ regs.user_arg0 = 20;
+ regs.user_arg1 = 21;
+ regs.user_arg2 = 22;
+ regs.user_arg3 = 23;
+ regs.user_arg4 = 24;
+ regs.user_arg5 = 25;
+ regs.user_ip -= 4;
+
+ if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ /* Check that ptrace stopped at the new syscall that was
+ * injected, and guarantee that it haven't executed, i.e, user_args
+ * contain the arguments and not the syscall return value, for
+ * instance.
+ */
+ if (regs.user_syscall_nr != SYS_getpid
+ || regs.user_arg0 != 20 || regs.user_arg1 != 21
+ || regs.user_arg2 != 22 || regs.user_arg3 != 23
+ || regs.user_arg4 != 24 || regs.user_arg5 != 25) {
+
+ printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+ (unsigned long)regs.user_syscall_nr,
+ (unsigned long)regs.user_arg0,
+ (unsigned long)regs.user_arg1,
+ (unsigned long)regs.user_arg2,
+ (unsigned long)regs.user_arg3,
+ (unsigned long)regs.user_arg4,
+ (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tReplacement nr and args are correct\n");
+ }
+
+ if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+ err(1, "PTRACE_CONT");
+
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+
+ /* Guarantee that the process executed properly, returning 0 */
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild exited cleanly\n");
+ }
+}
+
+int ptrace_syscall(void)
+{
+ test_ptrace_syscall_restart();
+
+ return nerrs;
+}
+
+int main(void)
+{
+ return test_harness(ptrace_syscall, "ptrace_syscall");
+}
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
new file mode 100644
index 000000000000..44690f1bb26a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+TEST_GEN_PROGS := rfi_flush
+
+CFLAGS += -I../../../../../usr/include
+
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
new file mode 100644
index 000000000000..564ed45bbf73
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define CACHELINE_SIZE 128
+
+struct perf_event_read {
+ __u64 nr;
+ __u64 l1d_misses;
+};
+
+static inline __u64 load(void *addr)
+{
+ __u64 tmp;
+
+ asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
+
+ return tmp;
+}
+
+static void syscall_loop(char *p, unsigned long iterations,
+ unsigned long zero_size)
+{
+ for (unsigned long i = 0; i < iterations; i++) {
+ for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+ load(p + j);
+ getppid();
+ }
+}
+
+int rfi_flush_test(void)
+{
+ char *p;
+ int repetitions = 10;
+ int fd, passes = 0, iter, rc = 0;
+ struct perf_event_read v;
+ __u64 l1d_misses_total = 0;
+ unsigned long iterations = 100000, zero_size = 24 * 1024;
+ int rfi_flush_org, rfi_flush;
+
+ SKIP_IF(geteuid() != 0);
+
+ if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
+ perror("Unable to read powerpc/rfi_flush debugfs file");
+ SKIP_IF(1);
+ }
+
+ rfi_flush = rfi_flush_org;
+
+ fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+ FAIL_IF(fd < 0);
+
+ p = (char *)memalign(zero_size, CACHELINE_SIZE);
+
+ FAIL_IF(perf_event_enable(fd));
+
+ set_dscr(1);
+
+ iter = repetitions;
+
+again:
+ FAIL_IF(perf_event_reset(fd));
+
+ syscall_loop(p, iterations, zero_size);
+
+ FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+ /* Expect at least zero_size/CACHELINE_SIZE misses per iteration */
+ if (v.l1d_misses >= (iterations * zero_size / CACHELINE_SIZE) && rfi_flush)
+ passes++;
+ else if (v.l1d_misses < iterations && !rfi_flush)
+ passes++;
+
+ l1d_misses_total += v.l1d_misses;
+
+ while (--iter)
+ goto again;
+
+ if (passes < repetitions) {
+ printf("FAIL (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+ rfi_flush, l1d_misses_total, rfi_flush ? '<' : '>',
+ rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+ repetitions - passes, repetitions);
+ rc = 1;
+ } else
+ printf("PASS (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+ rfi_flush, l1d_misses_total, rfi_flush ? '>' : '<',
+ rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+ passes, repetitions);
+
+ if (rfi_flush == rfi_flush_org) {
+ rfi_flush = !rfi_flush_org;
+ if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) {
+ perror("error writing to powerpc/rfi_flush debugfs file");
+ return 1;
+ }
+ iter = repetitions;
+ l1d_misses_total = 0;
+ passes = 0;
+ goto again;
+ }
+
+ perf_event_disable(fd);
+ close(fd);
+
+ set_dscr(0);
+
+ if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) {
+ perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+ return 1;
+ }
+
+ return rc;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(rfi_flush_test, "rfi_flush_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 2bda81c7bf23..df1d7d4b1c89 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -98,7 +98,7 @@ void texasr(void *in)
int test_tmspr()
{
- pthread_t thread;
+ pthread_t *thread;
int thread_num;
unsigned long i;
@@ -107,21 +107,28 @@ int test_tmspr()
/* To cause some context switching */
thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
+ thread = malloc(thread_num * sizeof(pthread_t));
+ if (thread == NULL)
+ return EXIT_FAILURE;
+
/* Test TFIAR and TFHAR */
- for (i = 0 ; i < thread_num ; i += 2){
- if (pthread_create(&thread, NULL, (void*)tfiar_tfhar, (void *)i))
+ for (i = 0; i < thread_num; i += 2) {
+ if (pthread_create(&thread[i], NULL, (void *)tfiar_tfhar,
+ (void *)i))
return EXIT_FAILURE;
}
- if (pthread_join(thread, NULL) != 0)
- return EXIT_FAILURE;
-
/* Test TEXASR */
- for (i = 0 ; i < thread_num ; i++){
- if (pthread_create(&thread, NULL, (void*)texasr, (void *)i))
+ for (i = 1; i < thread_num; i += 2) {
+ if (pthread_create(&thread[i], NULL, (void *)texasr, (void *)i))
return EXIT_FAILURE;
}
- if (pthread_join(thread, NULL) != 0)
- return EXIT_FAILURE;
+
+ for (i = 0; i < thread_num; i++) {
+ if (pthread_join(thread[i], NULL) != 0)
+ return EXIT_FAILURE;
+ }
+
+ free(thread);
if (passed)
return 0;
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index 156c8e750259..09894f4ff62e 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -236,7 +236,8 @@ void *tm_una_ping(void *input)
}
/* Check if we were not expecting a failure and a it occurred. */
- if (!expecting_failure() && is_failure(cr_)) {
+ if (!expecting_failure() && is_failure(cr_) &&
+ !failure_is_reschedule()) {
printf("\n\tUnexpected transaction failure 0x%02lx\n\t",
failure_code());
return (void *) -1;
@@ -244,9 +245,11 @@ void *tm_una_ping(void *input)
/*
* Check if TM failed due to the cause we were expecting. 0xda is a
- * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause.
+ * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause, unless
+ * it was caused by a reschedule.
*/
- if (is_failure(cr_) && !failure_is_unavailable()) {
+ if (is_failure(cr_) && !failure_is_unavailable() &&
+ !failure_is_reschedule()) {
printf("\n\tUnexpected failure cause 0x%02lx\n\t",
failure_code());
return (void *) -1;
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index df4204247d45..5518b1d4ef8b 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -52,6 +52,15 @@ static inline bool failure_is_unavailable(void)
return (failure_code() & TM_CAUSE_FAC_UNAV) == TM_CAUSE_FAC_UNAV;
}
+static inline bool failure_is_reschedule(void)
+{
+ if ((failure_code() & TM_CAUSE_RESCHED) == TM_CAUSE_RESCHED ||
+ (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED)
+ return true;
+
+ return false;
+}
+
static inline bool failure_is_nesting(void)
{
return (__builtin_get_texasru() & 0x400000);
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index aa8fc1e6365b..43c342845be0 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -10,16 +10,22 @@
#include <fcntl.h>
#include <link.h>
#include <sched.h>
+#include <signal.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
+#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>
+#include <asm/unistd.h>
+#include <linux/limits.h>
#include "utils.h"
static char auxv[4096];
+extern unsigned int dscr_insn[];
int read_auxv(char *buf, ssize_t buf_size)
{
@@ -121,3 +127,149 @@ bool is_ppc64le(void)
return strcmp(uts.machine, "ppc64le") == 0;
}
+
+int read_debugfs_file(char *debugfs_file, int *result)
+{
+ int rc = -1, fd;
+ char path[PATH_MAX];
+ char value[16];
+
+ strcpy(path, "/sys/kernel/debug/");
+ strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1);
+
+ if ((fd = open(path, O_RDONLY)) < 0)
+ return rc;
+
+ if ((rc = read(fd, value, sizeof(value))) < 0)
+ return rc;
+
+ value[15] = 0;
+ *result = atoi(value);
+ close(fd);
+
+ return 0;
+}
+
+int write_debugfs_file(char *debugfs_file, int result)
+{
+ int rc = -1, fd;
+ char path[PATH_MAX];
+ char value[16];
+
+ strcpy(path, "/sys/kernel/debug/");
+ strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1);
+
+ if ((fd = open(path, O_WRONLY)) < 0)
+ return rc;
+
+ snprintf(value, 16, "%d", result);
+
+ if ((rc = write(fd, value, strlen(value))) < 0)
+ return rc;
+
+ close(fd);
+
+ return 0;
+}
+
+static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
+ int cpu, int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, hw_event, pid, cpu,
+ group_fd, flags);
+}
+
+static void perf_event_attr_init(struct perf_event_attr *event_attr,
+ unsigned int type,
+ unsigned long config)
+{
+ memset(event_attr, 0, sizeof(*event_attr));
+
+ event_attr->type = type;
+ event_attr->size = sizeof(struct perf_event_attr);
+ event_attr->config = config;
+ event_attr->read_format = PERF_FORMAT_GROUP;
+ event_attr->disabled = 1;
+ event_attr->exclude_kernel = 1;
+ event_attr->exclude_hv = 1;
+ event_attr->exclude_guest = 1;
+}
+
+int perf_event_open_counter(unsigned int type,
+ unsigned long config, int group_fd)
+{
+ int fd;
+ struct perf_event_attr event_attr;
+
+ perf_event_attr_init(&event_attr, type, config);
+
+ fd = perf_event_open(&event_attr, 0, -1, group_fd, 0);
+
+ if (fd < 0)
+ perror("perf_event_open() failed");
+
+ return fd;
+}
+
+int perf_event_enable(int fd)
+{
+ if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
+ perror("error while enabling perf events");
+ return -1;
+ }
+
+ return 0;
+}
+
+int perf_event_disable(int fd)
+{
+ if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
+ perror("error disabling perf events");
+ return -1;
+ }
+
+ return 0;
+}
+
+int perf_event_reset(int fd)
+{
+ if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
+ perror("error resetting perf events");
+ return -1;
+ }
+
+ return 0;
+}
+
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+ static int warned = 0;
+ ucontext_t *ctx = (ucontext_t *)unused;
+ unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+ if (*pc == (unsigned long)&dscr_insn) {
+ if (!warned++)
+ printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+ *pc += 4;
+ } else {
+ printf("SIGILL at %p\n", pc);
+ abort();
+ }
+}
+
+void set_dscr(unsigned long val)
+{
+ static int init = 0;
+ struct sigaction sa;
+
+ if (!init) {
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = sigill_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGILL, &sa, NULL))
+ perror("sigill_handler");
+ init = 1;
+ }
+
+ asm volatile("dscr_insn: mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index 49a6f8c3fdae..f9281e8aa313 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -232,6 +232,8 @@ directory:
and the other is a test whether the command leaked memory or not.
(This one is a preliminary version, it may not work quite right yet,
but the overall template is there and it should only need tweaks.)
+ - buildebpfPlugin.py:
+ builds all programs in $EBPFDIR.
ACKNOWLEDGEMENTS
diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile
new file mode 100644
index 000000000000..dc92eb271d9a
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+
+APIDIR := ../../../../include/uapi
+TEST_GEN_FILES = action.o
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+CLANG ?= clang
+LLC ?= llc
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+ifeq ($(PROBE),)
+ CPU ?= probe
+else
+ CPU ?= generic
+endif
+
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+CLANG_FLAGS = -I. -I$(APIDIR) \
+ $(CLANG_SYS_INCLUDES) \
+ -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/%.o: %.c
+ $(CLANG) $(CLANG_FLAGS) \
+ -O2 -target bpf -emit-llvm -c $< -o - | \
+ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/bpf/action.c
new file mode 100644
index 000000000000..c32b99b80e19
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/action.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Davide Caratti, Red Hat inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s)
+{
+ return TC_ACT_OK;
+}
+
+__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s)
+{
+ s->data = 0x0;
+ return TC_ACT_OK;
+}
+
+char _license[] __attribute__((section("license"),used)) = "GPL";
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
new file mode 100644
index 000000000000..9f0ba10c44b4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
@@ -0,0 +1,66 @@
+'''
+build ebpf program
+'''
+
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'buildebpf/SubPlugin'
+ self.tap = ''
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ super().pre_suite(testcount, testidlist)
+
+ if self.args.buildebpf:
+ self._ebpf_makeall()
+
+ def post_suite(self, index):
+ super().post_suite(index)
+
+ self._ebpf_makeclean()
+
+ def add_args(self, parser):
+ super().add_args(parser)
+
+ self.argparser_group = self.argparser.add_argument_group(
+ 'buildebpf',
+ 'options for buildebpfPlugin')
+ self.argparser_group.add_argument(
+ '-B', '--buildebpf', action='store_true',
+ help='build eBPF programs')
+
+ return self.argparser
+
+ def _ebpf_makeall(self):
+ if self.args.buildebpf:
+ self._make('all')
+
+ def _ebpf_makeclean(self):
+ if self.args.buildebpf:
+ self._make('clean')
+
+ def _make(self, target):
+ command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target)
+ proc = subprocess.Popen(command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=ENVIR)
+ (rawout, serr) = proc.communicate()
+
+ if proc.returncode != 0 and len(serr) > 0:
+ foutput = serr.decode("utf-8")
+ else:
+ foutput = rawout.decode("utf-8")
+
+ proc.stdout.close()
+ proc.stderr.close()
+ return proc, foutput
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 6f289a49e5ec..5970cee6d05f 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -55,7 +55,6 @@
"bpf"
],
"setup": [
- "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { return 2; }' | clang -O2 -x c -c - -target bpf -o _b.o",
[
"$TC action flush action bpf",
0,
@@ -63,14 +62,13 @@
255
]
],
- "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667",
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
"expExitCode": "0",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
"matchCount": "1",
"teardown": [
- "$TC action flush action bpf",
- "rm -f _b.o"
+ "$TC action flush action bpf"
]
},
{
@@ -81,7 +79,6 @@
"bpf"
],
"setup": [
- "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { s->data = 0x0; return 2; }' | clang -O2 -x c -c - -target bpf -o _c.o",
[
"$TC action flush action bpf",
0,
@@ -89,10 +86,10 @@
255
]
],
- "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667",
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667",
"expExitCode": "255",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref",
"matchCount": "0",
"teardown": [
[
@@ -100,8 +97,7 @@
0,
1,
255
- ],
- "rm -f _c.o"
+ ]
]
},
{
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
index 68c91023cdb9..89189a03ce3d 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -536,5 +536,29 @@
"matchPattern": "^[ \t]+index [0-9]+ ref",
"matchCount": "0",
"teardown": []
+ },
+ {
+ "id": "8e47",
+ "name": "Add gact action with random determ goto chain control action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pass random determ goto chain 1 2 index 90",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pass random type determ goto chain 1 val 2.*index 90 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index 30f9b54bd666..4086a50a670e 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -715,5 +715,29 @@
"teardown": [
"$TC actions flush action police"
]
+ },
+ {
+ "id": "b48b",
+ "name": "Add police action with exceed goto chain control action",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1mbit burst 1k conform-exceed pass / goto chain 42",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 1Mbit burst 1Kb mtu 2Kb action pass/goto chain 42",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index a023d0d62b25..d651bc1501bd 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -16,7 +16,9 @@ NAMES = {
'DEV2': '',
'BATCH_FILE': './batch.txt',
# Name of the namespace to use
- 'NS': 'tcut'
+ 'NS': 'tcut',
+ # Directory containing eBPF test programs
+ 'EBPFDIR': './bpf'
}
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index af5ff83f6d7f..31b3c98b6d34 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -13,3 +13,4 @@ mlock-random-test
virtual_address_range
gup_benchmark
va_128TBswitch
+map_fixed_noreplace
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index e94b7b14bcb2..6e67e726e5a5 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -12,6 +12,7 @@ TEST_GEN_FILES += gup_benchmark
TEST_GEN_FILES += hugepage-mmap
TEST_GEN_FILES += hugepage-shm
TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += map_fixed_noreplace
TEST_GEN_FILES += map_populate
TEST_GEN_FILES += mlock-random-test
TEST_GEN_FILES += mlock2-tests
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 36df55132036..880b96fc80d4 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -15,9 +15,12 @@
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
+#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
+#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark)
struct gup_benchmark {
- __u64 delta_usec;
+ __u64 get_delta_usec;
+ __u64 put_delta_usec;
__u64 addr;
__u64 size;
__u32 nr_pages_per_call;
@@ -28,10 +31,12 @@ int main(int argc, char **argv)
{
struct gup_benchmark gup;
unsigned long size = 128 * MB;
- int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+ int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+ int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE;
+ char *file = "/dev/zero";
char *p;
- while ((opt = getopt(argc, argv, "m:r:n:tT")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) {
switch (opt) {
case 'm':
size = atoi(optarg) * MB;
@@ -48,13 +53,36 @@ int main(int argc, char **argv)
case 'T':
thp = 0;
break;
+ case 'L':
+ cmd = GUP_LONGTERM_BENCHMARK;
+ break;
+ case 'U':
+ cmd = GUP_BENCHMARK;
+ break;
case 'w':
write = 1;
+ break;
+ case 'f':
+ file = optarg;
+ break;
+ case 'S':
+ flags &= ~MAP_PRIVATE;
+ flags |= MAP_SHARED;
+ break;
+ case 'H':
+ flags |= MAP_HUGETLB;
+ break;
default:
return -1;
}
}
+ filed = open(file, O_RDWR|O_CREAT);
+ if (filed < 0) {
+ perror("open");
+ exit(filed);
+ }
+
gup.nr_pages_per_call = nr_pages;
gup.flags = write;
@@ -62,8 +90,7 @@ int main(int argc, char **argv)
if (fd == -1)
perror("open"), exit(1);
- p = mmap(NULL, size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
if (p == MAP_FAILED)
perror("mmap"), exit(1);
gup.addr = (unsigned long)p;
@@ -78,10 +105,11 @@ int main(int argc, char **argv)
for (i = 0; i < repeats; i++) {
gup.size = size;
- if (ioctl(fd, GUP_FAST_BENCHMARK, &gup))
+ if (ioctl(fd, cmd, &gup))
perror("ioctl"), exit(1);
- printf("Time: %lld us", gup.delta_usec);
+ printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
+ gup.put_delta_usec);
if (gup.size != size)
printf(", truncated (size: %lld)", gup.size);
printf("\n");
diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c
new file mode 100644
index 000000000000..d91bde511268
--- /dev/null
+++ b/tools/testing/selftests/vm/map_fixed_noreplace.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test that MAP_FIXED_NOREPLACE works.
+ *
+ * Copyright 2018, Jann Horn <jannh@google.com>
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+
+#include <sys/mman.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE 0x100000
+#endif
+
+#define BASE_ADDRESS (256ul * 1024 * 1024)
+
+
+static void dump_maps(void)
+{
+ char cmd[32];
+
+ snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid());
+ system(cmd);
+}
+
+int main(void)
+{
+ unsigned long flags, addr, size, page_size;
+ char *p;
+
+ page_size = sysconf(_SC_PAGE_SIZE);
+
+ flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
+
+ // Check we can map all the areas we need below
+ errno = 0;
+ addr = BASE_ADDRESS;
+ size = 5 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+ if (p == MAP_FAILED) {
+ dump_maps();
+ printf("Error: couldn't map the space we need for the test\n");
+ return 1;
+ }
+
+ errno = 0;
+ if (munmap((void *)addr, 5 * page_size) != 0) {
+ dump_maps();
+ printf("Error: munmap failed!?\n");
+ return 1;
+ }
+ printf("unmap() successful\n");
+
+ errno = 0;
+ addr = BASE_ADDRESS + page_size;
+ size = 3 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+ if (p == MAP_FAILED) {
+ dump_maps();
+ printf("Error: first mmap() failed unexpectedly\n");
+ return 1;
+ }
+
+ /*
+ * Exact same mapping again:
+ * base | free | new
+ * +1 | mapped | new
+ * +2 | mapped | new
+ * +3 | mapped | new
+ * +4 | free | new
+ */
+ errno = 0;
+ addr = BASE_ADDRESS;
+ size = 5 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+ if (p != MAP_FAILED) {
+ dump_maps();
+ printf("Error:1: mmap() succeeded when it shouldn't have\n");
+ return 1;
+ }
+
+ /*
+ * Second mapping contained within first:
+ *
+ * base | free |
+ * +1 | mapped |
+ * +2 | mapped | new
+ * +3 | mapped |
+ * +4 | free |
+ */
+ errno = 0;
+ addr = BASE_ADDRESS + (2 * page_size);
+ size = page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+ if (p != MAP_FAILED) {
+ dump_maps();
+ printf("Error:2: mmap() succeeded when it shouldn't have\n");
+ return 1;
+ }
+
+ /*
+ * Overlap end of existing mapping:
+ * base | free |
+ * +1 | mapped |
+ * +2 | mapped |
+ * +3 | mapped | new
+ * +4 | free | new
+ */
+ errno = 0;
+ addr = BASE_ADDRESS + (3 * page_size);
+ size = 2 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+ if (p != MAP_FAILED) {
+ dump_maps();
+ printf("Error:3: mmap() succeeded when it shouldn't have\n");
+ return 1;
+ }
+
+ /*
+ * Overlap start of existing mapping:
+ * base | free | new
+ * +1 | mapped | new
+ * +2 | mapped |
+ * +3 | mapped |
+ * +4 | free |
+ */
+ errno = 0;
+ addr = BASE_ADDRESS;
+ size = 2 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+ if (p != MAP_FAILED) {
+ dump_maps();
+ printf("Error:4: mmap() succeeded when it shouldn't have\n");
+ return 1;
+ }
+
+ /*
+ * Adjacent to start of existing mapping:
+ * base | free | new
+ * +1 | mapped |
+ * +2 | mapped |
+ * +3 | mapped |
+ * +4 | free |
+ */
+ errno = 0;
+ addr = BASE_ADDRESS;
+ size = page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+ if (p == MAP_FAILED) {
+ dump_maps();
+ printf("Error:5: mmap() failed when it shouldn't have\n");
+ return 1;
+ }
+
+ /*
+ * Adjacent to end of existing mapping:
+ * base | free |
+ * +1 | mapped |
+ * +2 | mapped |
+ * +3 | mapped |
+ * +4 | free | new
+ */
+ errno = 0;
+ addr = BASE_ADDRESS + (4 * page_size);
+ size = page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+ if (p == MAP_FAILED) {
+ dump_maps();
+ printf("Error:6: mmap() failed when it shouldn't have\n");
+ return 1;
+ }
+
+ addr = BASE_ADDRESS;
+ size = 5 * page_size;
+ if (munmap((void *)addr, size) != 0) {
+ dump_maps();
+ printf("Error: munmap failed!?\n");
+ return 1;
+ }
+ printf("unmap() successful\n");
+
+ printf("OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 7b8171e3128a..5d1db824f73a 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -34,18 +34,6 @@
* per-CPU threads 1 by triggering userfaults inside
* pthread_mutex_lock will also verify the atomicity of the memory
* transfer (UFFDIO_COPY).
- *
- * The program takes two parameters: the amounts of physical memory in
- * megabytes (MiB) of the area and the number of bounces to execute.
- *
- * # 100MiB 99999 bounces
- * ./userfaultfd 100 99999
- *
- * # 1GiB 99 bounces
- * ./userfaultfd 1000 99
- *
- * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers
- * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done
*/
#define _GNU_SOURCE
@@ -115,6 +103,30 @@ pthread_attr_t attr;
~(unsigned long)(sizeof(unsigned long long) \
- 1)))
+const char *examples =
+ "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
+ "./userfaultfd anon 100 99999\n\n"
+ "# Run share memory test on 1GiB region with 99 bounces:\n"
+ "./userfaultfd shmem 1000 99\n\n"
+ "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
+ "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
+ "# Run the same hugetlb test but using shmem:\n"
+ "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
+ "# 10MiB-~6GiB 999 bounces anonymous test, "
+ "continue forever unless an error triggers\n"
+ "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
+
+static void usage(void)
+{
+ fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> "
+ "[hugetlbfs_file]\n\n");
+ fprintf(stderr, "Supported <test type>: anon, hugetlb, "
+ "hugetlb_shared, shmem\n\n");
+ fprintf(stderr, "Examples:\n\n");
+ fprintf(stderr, examples);
+ exit(1);
+}
+
static int anon_release_pages(char *rel_area)
{
int ret = 0;
@@ -439,6 +451,43 @@ static int copy_page(int ufd, unsigned long offset)
return __copy_page(ufd, offset, false);
}
+static int uffd_read_msg(int ufd, struct uffd_msg *msg)
+{
+ int ret = read(uffd, msg, sizeof(*msg));
+
+ if (ret != sizeof(*msg)) {
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ return 1;
+ else
+ perror("blocking read error"), exit(1);
+ } else {
+ fprintf(stderr, "short read\n"), exit(1);
+ }
+ }
+
+ return 0;
+}
+
+/* Return 1 if page fault handled by us; otherwise 0 */
+static int uffd_handle_page_fault(struct uffd_msg *msg)
+{
+ unsigned long offset;
+
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
+ fprintf(stderr, "unexpected msg event %u\n",
+ msg->event), exit(1);
+
+ if (bounces & BOUNCE_VERIFY &&
+ msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ fprintf(stderr, "unexpected write fault\n"), exit(1);
+
+ offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
+ offset &= ~(page_size-1);
+
+ return copy_page(uffd, offset);
+}
+
static void *uffd_poll_thread(void *arg)
{
unsigned long cpu = (unsigned long) arg;
@@ -446,7 +495,6 @@ static void *uffd_poll_thread(void *arg)
struct uffd_msg msg;
struct uffdio_register uffd_reg;
int ret;
- unsigned long offset;
char tmp_chr;
unsigned long userfaults = 0;
@@ -470,25 +518,15 @@ static void *uffd_poll_thread(void *arg)
if (!(pollfd[0].revents & POLLIN))
fprintf(stderr, "pollfd[0].revents %d\n",
pollfd[0].revents), exit(1);
- ret = read(uffd, &msg, sizeof(msg));
- if (ret < 0) {
- if (errno == EAGAIN)
- continue;
- perror("nonblocking read error"), exit(1);
- }
+ if (uffd_read_msg(uffd, &msg))
+ continue;
switch (msg.event) {
default:
fprintf(stderr, "unexpected msg event %u\n",
msg.event), exit(1);
break;
case UFFD_EVENT_PAGEFAULT:
- if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
- fprintf(stderr, "unexpected write fault\n"), exit(1);
- offset = (char *)(unsigned long)msg.arg.pagefault.address -
- area_dst;
- offset &= ~(page_size-1);
- if (copy_page(uffd, offset))
- userfaults++;
+ userfaults += uffd_handle_page_fault(&msg);
break;
case UFFD_EVENT_FORK:
close(uffd);
@@ -516,8 +554,6 @@ static void *uffd_read_thread(void *arg)
{
unsigned long *this_cpu_userfaults;
struct uffd_msg msg;
- unsigned long offset;
- int ret;
this_cpu_userfaults = (unsigned long *) arg;
*this_cpu_userfaults = 0;
@@ -526,24 +562,9 @@ static void *uffd_read_thread(void *arg)
/* from here cancellation is ok */
for (;;) {
- ret = read(uffd, &msg, sizeof(msg));
- if (ret != sizeof(msg)) {
- if (ret < 0)
- perror("blocking read error"), exit(1);
- else
- fprintf(stderr, "short read\n"), exit(1);
- }
- if (msg.event != UFFD_EVENT_PAGEFAULT)
- fprintf(stderr, "unexpected msg event %u\n",
- msg.event), exit(1);
- if (bounces & BOUNCE_VERIFY &&
- msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
- fprintf(stderr, "unexpected write fault\n"), exit(1);
- offset = (char *)(unsigned long)msg.arg.pagefault.address -
- area_dst;
- offset &= ~(page_size-1);
- if (copy_page(uffd, offset))
- (*this_cpu_userfaults)++;
+ if (uffd_read_msg(uffd, &msg))
+ continue;
+ (*this_cpu_userfaults) += uffd_handle_page_fault(&msg);
}
return (void *)NULL;
}
@@ -605,6 +626,12 @@ static int stress(unsigned long *userfaults)
if (uffd_test_ops->release_pages(area_src))
return 1;
+
+ finished = 1;
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pthread_join(locking_threads[cpu], NULL))
+ return 1;
+
for (cpu = 0; cpu < nr_cpus; cpu++) {
char c;
if (bounces & BOUNCE_POLL) {
@@ -622,11 +649,6 @@ static int stress(unsigned long *userfaults)
}
}
- finished = 1;
- for (cpu = 0; cpu < nr_cpus; cpu++)
- if (pthread_join(locking_threads[cpu], NULL))
- return 1;
-
return 0;
}
@@ -1272,8 +1294,7 @@ static void sigalrm(int sig)
int main(int argc, char **argv)
{
if (argc < 4)
- fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"),
- exit(1);
+ usage();
if (signal(SIGALRM, sigalrm) == SIG_ERR)
fprintf(stderr, "failed to arm SIGALRM"), exit(1);
@@ -1286,20 +1307,19 @@ int main(int argc, char **argv)
nr_cpus;
if (!nr_pages_per_cpu) {
fprintf(stderr, "invalid MiB\n");
- fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ usage();
}
bounces = atoi(argv[3]);
if (bounces <= 0) {
fprintf(stderr, "invalid bounces\n");
- fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ usage();
}
nr_pages = nr_pages_per_cpu * nr_cpus;
if (test_type == TEST_HUGETLB) {
if (argc < 5)
- fprintf(stderr, "Usage: hugetlb <MiB> <bounces> <hugetlbfs_file>\n"),
- exit(1);
+ usage();
huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
if (huge_fd < 0) {
fprintf(stderr, "Open of %s failed", argv[3]);