 Documentation/bpf/bpf_design_QA.rst | 44
 Documentation/bpf/map_array.rst | 250
 Documentation/bpf/map_cpumap.rst | 166
 Documentation/bpf/map_lpm_trie.rst | 181
 Documentation/bpf/map_of_maps.rst | 126
 Documentation/bpf/map_queue_stack.rst | 122
 drivers/net/veth.c | 2
 include/linux/bpf.h | 179
 include/linux/btf.h | 10
 include/uapi/linux/bpf.h | 1
 kernel/bpf/arraymap.c | 30
 kernel/bpf/bpf_local_storage.c | 2
 kernel/bpf/btf.c | 420
 kernel/bpf/cpumap.c | 9
 kernel/bpf/hashtab.c | 38
 kernel/bpf/helpers.c | 6
 kernel/bpf/local_storage.c | 2
 kernel/bpf/map_in_map.c | 19
 kernel/bpf/syscall.c | 373
 kernel/bpf/verifier.c | 485
 net/core/bpf_sk_storage.c | 4
 net/core/filter.c | 43
 samples/bpf/sockex3_kern.c | 95
 samples/bpf/sockex3_user.c | 23
 samples/bpf/tracex2_kern.c | 4
 samples/bpf/tracex2_user.c | 3
 tools/bpf/bpftool/btf.c | 25
 tools/bpf/bpftool/common.c | 10
 tools/bpf/bpftool/gen.c | 19
 tools/bpf/bpftool/link.c | 10
 tools/bpf/bpftool/main.h | 14
 tools/bpf/bpftool/map.c | 10
 tools/bpf/bpftool/pids.c | 16
 tools/bpf/bpftool/prog.c | 10
 tools/include/uapi/linux/bpf.h | 1
 tools/lib/bpf/btf.c | 259
 tools/lib/bpf/btf_dump.c | 15
 tools/lib/bpf/hashmap.c | 18
 tools/lib/bpf/hashmap.h | 91
 tools/lib/bpf/libbpf.c | 18
 tools/lib/bpf/strset.c | 18
 tools/lib/bpf/usdt.c | 28
 tools/perf/tests/expr.c | 28
 tools/perf/tests/pmu-events.c | 6
 tools/perf/util/bpf-loader.c | 11
 tools/perf/util/evsel.c | 2
 tools/perf/util/expr.c | 36
 tools/perf/util/hashmap.c | 18
 tools/perf/util/hashmap.h | 91
 tools/perf/util/metricgroup.c | 10
 tools/perf/util/stat-shadow.c | 2
 tools/perf/util/stat.c | 9
 tools/testing/selftests/bpf/Makefile | 7
 tools/testing/selftests/bpf/bpf_util.h | 19
 tools/testing/selftests/bpf/cgroup_helpers.c | 3
 tools/testing/selftests/bpf/prog_tests/align.c | 38
 tools/testing/selftests/bpf/prog_tests/btf.c | 264
 tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c | 45
 tools/testing/selftests/bpf/prog_tests/btf_dump.c | 4
 tools/testing/selftests/bpf/prog_tests/hashmap.c | 190
 tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 6
 tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 6
 tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c | 4
 tools/testing/selftests/bpf/test_progs.c | 38
 tools/testing/selftests/bpf/veristat.c | 893
 tools/testing/selftests/bpf/xdp_synproxy.c | 5
 tools/testing/selftests/bpf/xsk.c | 26
 tools/testing/selftests/bpf/xskxceiver.c | 3
 68 files changed, 3592 insertions(+), 1371 deletions(-)
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index a210b8a4df00..17e774d96c5e 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -298,3 +298,47 @@ A: NO.
The BTF_ID macro does not cause a function to become part of the ABI
any more than does the EXPORT_SYMBOL_GPL macro.
+
+Q: What is the compatibility story for special BPF types in map values?
+-----------------------------------------------------------------------
+Q: Users are allowed to embed bpf_spin_lock, bpf_timer fields in their BPF map
+values (when using BTF support for BPF maps). This allows the use of helpers for
+such objects on these fields inside map values. Users are also allowed to embed
+pointers to some kernel types (with __kptr and __kptr_ref BTF tags). Will the
+kernel preserve backwards compatibility for these features?
+
+A: It depends. For bpf_spin_lock, bpf_timer: YES, for kptr and everything else:
+NO, but see below.
+
+For struct types that have been added already, like bpf_spin_lock and bpf_timer,
+the kernel will preserve backwards compatibility, as they are part of UAPI.
+
+For kptrs, they are also part of UAPI, but only with respect to the kptr
+mechanism. The types that you can use with a __kptr and __kptr_ref tagged
+pointer in your struct are NOT part of the UAPI contract. The supported types can
+and will change across kernel releases. However, operations like accessing kptr
+fields and bpf_kptr_xchg() helper will continue to be supported across kernel
+releases for the supported types.
+
+For any other supported struct type, unless explicitly stated in this document
+and added to bpf.h UAPI header, such types can and will arbitrarily change their
+size, type, and alignment, or any other user visible API or ABI detail across
+kernel releases. The users must adapt their BPF programs to the new changes and
+update them to make sure their programs continue to work correctly.
+
+NOTE: The BPF subsystem specifically reserves the 'bpf\_' prefix for type names,
+in order to introduce more special fields in the future. Hence, user programs
+must avoid defining types with the 'bpf\_' prefix so as not to be broken in
+future releases. In other words, no backwards compatibility is guaranteed if
+one uses a type in BTF with the 'bpf\_' prefix.
+
+Q: What is the compatibility story for special BPF types in local kptrs?
+------------------------------------------------------------------------
+Q: Same as above, but for local kptrs (i.e. pointers to objects allocated using
+bpf_obj_new for user defined structures). Will the kernel preserve backwards
+compatibility for these features?
+
+A: NO.
+
+Unlike map value types, there are no stability guarantees for this case. The
+whole local kptr API itself is unstable (since it is exposed through kfuncs).
diff --git a/Documentation/bpf/map_array.rst b/Documentation/bpf/map_array.rst
new file mode 100644
index 000000000000..b2cceb6c696b
--- /dev/null
+++ b/Documentation/bpf/map_array.rst
@@ -0,0 +1,250 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+================================================
+BPF_MAP_TYPE_ARRAY and BPF_MAP_TYPE_PERCPU_ARRAY
+================================================
+
+.. note::
+ - ``BPF_MAP_TYPE_ARRAY`` was introduced in kernel version 3.19
+ - ``BPF_MAP_TYPE_PERCPU_ARRAY`` was introduced in version 4.6
+
+``BPF_MAP_TYPE_ARRAY`` and ``BPF_MAP_TYPE_PERCPU_ARRAY`` provide generic array
+storage. The key type is an unsigned 32-bit integer (4 bytes) and the map is
+of constant size. The size of the array is defined in ``max_entries`` at
+creation time. All array elements are pre-allocated and zero initialized when
+created. ``BPF_MAP_TYPE_PERCPU_ARRAY`` uses a different memory region for each
+CPU whereas ``BPF_MAP_TYPE_ARRAY`` uses the same memory region for all CPUs.
+The value stored can be of any size; however, all array elements are aligned
+to 8 bytes.
+
+Since kernel 5.5, memory mapping may be enabled for ``BPF_MAP_TYPE_ARRAY`` by
+setting the flag ``BPF_F_MMAPABLE``. The map definition is page-aligned and
+starts on the first page. Sufficient page-sized and page-aligned blocks of
+memory are allocated to store all array values, starting on the second page,
+which in some cases will result in over-allocation of memory. The benefit of
+this is increased performance and ease of use, since userspace programs are
+not required to use helper functions to access and mutate data.
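+
+For example, a userspace process might map the value area directly (a minimal
+sketch; ``mmap_array_values()`` is a hypothetical helper, and ``fd`` is assumed
+to refer to a ``BPF_F_MMAPABLE`` array holding 256 ``long`` values):
+
+.. code-block:: c
+
+    #include <sys/mman.h>
+
+    long *mmap_array_values(int fd)
+    {
+        void *mem;
+
+        /* The kernel exposes the value area at offset 0 of the map fd */
+        mem = mmap(NULL, 256 * sizeof(long), PROT_READ | PROT_WRITE,
+                   MAP_SHARED, fd, 0);
+        if (mem == MAP_FAILED)
+            return NULL;
+        return mem; /* values can now be read and written without syscalls */
+    }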
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+.. c:function::
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Array elements can be retrieved using the ``bpf_map_lookup_elem()`` helper.
+This helper returns a pointer to the array element, so to avoid data races
+with userspace reading the value, the user must use primitives like
+``__sync_fetch_and_add()`` when updating the value in-place.
+
+.. c:function::
+ long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+
+Array elements can be updated using the ``bpf_map_update_elem()`` helper.
+
+``bpf_map_update_elem()`` returns 0 on success, or negative error in case of
+failure.
+
+Since the array is of constant size, ``bpf_map_delete_elem()`` is not supported.
+To clear an array element, you may use ``bpf_map_update_elem()`` to insert a
+zero value at that index.
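+
+This is a minimal sketch of clearing element 42 from a BPF program, assuming
+the ``my_map`` definition shown in the Examples section below:
+
+.. code-block:: c
+
+    long zero = 0;
+    __u32 index = 42;
+
+    /* Array elements cannot be deleted; overwrite with zero instead. */
+    bpf_map_update_elem(&my_map, &index, &zero, BPF_ANY);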
+
+Per CPU Array
+~~~~~~~~~~~~~
+
+Values stored in ``BPF_MAP_TYPE_ARRAY`` can be accessed by multiple programs
+across different CPUs. To restrict storage to a single CPU, you may use a
+``BPF_MAP_TYPE_PERCPU_ARRAY``.
+
+When using a ``BPF_MAP_TYPE_PERCPU_ARRAY`` the ``bpf_map_update_elem()`` and
+``bpf_map_lookup_elem()`` helpers automatically access the slot for the current
+CPU.
+
+.. c:function::
+ void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
+
+The ``bpf_map_lookup_percpu_elem()`` helper can be used to lookup the array
+value for a specific CPU. Returns the value on success, or ``NULL`` if no entry was
+found or ``cpu`` is invalid.
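+
+A sketch of summing a per-CPU counter across all CPUs from a BPF program
+(``pcpu_map`` and the compile-time ``MAX_CPUS`` bound are assumptions for
+illustration; the verifier requires the loop to be bounded):
+
+.. code-block:: c
+
+    __u32 key = 0;
+    long sum = 0, *val;
+    int cpu;
+
+    for (cpu = 0; cpu < MAX_CPUS; cpu++) {
+        val = bpf_map_lookup_percpu_elem(&pcpu_map, &key, cpu);
+        if (val)
+            sum += *val;
+    }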
+
+Concurrency
+-----------
+
+Since kernel version 5.1, the BPF infrastructure provides ``struct bpf_spin_lock``
+to synchronize access.
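+
+A minimal sketch of protecting an array element with a spin lock (the
+``hits`` field and map name are illustrative; the map must be defined with
+BTF for the lock to be recognized):
+
+.. code-block:: c
+
+    struct concurrent_element {
+        struct bpf_spin_lock lock;
+        long hits;
+    };
+
+    struct {
+        __uint(type, BPF_MAP_TYPE_ARRAY);
+        __type(key, __u32);
+        __type(value, struct concurrent_element);
+        __uint(max_entries, 1);
+    } counter SEC(".maps");
+
+    static void increment(__u32 index)
+    {
+        struct concurrent_element *val;
+
+        val = bpf_map_lookup_elem(&counter, &index);
+        if (val) {
+            bpf_spin_lock(&val->lock);
+            val->hits++;
+            bpf_spin_unlock(&val->lock);
+        }
+    }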
+
+Userspace
+---------
+
+Access from userspace uses libbpf APIs with the same names as above, with
+the map identified by its ``fd``.
+
+Examples
+========
+
+Please see the ``tools/testing/selftests/bpf`` directory for functional
+examples. The code samples below demonstrate API usage.
+
+Kernel BPF
+----------
+
+This snippet shows how to declare an array in a BPF program.
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, 256);
+ } my_map SEC(".maps");
+
+
+This example BPF program shows how to access an array element.
+
+.. code-block:: c
+
+ int bpf_prog(struct __sk_buff *skb)
+ {
+ struct iphdr ip;
+ int index;
+ long *value;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip, sizeof(ip)) < 0)
+ return 0;
+
+ index = ip.protocol;
+ value = bpf_map_lookup_elem(&my_map, &index);
+ if (value)
+            __sync_fetch_and_add(value, skb->len);
+
+ return 0;
+ }
+
+Userspace
+---------
+
+BPF_MAP_TYPE_ARRAY
+~~~~~~~~~~~~~~~~~~
+
+This snippet shows how to create an array, using ``bpf_map_create_opts`` to
+set flags.
+
+.. code-block:: c
+
+ #include <bpf/libbpf.h>
+ #include <bpf/bpf.h>
+
+ int create_array()
+ {
+ int fd;
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY,
+ "example_array", /* name */
+ sizeof(__u32), /* key size */
+ sizeof(long), /* value size */
+ 256, /* max entries */
+ &opts); /* create opts */
+ return fd;
+ }
+
+This snippet shows how to initialize the elements of an array.
+
+.. code-block:: c
+
+ int initialize_array(int fd)
+ {
+ __u32 i;
+ long value;
+ int ret;
+
+ for (i = 0; i < 256; i++) {
+ value = i;
+ ret = bpf_map_update_elem(fd, &i, &value, BPF_ANY);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+ }
+
+This snippet shows how to retrieve an element value from an array.
+
+.. code-block:: c
+
+ int lookup(int fd)
+ {
+ __u32 index = 42;
+ long value;
+ int ret;
+
+ ret = bpf_map_lookup_elem(fd, &index, &value);
+ if (ret < 0)
+ return ret;
+
+ /* use value here */
+ assert(value == 42);
+
+ return ret;
+ }
+
+BPF_MAP_TYPE_PERCPU_ARRAY
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This snippet shows how to initialize the elements of a per CPU array.
+
+.. code-block:: c
+
+ int initialize_array(int fd)
+ {
+ int ncpus = libbpf_num_possible_cpus();
+ long values[ncpus];
+ __u32 i, j;
+ int ret;
+
+        for (i = 0; i < 256; i++) {
+ for (j = 0; j < ncpus; j++)
+ values[j] = i;
+ ret = bpf_map_update_elem(fd, &i, &values, BPF_ANY);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+ }
+
+This snippet shows how to access the per CPU elements of an array value.
+
+.. code-block:: c
+
+ int lookup(int fd)
+ {
+ int ncpus = libbpf_num_possible_cpus();
+ __u32 index = 42, j;
+ long values[ncpus];
+ int ret;
+
+ ret = bpf_map_lookup_elem(fd, &index, &values);
+ if (ret < 0)
+ return ret;
+
+ for (j = 0; j < ncpus; j++) {
+ /* Use per CPU value here */
+ assert(values[j] == 42);
+ }
+
+ return ret;
+ }
+
+Semantics
+=========
+
+As shown in the example above, when accessing a ``BPF_MAP_TYPE_PERCPU_ARRAY``
+in userspace, each value is an array with ``ncpus`` elements.
+
+When calling ``bpf_map_update_elem()`` the flag ``BPF_NOEXIST`` can not be used
+for these maps.
diff --git a/Documentation/bpf/map_cpumap.rst b/Documentation/bpf/map_cpumap.rst
new file mode 100644
index 000000000000..eaf57b38cafd
--- /dev/null
+++ b/Documentation/bpf/map_cpumap.rst
@@ -0,0 +1,166 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+===================
+BPF_MAP_TYPE_CPUMAP
+===================
+
+.. note::
+ - ``BPF_MAP_TYPE_CPUMAP`` was introduced in kernel version 4.15
+
+.. kernel-doc:: kernel/bpf/cpumap.c
+ :doc: cpu map
+
+An example use-case for this map type is software-based Receive Side Scaling (RSS).
+
+The CPUMAP represents the CPUs in the system, indexed by the map key, and the
+map value is the config setting (per CPUMAP entry). Each CPUMAP entry has a dedicated
+kernel thread bound to the given CPU to represent the remote CPU execution unit.
+
+Starting from Linux kernel version 5.9 the CPUMAP can run a second XDP program
+on the remote CPU. This allows an XDP program to split its processing across
+multiple CPUs. Consider, for example, a scenario where the initial CPU (that
+sees/receives the packets) needs to do minimal packet processing, while the
+remote CPU (to which the packet is directed) can afford to spend more cycles
+processing the frame. The
+initial CPU is where the XDP redirect program is executed. The remote CPU
+receives raw ``xdp_frame`` objects.
+
+Usage
+=====
+
+Kernel BPF
+----------
+.. c:function::
+ long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+
+ Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+ For ``BPF_MAP_TYPE_CPUMAP`` this map contains references to CPUs.
+
+ The lower two bits of ``flags`` are used as the return code if the map lookup
+ fails. This is so that the return value can be one of the XDP program return
+ codes up to ``XDP_TX``, as chosen by the caller.
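+
+For example, a redirect that falls back to ``XDP_PASS`` when the lookup fails
+(a sketch; ``cpu_id`` stands for the destination entry chosen by the program):
+
+.. code-block:: c
+
+    return bpf_redirect_map(&cpu_map, cpu_id, XDP_PASS);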
+
+Userspace
+---------
+.. note::
+ CPUMAP entries can only be updated/looked up/deleted from user space and not
+ from an eBPF program. Trying to call these functions from a kernel eBPF
+ program will result in the program failing to load and a verifier warning.
+
+.. c:function::
+ int bpf_map_update_elem(int fd, const void *key, const void *value,
+ __u64 flags);
+
+ CPU entries can be added or updated using the ``bpf_map_update_elem()``
+ helper. This helper replaces existing elements atomically. The ``value`` parameter
+ can be ``struct bpf_cpumap_val``.
+
+ .. code-block:: c
+
+ struct bpf_cpumap_val {
+ __u32 qsize; /* queue size to remote target CPU */
+ union {
+ int fd; /* prog fd on map write */
+ __u32 id; /* prog id on map read */
+ } bpf_prog;
+ };
+
+   The flags argument can be one of the following:
+     - ``BPF_ANY``: Create a new element or update an existing element.
+     - ``BPF_NOEXIST``: Create a new element only if it did not exist.
+     - ``BPF_EXIST``: Update an existing element.
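+
+For example, a sketch of adding an entry for CPU 2 with a queue size of 192
+(the values are illustrative; ``map_fd`` is assumed to refer to the CPUMAP):
+
+.. code-block:: c
+
+    struct bpf_cpumap_val val = { .qsize = 192 };
+    __u32 cpu = 2;
+
+    bpf_map_update_elem(map_fd, &cpu, &val, BPF_ANY);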
+
+.. c:function::
+ int bpf_map_lookup_elem(int fd, const void *key, void *value);
+
+ CPU entries can be retrieved using the ``bpf_map_lookup_elem()``
+ helper.
+
+.. c:function::
+ int bpf_map_delete_elem(int fd, const void *key);
+
+ CPU entries can be deleted using the ``bpf_map_delete_elem()``
+ helper. This helper will return 0 on success, or negative error in case of
+ failure.
+
+Examples
+========
+Kernel
+------
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_CPUMAP`` called
+``cpu_map`` and how to redirect packets to a remote CPU using a round robin scheme.
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __type(key, __u32);
+ __type(value, struct bpf_cpumap_val);
+ __uint(max_entries, 12);
+ } cpu_map SEC(".maps");
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 12);
+ } cpus_available SEC(".maps");
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+ } cpus_iterator SEC(".maps");
+
+ SEC("xdp")
+ int xdp_redir_cpu_round_robin(struct xdp_md *ctx)
+ {
+ __u32 key = 0;
+ __u32 cpu_dest = 0;
+ __u32 *cpu_selected, *cpu_iterator;
+ __u32 cpu_idx;
+
+ cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key);
+ if (!cpu_iterator)
+ return XDP_ABORTED;
+ cpu_idx = *cpu_iterator;
+
+ *cpu_iterator += 1;
+ if (*cpu_iterator == bpf_num_possible_cpus())
+ *cpu_iterator = 0;
+
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_selected)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_selected;
+
+ if (cpu_dest >= bpf_num_possible_cpus())
+ return XDP_ABORTED;
+
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+ }
+
+Userspace
+---------
+
+The following code snippet shows how to dynamically set the ``max_entries`` for
+a CPUMAP to the maximum number of CPUs available on the system.
+
+.. code-block:: c
+
+ int set_max_cpu_entries(struct bpf_map *cpu_map)
+ {
+ if (bpf_map__set_max_entries(cpu_map, libbpf_num_possible_cpus()) < 0) {
+ fprintf(stderr, "Failed to set max entries for cpu_map map: %s",
+ strerror(errno));
+ return -1;
+ }
+ return 0;
+ }
+
+References
+===========
+
+- https://developers.redhat.com/blog/2021/05/13/receive-side-scaling-rss-with-ebpf-and-cpumap#redirecting_into_a_cpumap
diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst
new file mode 100644
index 000000000000..31be1aa7ba2c
--- /dev/null
+++ b/Documentation/bpf/map_lpm_trie.rst
@@ -0,0 +1,181 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=====================
+BPF_MAP_TYPE_LPM_TRIE
+=====================
+
+.. note::
+ - ``BPF_MAP_TYPE_LPM_TRIE`` was introduced in kernel version 4.11
+
+``BPF_MAP_TYPE_LPM_TRIE`` provides a longest prefix match algorithm that
+can be used to match IP addresses to a stored set of prefixes.
+Internally, data is stored in an unbalanced trie of nodes that uses
+``prefixlen,data`` pairs as its keys. The ``data`` is interpreted in
+network byte order, i.e. big endian, so ``data[0]`` stores the most
+significant byte.
+
+LPM tries may be created with a maximum prefix length that is a multiple
+of 8, in the range from 8 to 2048. The key used for lookup and update
+operations is a ``struct bpf_lpm_trie_key``, extended by
+``max_prefixlen/8`` bytes.
+
+- For IPv4 addresses the data length is 4 bytes
+- For IPv6 addresses the data length is 16 bytes
+
+The value type stored in the LPM trie can be any user defined type.
+
+.. note::
+ When creating a map of type ``BPF_MAP_TYPE_LPM_TRIE`` you must set the
+ ``BPF_F_NO_PREALLOC`` flag.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+.. c:function::
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+The longest prefix entry for a given data value can be found using the
+``bpf_map_lookup_elem()`` helper. This helper returns a pointer to the
+value associated with the longest matching ``key``, or ``NULL`` if no
+entry was found.
+
+The ``key`` should have ``prefixlen`` set to ``max_prefixlen`` when
+performing longest prefix lookups. For example, when searching for the
+longest prefix match for an IPv4 address, ``prefixlen`` should be set to
+``32``.
+
+.. c:function::
+ long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+
+Prefix entries can be added or updated using the ``bpf_map_update_elem()``
+helper. This helper replaces existing elements atomically.
+
+``bpf_map_update_elem()`` returns ``0`` on success, or negative error in
+case of failure.
+
+ .. note::
+ The flags parameter must be one of BPF_ANY, BPF_NOEXIST or BPF_EXIST,
+ but the value is ignored, giving BPF_ANY semantics.
+
+.. c:function::
+ long bpf_map_delete_elem(struct bpf_map *map, const void *key)
+
+Prefix entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case
+of failure.
+
+Userspace
+---------
+
+Access from userspace uses libbpf APIs with the same names as above, with
+the map identified by ``fd``.
+
+.. c:function::
+   int bpf_map_get_next_key(int fd, const void *cur_key, void *next_key)
+
+A userspace program can iterate through the entries in an LPM trie using
+libbpf's ``bpf_map_get_next_key()`` function. The first key can be
+fetched by calling ``bpf_map_get_next_key()`` with ``cur_key`` set to
+``NULL``. Subsequent calls will fetch the next key that follows the
+current key. ``bpf_map_get_next_key()`` returns ``0`` on success,
+``-ENOENT`` if ``cur_key`` is the last key in the trie, or negative
+error in case of failure.
+
+``bpf_map_get_next_key()`` will iterate through the LPM trie elements
+starting from the leftmost leaf. This means that iteration will return more
+specific keys before less specific ones.
+
+Examples
+========
+
+Please see ``tools/testing/selftests/bpf/test_lpm_map.c`` for examples
+of LPM trie usage from userspace. The code snippets below demonstrate
+API usage.
+
+Kernel BPF
+----------
+
+The following BPF code snippet shows how to declare a new LPM trie for IPv4
+address prefixes:
+
+.. code-block:: c
+
+ #include <linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
+
+ struct ipv4_lpm_key {
+ __u32 prefixlen;
+ __u32 data;
+ };
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct ipv4_lpm_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 255);
+ } ipv4_lpm_map SEC(".maps");
+
+The following BPF code snippet shows how to lookup by IPv4 address:
+
+.. code-block:: c
+
+ void *lookup(__u32 ipaddr)
+ {
+ struct ipv4_lpm_key key = {
+ .prefixlen = 32,
+ .data = ipaddr
+ };
+
+ return bpf_map_lookup_elem(&ipv4_lpm_map, &key);
+ }
+
+Userspace
+---------
+
+The following snippet shows how to insert an IPv4 prefix entry into an
+LPM trie:
+
+.. code-block:: c
+
+ int add_prefix_entry(int lpm_fd, __u32 addr, __u32 prefixlen, struct value *value)
+ {
+ struct ipv4_lpm_key ipv4_key = {
+ .prefixlen = prefixlen,
+ .data = addr
+ };
+ return bpf_map_update_elem(lpm_fd, &ipv4_key, value, BPF_ANY);
+ }
+
+The following snippet shows a userspace program walking through the entries
+of an LPM trie:
+
+
+.. code-block:: c
+
+ #include <bpf/libbpf.h>
+ #include <bpf/bpf.h>
+
+ void iterate_lpm_trie(int map_fd)
+ {
+ struct ipv4_lpm_key *cur_key = NULL;
+ struct ipv4_lpm_key next_key;
+ struct value value;
+ int err;
+
+ for (;;) {
+ err = bpf_map_get_next_key(map_fd, cur_key, &next_key);
+ if (err)
+ break;
+
+ bpf_map_lookup_elem(map_fd, &next_key, &value);
+
+ /* Use key and value here */
+
+ cur_key = &next_key;
+ }
+ }
diff --git a/Documentation/bpf/map_of_maps.rst b/Documentation/bpf/map_of_maps.rst
new file mode 100644
index 000000000000..07212b9227a9
--- /dev/null
+++ b/Documentation/bpf/map_of_maps.rst
@@ -0,0 +1,126 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+========================================================
+BPF_MAP_TYPE_ARRAY_OF_MAPS and BPF_MAP_TYPE_HASH_OF_MAPS
+========================================================
+
+.. note::
+ - ``BPF_MAP_TYPE_ARRAY_OF_MAPS`` and ``BPF_MAP_TYPE_HASH_OF_MAPS`` were
+ introduced in kernel version 4.12
+
+``BPF_MAP_TYPE_ARRAY_OF_MAPS`` and ``BPF_MAP_TYPE_HASH_OF_MAPS`` provide general
+purpose support for map-in-map storage. One level of nesting is supported, where
+an outer map contains instances of a single type of inner map, for example
+``array_of_maps->sock_map``.
+
+When creating an outer map, an inner map instance is used to initialize the
+metadata that the outer map holds about its inner maps. This inner map has a
+separate lifetime from the outer map and can be deleted after the outer map has
+been created.
+
+The outer map supports element lookup, update and delete from user space using
+the syscall API. A BPF program is only allowed to do element lookup in the outer
+map.
+
+.. note::
+ - Multi-level nesting is not supported.
+ - Any BPF map type can be used as an inner map, except for
+ ``BPF_MAP_TYPE_PROG_ARRAY``.
+ - A BPF program cannot update or delete outer map entries.
+
+For ``BPF_MAP_TYPE_ARRAY_OF_MAPS`` the key is an unsigned 32-bit integer index
+into the array. The array is of fixed size, with ``max_entries`` elements that
+are zero initialized when created.
+
+For ``BPF_MAP_TYPE_HASH_OF_MAPS`` the key type can be chosen when defining the
+map. The kernel is responsible for allocating and freeing key/value pairs, up to
+the ``max_entries`` limit that you specify. Hash maps use pre-allocation of hash
+table elements by default. The ``BPF_F_NO_PREALLOC`` flag can be used to disable
+pre-allocation when it is too memory expensive.
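+
+A sketch of creating a hash-based outer map with pre-allocation disabled
+(``inner_fd`` refers to an already-created inner map, as described above):
+
+.. code-block:: c
+
+    int create_outer_hash(int inner_fd)
+    {
+        LIBBPF_OPTS(bpf_map_create_opts, opts,
+                    .inner_map_fd = inner_fd,
+                    .map_flags = BPF_F_NO_PREALLOC);
+
+        return bpf_map_create(BPF_MAP_TYPE_HASH_OF_MAPS,
+                              "example_hash", /* name */
+                              sizeof(__u32),  /* key size */
+                              sizeof(__u32),  /* value size */
+                              256,            /* max entries */
+                              &opts);         /* create opts */
+    }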
+
+Usage
+=====
+
+Kernel BPF Helper
+-----------------
+
+.. c:function::
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Inner maps can be retrieved using the ``bpf_map_lookup_elem()`` helper. This
+helper returns a pointer to the inner map, or ``NULL`` if no entry was found.
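+
+A sketch of a two-level lookup from a BPF program (``outer_map`` follows the
+declaration shown in the example below; lookups in the inner map obey the
+rules of the inner map's type):
+
+.. code-block:: c
+
+    __u32 outer_key = 0, inner_key = 0;
+    void *inner_map;
+    __u32 *value;
+
+    inner_map = bpf_map_lookup_elem(&outer_map, &outer_key);
+    if (!inner_map)
+        return 0;
+
+    value = bpf_map_lookup_elem(inner_map, &inner_key);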
+
+Examples
+========
+
+Kernel BPF Example
+------------------
+
+This snippet shows how to create and initialise an array of devmaps in a BPF
+program. Note that the outer array can only be modified from user space using
+the syscall API.
+
+.. code-block:: c
+
+ struct inner_map {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 10);
+ __type(key, __u32);
+ __type(value, __u32);
+ } inner_map1 SEC(".maps"), inner_map2 SEC(".maps");
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __array(values, struct inner_map);
+ } outer_map SEC(".maps") = {
+ .values = { &inner_map1,
+ &inner_map2 }
+ };
+
+See ``progs/test_btf_map_in_map.c`` in ``tools/testing/selftests/bpf`` for more
+examples of declarative initialisation of outer maps.
+
+User Space
+----------
+
+This snippet shows how to create an array based outer map:
+
+.. code-block:: c
+
+ int create_outer_array(int inner_fd) {
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .inner_map_fd = inner_fd);
+ int fd;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ "example_array", /* name */
+ sizeof(__u32), /* key size */
+ sizeof(__u32), /* value size */
+ 256, /* max entries */
+ &opts); /* create opts */
+ return fd;
+ }
+
+
+This snippet shows how to add an inner map to an outer map:
+
+.. code-block:: c
+
+ int add_devmap(int outer_fd, int index, const char *name) {
+ int fd;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP, name,
+ sizeof(__u32), sizeof(__u32), 256, NULL);
+ if (fd < 0)
+ return fd;
+
+ return bpf_map_update_elem(outer_fd, &index, &fd, BPF_ANY);
+ }
+
+References
+==========
+
+- https://lore.kernel.org/netdev/20170322170035.923581-3-kafai@fb.com/
+- https://lore.kernel.org/netdev/20170322170035.923581-4-kafai@fb.com/
diff --git a/Documentation/bpf/map_queue_stack.rst b/Documentation/bpf/map_queue_stack.rst
new file mode 100644
index 000000000000..f20e31a647b9
--- /dev/null
+++ b/Documentation/bpf/map_queue_stack.rst
@@ -0,0 +1,122 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=========================================
+BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK
+=========================================
+
+.. note::
+ - ``BPF_MAP_TYPE_QUEUE`` and ``BPF_MAP_TYPE_STACK`` were introduced
+ in kernel version 4.20
+
+``BPF_MAP_TYPE_QUEUE`` provides FIFO storage and ``BPF_MAP_TYPE_STACK``
+provides LIFO storage for BPF programs. These maps support peek, pop and
+push operations that are exposed to BPF programs through the respective
+helpers. These operations are exposed to userspace applications using
+the existing ``bpf`` syscall in the following way:
+
+- ``BPF_MAP_LOOKUP_ELEM`` -> peek
+- ``BPF_MAP_LOOKUP_AND_DELETE_ELEM`` -> pop
+- ``BPF_MAP_UPDATE_ELEM`` -> push
+
+``BPF_MAP_TYPE_QUEUE`` and ``BPF_MAP_TYPE_STACK`` do not support
+``BPF_F_NO_PREALLOC``.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+.. c:function::
+ long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+
+An element ``value`` can be added to a queue or stack using the
+``bpf_map_push_elem`` helper. The ``flags`` parameter must be set to
+``BPF_ANY`` or ``BPF_EXIST``. If ``flags`` is set to ``BPF_EXIST`` then,
+when the queue or stack is full, the oldest element will be removed to
+make room for ``value`` to be added. Returns ``0`` on success, or
+negative error in case of failure.
+
+.. c:function::
+ long bpf_map_peek_elem(struct bpf_map *map, void *value)
+
+This helper fetches an element ``value`` from a queue or stack without
+removing it. Returns ``0`` on success, or negative error in case of
+failure.
+
+.. c:function::
+ long bpf_map_pop_elem(struct bpf_map *map, void *value)
+
+This helper removes an element from a queue or stack, storing it into
+``value``. Returns ``0`` on success, or negative error in case of failure.
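+
+A sketch of producing and consuming a value in a BPF program, using the
+``queue`` map declared in the example below:
+
+.. code-block:: c
+
+    __u32 in = 42, out;
+
+    if (bpf_map_push_elem(&queue, &in, BPF_ANY))
+        return 0; /* queue is full */
+
+    if (bpf_map_pop_elem(&queue, &out) == 0)
+        bpf_printk("popped %u", out);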
+
+
+Userspace
+---------
+
+.. c:function::
+   int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags)
+
+A userspace program can push ``value`` onto a queue or stack using libbpf's
+``bpf_map_update_elem`` function. The ``key`` parameter must be set to
+``NULL`` and ``flags`` must be set to ``BPF_ANY`` or ``BPF_EXIST``, with the
+same semantics as the ``bpf_map_push_elem`` kernel helper. Returns ``0`` on
+success, or negative error in case of failure.
+
+.. c:function::
+   int bpf_map_lookup_elem(int fd, const void *key, void *value)
+
+A userspace program can peek at the ``value`` at the head of a queue or stack
+using the libbpf ``bpf_map_lookup_elem`` function. The ``key`` parameter must be
+set to ``NULL``. Returns ``0`` on success, or negative error in case of
+failure.
+
+.. c:function::
+   int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
+
+A userspace program can pop a ``value`` from the head of a queue or stack using
+the libbpf ``bpf_map_lookup_and_delete_elem`` function. The ``key`` parameter
+must be set to ``NULL``. Returns ``0`` on success, or negative error in case of
+failure.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+This snippet shows how to declare a queue in a BPF program:
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_QUEUE);
+ __type(value, __u32);
+ __uint(max_entries, 10);
+ } queue SEC(".maps");
+
+
+Userspace
+---------
+
+This snippet shows how to use libbpf's low-level API to create a queue from
+userspace:
+
+.. code-block:: c
+
+ int create_queue()
+ {
+ return bpf_map_create(BPF_MAP_TYPE_QUEUE,
+ "sample_queue", /* name */
+ 0, /* key size, must be zero */
+ sizeof(__u32), /* value size */
+ 10, /* max entries */
+ NULL); /* create options */
+ }
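+
+This snippet shows how a userspace program might push to and pop from the
+queue created above (a sketch):
+
+.. code-block:: c
+
+    int push_pop(int fd)
+    {
+        __u32 in = 42, out;
+        int ret;
+
+        /* The key must be NULL for queues and stacks */
+        ret = bpf_map_update_elem(fd, NULL, &in, BPF_ANY);
+        if (ret < 0)
+            return ret;
+
+        /* Pop removes the value at the head of the queue */
+        return bpf_map_lookup_and_delete_elem(fd, NULL, &out);
+    }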
+
+
+References
+==========
+
+https://lwn.net/ml/netdev/153986858555.9127.14517764371945179514.stgit@kernel/
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index b1ed5a93b6c5..2a4592780141 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1125,7 +1125,7 @@ static int veth_enable_xdp(struct net_device *dev)
int err, i;
rq = &priv->rq[0];
- napi_already_on = (dev->flags & IFF_UP) && rcu_access_pointer(rq->napi);
+ napi_already_on = rcu_access_pointer(rq->napi);
if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8d948bfcb984..798aec816970 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -165,35 +165,43 @@ struct bpf_map_ops {
};
enum {
- /* Support at most 8 pointers in a BPF map value */
- BPF_MAP_VALUE_OFF_MAX = 8,
- BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX +
- 1 + /* for bpf_spin_lock */
- 1, /* for bpf_timer */
+	/* Support at most 10 fields in a BTF type: 8 kptrs + bpf_spin_lock + bpf_timer */
+ BTF_FIELDS_MAX = 10,
+ BPF_MAP_OFF_ARR_MAX = BTF_FIELDS_MAX,
};
-enum bpf_kptr_type {
- BPF_KPTR_UNREF,
- BPF_KPTR_REF,
+enum btf_field_type {
+ BPF_SPIN_LOCK = (1 << 0),
+ BPF_TIMER = (1 << 1),
+ BPF_KPTR_UNREF = (1 << 2),
+ BPF_KPTR_REF = (1 << 3),
+ BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
};
-struct bpf_map_value_off_desc {
+struct btf_field_kptr {
+ struct btf *btf;
+ struct module *module;
+ btf_dtor_kfunc_t dtor;
+ u32 btf_id;
+};
+
+struct btf_field {
u32 offset;
- enum bpf_kptr_type type;
- struct {
- struct btf *btf;
- struct module *module;
- btf_dtor_kfunc_t dtor;
- u32 btf_id;
- } kptr;
+ enum btf_field_type type;
+ union {
+ struct btf_field_kptr kptr;
+ };
};
-struct bpf_map_value_off {
- u32 nr_off;
- struct bpf_map_value_off_desc off[];
+struct btf_record {
+ u32 cnt;
+ u32 field_mask;
+ int spin_lock_off;
+ int timer_off;
+ struct btf_field fields[];
};
-struct bpf_map_off_arr {
+struct btf_field_offs {
u32 cnt;
u32 field_off[BPF_MAP_OFF_ARR_MAX];
u8 field_sz[BPF_MAP_OFF_ARR_MAX];
@@ -214,10 +222,8 @@ struct bpf_map {
u32 max_entries;
u64 map_extra; /* any per-map-type extra fields */
u32 map_flags;
- int spin_lock_off; /* >=0 valid offset, <0 error */
- struct bpf_map_value_off *kptr_off_tab;
- int timer_off; /* >=0 valid offset, <0 error */
u32 id;
+ struct btf_record *record;
int numa_node;
u32 btf_key_type_id;
u32 btf_value_type_id;
@@ -227,7 +233,7 @@ struct bpf_map {
struct obj_cgroup *objcg;
#endif
char name[BPF_OBJ_NAME_LEN];
- struct bpf_map_off_arr *off_arr;
+ struct btf_field_offs *field_offs;
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
@@ -251,33 +257,70 @@ struct bpf_map {
bool frozen; /* write-once; write-protected by freeze_mutex */
};
-static inline bool map_value_has_spin_lock(const struct bpf_map *map)
+static inline const char *btf_field_type_name(enum btf_field_type type)
{
- return map->spin_lock_off >= 0;
+ switch (type) {
+ case BPF_SPIN_LOCK:
+ return "bpf_spin_lock";
+ case BPF_TIMER:
+ return "bpf_timer";
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ return "kptr";
+ default:
+ WARN_ON_ONCE(1);
+ return "unknown";
+ }
}
-static inline bool map_value_has_timer(const struct bpf_map *map)
+static inline u32 btf_field_type_size(enum btf_field_type type)
{
- return map->timer_off >= 0;
+ switch (type) {
+ case BPF_SPIN_LOCK:
+ return sizeof(struct bpf_spin_lock);
+ case BPF_TIMER:
+ return sizeof(struct bpf_timer);
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ return sizeof(u64);
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
}
-static inline bool map_value_has_kptrs(const struct bpf_map *map)
+static inline u32 btf_field_type_align(enum btf_field_type type)
{
- return !IS_ERR_OR_NULL(map->kptr_off_tab);
+ switch (type) {
+ case BPF_SPIN_LOCK:
+ return __alignof__(struct bpf_spin_lock);
+ case BPF_TIMER:
+ return __alignof__(struct bpf_timer);
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ return __alignof__(u64);
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+
+static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type)
+{
+ if (IS_ERR_OR_NULL(rec))
+ return false;
+ return rec->field_mask & type;
}
static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
- if (unlikely(map_value_has_spin_lock(map)))
- memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
- if (unlikely(map_value_has_timer(map)))
- memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
- if (unlikely(map_value_has_kptrs(map))) {
- struct bpf_map_value_off *tab = map->kptr_off_tab;
+ if (!IS_ERR_OR_NULL(map->record)) {
+ struct btf_field *fields = map->record->fields;
+ u32 cnt = map->record->cnt;
int i;
- for (i = 0; i < tab->nr_off; i++)
- *(u64 *)(dst + tab->off[i].offset) = 0;
+ for (i = 0; i < cnt; i++)
+ memset(dst + fields[i].offset, 0, btf_field_type_size(fields[i].type));
}
}
@@ -298,55 +341,64 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
}
/* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */
-static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, bool long_memcpy)
+static inline void bpf_obj_memcpy(struct btf_field_offs *foffs,
+ void *dst, void *src, u32 size,
+ bool long_memcpy)
{
u32 curr_off = 0;
int i;
- if (likely(!map->off_arr)) {
+ if (likely(!foffs)) {
if (long_memcpy)
- bpf_long_memcpy(dst, src, round_up(map->value_size, 8));
+ bpf_long_memcpy(dst, src, round_up(size, 8));
else
- memcpy(dst, src, map->value_size);
+ memcpy(dst, src, size);
return;
}
- for (i = 0; i < map->off_arr->cnt; i++) {
- u32 next_off = map->off_arr->field_off[i];
+ for (i = 0; i < foffs->cnt; i++) {
+ u32 next_off = foffs->field_off[i];
+ u32 sz = next_off - curr_off;
- memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
- curr_off += map->off_arr->field_sz[i];
+ memcpy(dst + curr_off, src + curr_off, sz);
+ curr_off += foffs->field_sz[i];
}
- memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off);
+ memcpy(dst + curr_off, src + curr_off, size - curr_off);
}
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
- __copy_map_value(map, dst, src, false);
+ bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, false);
}
static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src)
{
- __copy_map_value(map, dst, src, true);
+ bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, true);
}
-static inline void zero_map_value(struct bpf_map *map, void *dst)
+static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 size)
{
u32 curr_off = 0;
int i;
- if (likely(!map->off_arr)) {
- memset(dst, 0, map->value_size);
+ if (likely(!foffs)) {
+ memset(dst, 0, size);
return;
}
- for (i = 0; i < map->off_arr->cnt; i++) {
- u32 next_off = map->off_arr->field_off[i];
+ for (i = 0; i < foffs->cnt; i++) {
+ u32 next_off = foffs->field_off[i];
+ u32 sz = next_off - curr_off;
- memset(dst + curr_off, 0, next_off - curr_off);
- curr_off += map->off_arr->field_sz[i];
+ memset(dst + curr_off, 0, sz);
+ curr_off += foffs->field_sz[i];
}
- memset(dst + curr_off, 0, map->value_size - curr_off);
+ memset(dst + curr_off, 0, size - curr_off);
+}
+
+static inline void zero_map_value(struct bpf_map *map, void *dst)
+{
+ bpf_obj_memzero(map->field_offs, dst, map->value_size);
}
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
@@ -1699,11 +1751,14 @@ void bpf_prog_put(struct bpf_prog *prog);
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
-struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset);
-void bpf_map_free_kptr_off_tab(struct bpf_map *map);
-struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map);
-bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b);
-void bpf_map_free_kptrs(struct bpf_map *map, void *map_value);
+struct btf_field *btf_record_find(const struct btf_record *rec,
+ u32 offset, enum btf_field_type type);
+void btf_record_free(struct btf_record *rec);
+void bpf_map_free_record(struct bpf_map *map);
+struct btf_record *btf_record_dup(const struct btf_record *rec);
+bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
+void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
+void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index f9aababc5d78..d80345fa566b 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -163,8 +163,9 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
-struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
- const struct btf_type *t);
+struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
+ u32 field_mask, u32 value_size);
+struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
@@ -288,6 +289,11 @@ static inline bool btf_type_is_typedef(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF;
}
+static inline bool btf_type_is_volatile(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_VOLATILE;
+}
+
static inline bool btf_type_is_func(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 94659f6b3395..fb4c911d2a03 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6445,6 +6445,7 @@ struct bpf_sock_ops {
* the outgoing header has not
* been written yet.
*/
+ __u64 skb_hwtstamp;
};
/* Definitions for bpf_sock_ops_cb_flags */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 832b2659e96e..672eb17ac421 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -306,14 +306,6 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key
return 0;
}
-static void check_and_free_fields(struct bpf_array *arr, void *val)
-{
- if (map_value_has_timer(&arr->map))
- bpf_timer_cancel_and_free(val + arr->map.timer_off);
- if (map_value_has_kptrs(&arr->map))
- bpf_map_free_kptrs(&arr->map, val);
-}
-
/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
@@ -335,13 +327,13 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
return -EEXIST;
if (unlikely((map_flags & BPF_F_LOCK) &&
- !map_value_has_spin_lock(map)))
+ !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
return -EINVAL;
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
copy_map_value(map, val, value);
- check_and_free_fields(array, val);
+ bpf_obj_free_fields(array->map.record, val);
} else {
val = array->value +
(u64)array->elem_size * (index & array->index_mask);
@@ -349,7 +341,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
copy_map_value_locked(map, val, value, false);
else
copy_map_value(map, val, value);
- check_and_free_fields(array, val);
+ bpf_obj_free_fields(array->map.record, val);
}
return 0;
}
@@ -386,7 +378,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
- check_and_free_fields(array, per_cpu_ptr(pptr, cpu));
+ bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
off += size;
}
rcu_read_unlock();
@@ -409,12 +401,12 @@ static void array_map_free_timers(struct bpf_map *map)
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
- /* We don't reset or free kptr on uref dropping to zero. */
- if (!map_value_has_timer(map))
+ /* We don't reset or free fields other than timer on uref dropping to zero. */
+ if (!btf_record_has_field(map->record, BPF_TIMER))
return;
for (i = 0; i < array->map.max_entries; i++)
- bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off);
+ bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
}
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
@@ -423,22 +415,22 @@ static void array_map_free(struct bpf_map *map)
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
- if (map_value_has_kptrs(map)) {
+ if (!IS_ERR_OR_NULL(map->record)) {
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
for (i = 0; i < array->map.max_entries; i++) {
void __percpu *pptr = array->pptrs[i & array->index_mask];
int cpu;
for_each_possible_cpu(cpu) {
- bpf_map_free_kptrs(map, per_cpu_ptr(pptr, cpu));
+ bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu));
cond_resched();
}
}
} else {
for (i = 0; i < array->map.max_entries; i++)
- bpf_map_free_kptrs(map, array_map_elem_ptr(array, i));
+ bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i));
}
- bpf_map_free_kptr_off_tab(map);
+ bpf_map_free_record(map);
}
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 93d9b1b17bc8..37020078d1c1 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -382,7 +382,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
/* BPF_F_LOCK can only be used in a value with spin_lock */
unlikely((map_flags & BPF_F_LOCK) &&
- !map_value_has_spin_lock(&smap->map)))
+ !btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
return ERR_PTR(-EINVAL);
if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 35c07afac924..5579ff3a5b54 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3191,7 +3191,7 @@ static void btf_struct_log(struct btf_verifier_env *env,
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
}
-enum btf_field_type {
+enum btf_field_info_type {
BTF_FIELD_SPIN_LOCK,
BTF_FIELD_TIMER,
BTF_FIELD_KPTR,
@@ -3203,18 +3203,22 @@ enum {
};
struct btf_field_info {
- u32 type_id;
+ enum btf_field_type type;
u32 off;
- enum bpf_kptr_type type;
+ struct {
+ u32 type_id;
+ } kptr;
};
static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
- u32 off, int sz, struct btf_field_info *info)
+ u32 off, int sz, enum btf_field_type field_type,
+ struct btf_field_info *info)
{
if (!__btf_type_is_struct(t))
return BTF_FIELD_IGNORE;
if (t->size != sz)
return BTF_FIELD_IGNORE;
+ info->type = field_type;
info->off = off;
return BTF_FIELD_FOUND;
}
@@ -3222,9 +3226,12 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
u32 off, int sz, struct btf_field_info *info)
{
- enum bpf_kptr_type type;
+ enum btf_field_type type;
u32 res_id;
+ /* Permit modifiers on the pointer itself */
+ if (btf_type_is_volatile(t))
+ t = btf_type_by_id(btf, t->type);
/* For PTR, sz is always == 8 */
if (!btf_type_is_ptr(t))
return BTF_FIELD_IGNORE;
@@ -3248,28 +3255,66 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
if (!__btf_type_is_struct(t))
return -EINVAL;
- info->type_id = res_id;
- info->off = off;
info->type = type;
+ info->off = off;
+ info->kptr.type_id = res_id;
return BTF_FIELD_FOUND;
}
-static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t,
- const char *name, int sz, int align,
- enum btf_field_type field_type,
+static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
+ int *align, int *sz)
+{
+ int type = 0;
+
+ if (field_mask & BPF_SPIN_LOCK) {
+ if (!strcmp(name, "bpf_spin_lock")) {
+ if (*seen_mask & BPF_SPIN_LOCK)
+ return -E2BIG;
+ *seen_mask |= BPF_SPIN_LOCK;
+ type = BPF_SPIN_LOCK;
+ goto end;
+ }
+ }
+ if (field_mask & BPF_TIMER) {
+ if (!strcmp(name, "bpf_timer")) {
+ if (*seen_mask & BPF_TIMER)
+ return -E2BIG;
+ *seen_mask |= BPF_TIMER;
+ type = BPF_TIMER;
+ goto end;
+ }
+ }
+ /* Only return BPF_KPTR when all other types with matchable names fail */
+ if (field_mask & BPF_KPTR) {
+ type = BPF_KPTR_REF;
+ goto end;
+ }
+ return 0;
+end:
+ *sz = btf_field_type_size(type);
+ *align = btf_field_type_align(type);
+ return type;
+}
+
+static int btf_find_struct_field(const struct btf *btf,
+ const struct btf_type *t, u32 field_mask,
struct btf_field_info *info, int info_cnt)
{
+ int ret, idx = 0, align, sz, field_type;
const struct btf_member *member;
struct btf_field_info tmp;
- int ret, idx = 0;
- u32 i, off;
+ u32 i, off, seen_mask = 0;
for_each_member(i, t, member) {
const struct btf_type *member_type = btf_type_by_id(btf,
member->type);
- if (name && strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
+ field_type = btf_get_field_type(__btf_name_by_offset(btf, member_type->name_off),
+ field_mask, &seen_mask, &align, &sz);
+ if (field_type == 0)
continue;
+ if (field_type < 0)
+ return field_type;
off = __btf_member_bit_offset(t, member);
if (off % 8)
@@ -3277,17 +3322,18 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
return -EINVAL;
off /= 8;
if (off % align)
- return -EINVAL;
+ continue;
switch (field_type) {
- case BTF_FIELD_SPIN_LOCK:
- case BTF_FIELD_TIMER:
- ret = btf_find_struct(btf, member_type, off, sz,
+ case BPF_SPIN_LOCK:
+ case BPF_TIMER:
+ ret = btf_find_struct(btf, member_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
return ret;
break;
- case BTF_FIELD_KPTR:
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
ret = btf_find_kptr(btf, member_type, off, sz,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3307,37 +3353,41 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
}
static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
- const char *name, int sz, int align,
- enum btf_field_type field_type,
- struct btf_field_info *info, int info_cnt)
+ u32 field_mask, struct btf_field_info *info,
+ int info_cnt)
{
+ int ret, idx = 0, align, sz, field_type;
const struct btf_var_secinfo *vsi;
struct btf_field_info tmp;
- int ret, idx = 0;
- u32 i, off;
+ u32 i, off, seen_mask = 0;
for_each_vsi(i, t, vsi) {
const struct btf_type *var = btf_type_by_id(btf, vsi->type);
const struct btf_type *var_type = btf_type_by_id(btf, var->type);
- off = vsi->offset;
-
- if (name && strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
+ field_type = btf_get_field_type(__btf_name_by_offset(btf, var_type->name_off),
+ field_mask, &seen_mask, &align, &sz);
+ if (field_type == 0)
continue;
+ if (field_type < 0)
+ return field_type;
+
+ off = vsi->offset;
if (vsi->size != sz)
continue;
if (off % align)
- return -EINVAL;
+ continue;
switch (field_type) {
- case BTF_FIELD_SPIN_LOCK:
- case BTF_FIELD_TIMER:
- ret = btf_find_struct(btf, var_type, off, sz,
+ case BPF_SPIN_LOCK:
+ case BPF_TIMER:
+ ret = btf_find_struct(btf, var_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
return ret;
break;
- case BTF_FIELD_KPTR:
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
ret = btf_find_kptr(btf, var_type, off, sz,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3357,169 +3407,203 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
}
static int btf_find_field(const struct btf *btf, const struct btf_type *t,
- enum btf_field_type field_type,
- struct btf_field_info *info, int info_cnt)
+ u32 field_mask, struct btf_field_info *info,
+ int info_cnt)
{
- const char *name;
- int sz, align;
-
- switch (field_type) {
- case BTF_FIELD_SPIN_LOCK:
- name = "bpf_spin_lock";
- sz = sizeof(struct bpf_spin_lock);
- align = __alignof__(struct bpf_spin_lock);
- break;
- case BTF_FIELD_TIMER:
- name = "bpf_timer";
- sz = sizeof(struct bpf_timer);
- align = __alignof__(struct bpf_timer);
- break;
- case BTF_FIELD_KPTR:
- name = NULL;
- sz = sizeof(u64);
- align = 8;
- break;
- default:
- return -EFAULT;
- }
-
if (__btf_type_is_struct(t))
- return btf_find_struct_field(btf, t, name, sz, align, field_type, info, info_cnt);
+ return btf_find_struct_field(btf, t, field_mask, info, info_cnt);
else if (btf_type_is_datasec(t))
- return btf_find_datasec_var(btf, t, name, sz, align, field_type, info, info_cnt);
+ return btf_find_datasec_var(btf, t, field_mask, info, info_cnt);
return -EINVAL;
}
-/* find 'struct bpf_spin_lock' in map value.
- * return >= 0 offset if found
- * and < 0 in case of error
- */
-int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
+static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
+ struct btf_field_info *info)
{
- struct btf_field_info info;
+ struct module *mod = NULL;
+ const struct btf_type *t;
+ struct btf *kernel_btf;
int ret;
+ s32 id;
- ret = btf_find_field(btf, t, BTF_FIELD_SPIN_LOCK, &info, 1);
- if (ret < 0)
- return ret;
- if (!ret)
- return -ENOENT;
- return info.off;
-}
+ /* Find type in map BTF, and use it to look up the matching type
+ * in vmlinux or module BTFs, by name and kind.
+ */
+ t = btf_type_by_id(btf, info->kptr.type_id);
+ id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
+ &kernel_btf);
+ if (id < 0)
+ return id;
+
+ /* Find and stash the function pointer for the destruction function that
+ * needs to be eventually invoked from the map free path.
+ */
+ if (info->type == BPF_KPTR_REF) {
+ const struct btf_type *dtor_func;
+ const char *dtor_func_name;
+ unsigned long addr;
+ s32 dtor_btf_id;
+
+ /* This call also serves as a whitelist of allowed objects that
+ * can be used as a referenced pointer and be stored in a map at
+ * the same time.
+ */
+ dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id);
+ if (dtor_btf_id < 0) {
+ ret = dtor_btf_id;
+ goto end_btf;
+ }
-int btf_find_timer(const struct btf *btf, const struct btf_type *t)
-{
- struct btf_field_info info;
- int ret;
+ dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id);
+ if (!dtor_func) {
+ ret = -ENOENT;
+ goto end_btf;
+ }
- ret = btf_find_field(btf, t, BTF_FIELD_TIMER, &info, 1);
- if (ret < 0)
- return ret;
- if (!ret)
- return -ENOENT;
- return info.off;
+ if (btf_is_module(kernel_btf)) {
+ mod = btf_try_get_module(kernel_btf);
+ if (!mod) {
+ ret = -ENXIO;
+ goto end_btf;
+ }
+ }
+
+ /* We already verified dtor_func to be btf_type_is_func
+ * in register_btf_id_dtor_kfuncs.
+ */
+ dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off);
+ addr = kallsyms_lookup_name(dtor_func_name);
+ if (!addr) {
+ ret = -EINVAL;
+ goto end_mod;
+ }
+ field->kptr.dtor = (void *)addr;
+ }
+
+ field->kptr.btf_id = id;
+ field->kptr.btf = kernel_btf;
+ field->kptr.module = mod;
+ return 0;
+end_mod:
+ module_put(mod);
+end_btf:
+ btf_put(kernel_btf);
+ return ret;
}
-struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
- const struct btf_type *t)
+struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
+ u32 field_mask, u32 value_size)
{
- struct btf_field_info info_arr[BPF_MAP_VALUE_OFF_MAX];
- struct bpf_map_value_off *tab;
- struct btf *kernel_btf = NULL;
- struct module *mod = NULL;
- int ret, i, nr_off;
+ struct btf_field_info info_arr[BTF_FIELDS_MAX];
+ struct btf_record *rec;
+ int ret, i, cnt;
- ret = btf_find_field(btf, t, BTF_FIELD_KPTR, info_arr, ARRAY_SIZE(info_arr));
+ ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr));
if (ret < 0)
return ERR_PTR(ret);
if (!ret)
return NULL;
- nr_off = ret;
- tab = kzalloc(offsetof(struct bpf_map_value_off, off[nr_off]), GFP_KERNEL | __GFP_NOWARN);
- if (!tab)
+ cnt = ret;
+ rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN);
+ if (!rec)
return ERR_PTR(-ENOMEM);
- for (i = 0; i < nr_off; i++) {
- const struct btf_type *t;
- s32 id;
+ rec->spin_lock_off = -EINVAL;
+ rec->timer_off = -EINVAL;
+ for (i = 0; i < cnt; i++) {
+ if (info_arr[i].off + btf_field_type_size(info_arr[i].type) > value_size) {
+ WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size);
+ ret = -EFAULT;
+ goto end;
+ }
- /* Find type in map BTF, and use it to look up the matching type
- * in vmlinux or module BTFs, by name and kind.
- */
- t = btf_type_by_id(btf, info_arr[i].type_id);
- id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
- &kernel_btf);
- if (id < 0) {
- ret = id;
+ rec->field_mask |= info_arr[i].type;
+ rec->fields[i].offset = info_arr[i].off;
+ rec->fields[i].type = info_arr[i].type;
+
+ switch (info_arr[i].type) {
+ case BPF_SPIN_LOCK:
+ WARN_ON_ONCE(rec->spin_lock_off >= 0);
+ /* Cache offset for faster lookup at runtime */
+ rec->spin_lock_off = rec->fields[i].offset;
+ break;
+ case BPF_TIMER:
+ WARN_ON_ONCE(rec->timer_off >= 0);
+ /* Cache offset for faster lookup at runtime */
+ rec->timer_off = rec->fields[i].offset;
+ break;
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
+ if (ret < 0)
+ goto end;
+ break;
+ default:
+ ret = -EFAULT;
goto end;
}
+ rec->cnt++;
+ }
+ return rec;
+end:
+ btf_record_free(rec);
+ return ERR_PTR(ret);
+}
- /* Find and stash the function pointer for the destruction function that
- * needs to be eventually invoked from the map free path.
- */
- if (info_arr[i].type == BPF_KPTR_REF) {
- const struct btf_type *dtor_func;
- const char *dtor_func_name;
- unsigned long addr;
- s32 dtor_btf_id;
-
- /* This call also serves as a whitelist of allowed objects that
- * can be used as a referenced pointer and be stored in a map at
- * the same time.
- */
- dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id);
- if (dtor_btf_id < 0) {
- ret = dtor_btf_id;
- goto end_btf;
- }
+static int btf_field_offs_cmp(const void *_a, const void *_b, const void *priv)
+{
+ const u32 a = *(const u32 *)_a;
+ const u32 b = *(const u32 *)_b;
- dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id);
- if (!dtor_func) {
- ret = -ENOENT;
- goto end_btf;
- }
+ if (a < b)
+ return -1;
+ else if (a > b)
+ return 1;
+ return 0;
+}
- if (btf_is_module(kernel_btf)) {
- mod = btf_try_get_module(kernel_btf);
- if (!mod) {
- ret = -ENXIO;
- goto end_btf;
- }
- }
+static void btf_field_offs_swap(void *_a, void *_b, int size, const void *priv)
+{
+ struct btf_field_offs *foffs = (void *)priv;
+ u32 *off_base = foffs->field_off;
+ u32 *a = _a, *b = _b;
+ u8 *sz_a, *sz_b;
- /* We already verified dtor_func to be btf_type_is_func
- * in register_btf_id_dtor_kfuncs.
- */
- dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off);
- addr = kallsyms_lookup_name(dtor_func_name);
- if (!addr) {
- ret = -EINVAL;
- goto end_mod;
- }
- tab->off[i].kptr.dtor = (void *)addr;
- }
+ sz_a = foffs->field_sz + (a - off_base);
+ sz_b = foffs->field_sz + (b - off_base);
- tab->off[i].offset = info_arr[i].off;
- tab->off[i].type = info_arr[i].type;
- tab->off[i].kptr.btf_id = id;
- tab->off[i].kptr.btf = kernel_btf;
- tab->off[i].kptr.module = mod;
- }
- tab->nr_off = nr_off;
- return tab;
-end_mod:
- module_put(mod);
-end_btf:
- btf_put(kernel_btf);
-end:
- while (i--) {
- btf_put(tab->off[i].kptr.btf);
- if (tab->off[i].kptr.module)
- module_put(tab->off[i].kptr.module);
+ swap(*a, *b);
+ swap(*sz_a, *sz_b);
+}
+
+struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec)
+{
+ struct btf_field_offs *foffs;
+ u32 i, *off;
+ u8 *sz;
+
+	BUILD_BUG_ON(ARRAY_SIZE(foffs->field_off) != ARRAY_SIZE(foffs->field_sz));
+	if (IS_ERR_OR_NULL(rec) || WARN_ON_ONCE(rec->cnt > ARRAY_SIZE(foffs->field_off)))
+ return NULL;
+
+ foffs = kzalloc(sizeof(*foffs), GFP_KERNEL | __GFP_NOWARN);
+ if (!foffs)
+ return ERR_PTR(-ENOMEM);
+
+ off = foffs->field_off;
+ sz = foffs->field_sz;
+ for (i = 0; i < rec->cnt; i++) {
+ off[i] = rec->fields[i].offset;
+ sz[i] = btf_field_type_size(rec->fields[i].type);
}
- kfree(tab);
- return ERR_PTR(ret);
+ foffs->cnt = rec->cnt;
+
+ if (foffs->cnt == 1)
+ return foffs;
+ sort_r(foffs->field_off, foffs->cnt, sizeof(foffs->field_off[0]),
+ btf_field_offs_cmp, btf_field_offs_swap, foffs);
+ return foffs;
}
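
The sorted offset/size pairs exist so that value-copy helpers can copy around the special fields. A simplified, hedged sketch of that consumption pattern (the helper name is illustrative; the real consumer is the copy_map_value-style gap copy):

	static void copy_skipping_fields(const struct btf_field_offs *foffs,
					 void *dst, const void *src, u32 size)
	{
		u32 cur = 0;
		int i;

		/* field_off[] is sorted ascending, so copy each gap between fields */
		for (i = 0; i < foffs->cnt; i++) {
			u32 next = foffs->field_off[i];

			memcpy(dst + cur, src + cur, next - cur);
			cur = next + foffs->field_sz[i];
		}
		memcpy(dst + cur, src + cur, size - cur);
	}
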
static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
@@ -6367,7 +6451,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
/* kptr_get is only true for kfunc */
if (i == 0 && kptr_get) {
- struct bpf_map_value_off_desc *off_desc;
+ struct btf_field *kptr_field;
if (reg->type != PTR_TO_MAP_VALUE) {
bpf_log(log, "arg#0 expected pointer to map value\n");
@@ -6383,8 +6467,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
- off_desc = bpf_map_kptr_off_contains(reg->map_ptr, reg->off + reg->var_off.value);
- if (!off_desc || off_desc->type != BPF_KPTR_REF) {
+ kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR);
+ if (!kptr_field || kptr_field->type != BPF_KPTR_REF) {
bpf_log(log, "arg#0 no referenced kptr at map value offset=%llu\n",
reg->off + reg->var_off.value);
return -EINVAL;
@@ -6403,8 +6487,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
func_name, i, btf_type_str(ref_t), ref_tname);
return -EINVAL;
}
- if (!btf_struct_ids_match(log, btf, ref_id, 0, off_desc->kptr.btf,
- off_desc->kptr.btf_id, true)) {
+ if (!btf_struct_ids_match(log, btf, ref_id, 0, kptr_field->kptr.btf,
+ kptr_field->kptr.btf_id, true)) {
bpf_log(log, "kernel function %s args#%d expected pointer to %s %s\n",
func_name, i, btf_type_str(ref_t), ref_tname);
return -EINVAL;
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index bb03fdba73bb..6b6a78c04b90 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -4,13 +4,16 @@
* Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
*/
-/* The 'cpumap' is primarily used as a backend map for XDP BPF helper
+/**
+ * DOC: cpu map
+ * The 'cpumap' is primarily used as a backend map for XDP BPF helper
* call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'.
*
- * Unlike devmap which redirects XDP frames out another NIC device,
+ * Unlike devmap which redirects XDP frames out to another NIC device,
* this map type redirects raw XDP frames to another CPU. The remote
* CPU will do SKB-allocation and call the normal network stack.
- *
+ */
+/*
 * This is a scalability and isolation mechanism that allows
 * separating the early driver network XDP layer from the rest of the
 * netstack, and assigning dedicated CPUs for this stage. This
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index f39ee3e05589..50d254cd0709 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -222,7 +222,7 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab)
u32 num_entries = htab->map.max_entries;
int i;
- if (!map_value_has_timer(&htab->map))
+ if (!btf_record_has_field(htab->map.record, BPF_TIMER))
return;
if (htab_has_extra_elems(htab))
num_entries += num_possible_cpus();
@@ -231,28 +231,25 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab)
struct htab_elem *elem;
elem = get_htab_elem(htab, i);
- bpf_timer_cancel_and_free(elem->key +
- round_up(htab->map.key_size, 8) +
- htab->map.timer_off);
+ bpf_obj_free_timer(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
cond_resched();
}
}
-static void htab_free_prealloced_kptrs(struct bpf_htab *htab)
+static void htab_free_prealloced_fields(struct bpf_htab *htab)
{
u32 num_entries = htab->map.max_entries;
int i;
- if (!map_value_has_kptrs(&htab->map))
+ if (IS_ERR_OR_NULL(htab->map.record))
return;
if (htab_has_extra_elems(htab))
num_entries += num_possible_cpus();
-
for (i = 0; i < num_entries; i++) {
struct htab_elem *elem;
elem = get_htab_elem(htab, i);
- bpf_map_free_kptrs(&htab->map, elem->key + round_up(htab->map.key_size, 8));
+ bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
cond_resched();
}
}
@@ -764,10 +761,7 @@ static void check_and_free_fields(struct bpf_htab *htab,
{
void *map_value = elem->key + round_up(htab->map.key_size, 8);
- if (map_value_has_timer(&htab->map))
- bpf_timer_cancel_and_free(map_value + htab->map.timer_off);
- if (map_value_has_kptrs(&htab->map))
- bpf_map_free_kptrs(&htab->map, map_value);
+ bpf_obj_free_fields(htab->map.record, map_value);
}
/* It is called from the bpf_lru_list when the LRU needs to delete
@@ -1091,7 +1085,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
head = &b->head;
if (unlikely(map_flags & BPF_F_LOCK)) {
- if (unlikely(!map_value_has_spin_lock(map)))
+ if (unlikely(!btf_record_has_field(map->record, BPF_SPIN_LOCK)))
return -EINVAL;
/* find an element without taking the bucket lock */
l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
@@ -1474,12 +1468,8 @@ static void htab_free_malloced_timers(struct bpf_htab *htab)
struct htab_elem *l;
hlist_nulls_for_each_entry(l, n, head, hash_node) {
- /* We don't reset or free kptr on uref dropping to zero,
- * hence just free timer.
- */
- bpf_timer_cancel_and_free(l->key +
- round_up(htab->map.key_size, 8) +
- htab->map.timer_off);
+		/* We only free the timer when uref drops to zero */
+ bpf_obj_free_timer(htab->map.record, l->key + round_up(htab->map.key_size, 8));
}
cond_resched_rcu();
}
@@ -1490,8 +1480,8 @@ static void htab_map_free_timers(struct bpf_map *map)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- /* We don't reset or free kptr on uref dropping to zero. */
- if (!map_value_has_timer(&htab->map))
+	/* We only free the timer when uref drops to zero */
+ if (!btf_record_has_field(htab->map.record, BPF_TIMER))
return;
if (!htab_is_prealloc(htab))
htab_free_malloced_timers(htab);
@@ -1517,11 +1507,11 @@ static void htab_map_free(struct bpf_map *map)
if (!htab_is_prealloc(htab)) {
delete_all_elements(htab);
} else {
- htab_free_prealloced_kptrs(htab);
+ htab_free_prealloced_fields(htab);
prealloc_destroy(htab);
}
- bpf_map_free_kptr_off_tab(map);
+ bpf_map_free_record(map);
free_percpu(htab->extra_elems);
bpf_map_area_free(htab->buckets);
bpf_mem_alloc_destroy(&htab->pcpu_ma);
@@ -1675,7 +1665,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
elem_map_flags = attr->batch.elem_flags;
if ((elem_map_flags & ~BPF_F_LOCK) ||
- ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
+ ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
return -EINVAL;
map_flags = attr->batch.flags;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 124fd199ce5c..283f55bbeb70 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -366,9 +366,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
struct bpf_spin_lock *lock;
if (lock_src)
- lock = src + map->spin_lock_off;
+ lock = src + map->record->spin_lock_off;
else
- lock = dst + map->spin_lock_off;
+ lock = dst + map->record->spin_lock_off;
preempt_disable();
__bpf_spin_lock_irqsave(lock);
copy_map_value(map, dst, src);
@@ -1169,7 +1169,7 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map
ret = -ENOMEM;
goto out;
}
- t->value = (void *)timer - map->timer_off;
+ t->value = (void *)timer - map->record->timer_off;
t->map = map;
t->prog = NULL;
rcu_assign_pointer(t->callback_fn, NULL);
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 098cf336fae6..e90d9f63edc5 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -151,7 +151,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
return -EINVAL;
if (unlikely((flags & BPF_F_LOCK) &&
- !map_value_has_spin_lock(map)))
+ !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
return -EINVAL;
storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 135205d0d560..8ca0cca39d49 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -29,7 +29,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
return ERR_PTR(-ENOTSUPP);
}
- if (map_value_has_spin_lock(inner_map)) {
+ if (btf_record_has_field(inner_map->record, BPF_SPIN_LOCK)) {
fdput(f);
return ERR_PTR(-ENOTSUPP);
}
@@ -50,9 +50,15 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
inner_map_meta->value_size = inner_map->value_size;
inner_map_meta->map_flags = inner_map->map_flags;
inner_map_meta->max_entries = inner_map->max_entries;
- inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
- inner_map_meta->timer_off = inner_map->timer_off;
- inner_map_meta->kptr_off_tab = bpf_map_copy_kptr_off_tab(inner_map);
+ inner_map_meta->record = btf_record_dup(inner_map->record);
+	if (IS_ERR(inner_map_meta->record)) {
+		struct btf_record *rec = inner_map_meta->record;
+
+		/* btf_record_dup returns NULL or a valid pointer for an
+		 * invalid/empty/valid record, but ERR_PTR in case of errors.
+		 * For the equality check, NULL and IS_ERR are equivalent.
+		 */
+		fdput(f);
+		kfree(inner_map_meta);
+		return ERR_CAST(rec);
+ }
if (inner_map->btf) {
btf_get(inner_map->btf);
inner_map_meta->btf = inner_map->btf;
@@ -72,7 +78,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
void bpf_map_meta_free(struct bpf_map *map_meta)
{
- bpf_map_free_kptr_off_tab(map_meta);
+ bpf_map_free_record(map_meta);
btf_put(map_meta->btf);
kfree(map_meta);
}
@@ -84,9 +90,8 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
return meta0->map_type == meta1->map_type &&
meta0->key_size == meta1->key_size &&
meta0->value_size == meta1->value_size &&
- meta0->timer_off == meta1->timer_off &&
meta0->map_flags == meta1->map_flags &&
- bpf_map_equal_kptr_off_tab(meta0, meta1);
+ btf_record_equal(meta0->record, meta1->record);
}
void *bpf_map_fd_get_ptr(struct bpf_map *map,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5887592eeb93..85532d301124 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -495,114 +495,152 @@ static void bpf_map_release_memcg(struct bpf_map *map)
}
#endif
-static int bpf_map_kptr_off_cmp(const void *a, const void *b)
+static int btf_field_cmp(const void *a, const void *b)
{
- const struct bpf_map_value_off_desc *off_desc1 = a, *off_desc2 = b;
+ const struct btf_field *f1 = a, *f2 = b;
- if (off_desc1->offset < off_desc2->offset)
+ if (f1->offset < f2->offset)
return -1;
- else if (off_desc1->offset > off_desc2->offset)
+ else if (f1->offset > f2->offset)
return 1;
return 0;
}
-struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset)
+struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset,
+ enum btf_field_type type)
{
- /* Since members are iterated in btf_find_field in increasing order,
- * offsets appended to kptr_off_tab are in increasing order, so we can
- * do bsearch to find exact match.
- */
- struct bpf_map_value_off *tab;
+ struct btf_field *field;
- if (!map_value_has_kptrs(map))
+ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & type))
+ return NULL;
+ field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp);
+ if (!field || !(field->type & type))
return NULL;
- tab = map->kptr_off_tab;
- return bsearch(&offset, tab->off, tab->nr_off, sizeof(tab->off[0]), bpf_map_kptr_off_cmp);
+ return field;
}
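
A hedged usage sketch: since the lookup takes a type mask, one bsearch can match either kptr flavor, which is how the verifier hunks below call it (BPF_KPTR being the union of BPF_KPTR_UNREF and BPF_KPTR_REF):

	struct btf_field *field;

	field = btf_record_find(map->record, off, BPF_KPTR);
	if (field && field->type == BPF_KPTR_REF) {
		/* a referenced kptr lives exactly at offset 'off' */
	}
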
-void bpf_map_free_kptr_off_tab(struct bpf_map *map)
+void btf_record_free(struct btf_record *rec)
{
- struct bpf_map_value_off *tab = map->kptr_off_tab;
int i;
- if (!map_value_has_kptrs(map))
+ if (IS_ERR_OR_NULL(rec))
return;
- for (i = 0; i < tab->nr_off; i++) {
- if (tab->off[i].kptr.module)
- module_put(tab->off[i].kptr.module);
- btf_put(tab->off[i].kptr.btf);
+ for (i = 0; i < rec->cnt; i++) {
+ switch (rec->fields[i].type) {
+ case BPF_SPIN_LOCK:
+ case BPF_TIMER:
+ break;
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ if (rec->fields[i].kptr.module)
+ module_put(rec->fields[i].kptr.module);
+ btf_put(rec->fields[i].kptr.btf);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ continue;
+ }
}
- kfree(tab);
- map->kptr_off_tab = NULL;
+ kfree(rec);
}
-struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map)
+void bpf_map_free_record(struct bpf_map *map)
{
- struct bpf_map_value_off *tab = map->kptr_off_tab, *new_tab;
- int size, i;
+ btf_record_free(map->record);
+ map->record = NULL;
+}
- if (!map_value_has_kptrs(map))
- return ERR_PTR(-ENOENT);
- size = offsetof(struct bpf_map_value_off, off[tab->nr_off]);
- new_tab = kmemdup(tab, size, GFP_KERNEL | __GFP_NOWARN);
- if (!new_tab)
+struct btf_record *btf_record_dup(const struct btf_record *rec)
+{
+ const struct btf_field *fields;
+ struct btf_record *new_rec;
+ int ret, size, i;
+
+ if (IS_ERR_OR_NULL(rec))
+ return NULL;
+ size = offsetof(struct btf_record, fields[rec->cnt]);
+ new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN);
+ if (!new_rec)
return ERR_PTR(-ENOMEM);
- /* Do a deep copy of the kptr_off_tab */
- for (i = 0; i < tab->nr_off; i++) {
- btf_get(tab->off[i].kptr.btf);
- if (tab->off[i].kptr.module && !try_module_get(tab->off[i].kptr.module)) {
- while (i--) {
- if (tab->off[i].kptr.module)
- module_put(tab->off[i].kptr.module);
- btf_put(tab->off[i].kptr.btf);
+ /* Do a deep copy of the btf_record */
+ fields = rec->fields;
+ new_rec->cnt = 0;
+ for (i = 0; i < rec->cnt; i++) {
+ switch (fields[i].type) {
+ case BPF_SPIN_LOCK:
+ case BPF_TIMER:
+ break;
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ btf_get(fields[i].kptr.btf);
+ if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
+ ret = -ENXIO;
+ goto free;
}
- kfree(new_tab);
- return ERR_PTR(-ENXIO);
+ break;
+ default:
+ ret = -EFAULT;
+ WARN_ON_ONCE(1);
+ goto free;
}
+ new_rec->cnt++;
}
- return new_tab;
+ return new_rec;
+free:
+ btf_record_free(new_rec);
+ return ERR_PTR(ret);
}
-bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b)
+bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b)
{
- struct bpf_map_value_off *tab_a = map_a->kptr_off_tab, *tab_b = map_b->kptr_off_tab;
- bool a_has_kptr = map_value_has_kptrs(map_a), b_has_kptr = map_value_has_kptrs(map_b);
+ bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b);
int size;
- if (!a_has_kptr && !b_has_kptr)
+ if (!a_has_fields && !b_has_fields)
return true;
- if (a_has_kptr != b_has_kptr)
+ if (a_has_fields != b_has_fields)
return false;
- if (tab_a->nr_off != tab_b->nr_off)
+ if (rec_a->cnt != rec_b->cnt)
return false;
- size = offsetof(struct bpf_map_value_off, off[tab_a->nr_off]);
- return !memcmp(tab_a, tab_b, size);
+ size = offsetof(struct btf_record, fields[rec_a->cnt]);
+ return !memcmp(rec_a, rec_b, size);
}
-/* Caller must ensure map_value_has_kptrs is true. Note that this function can
- * be called on a map value while the map_value is visible to BPF programs, as
- * it ensures the correct synchronization, and we already enforce the same using
- * the bpf_kptr_xchg helper on the BPF program side for referenced kptrs.
- */
-void bpf_map_free_kptrs(struct bpf_map *map, void *map_value)
+void bpf_obj_free_timer(const struct btf_record *rec, void *obj)
{
- struct bpf_map_value_off *tab = map->kptr_off_tab;
- unsigned long *btf_id_ptr;
- int i;
+ if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER)))
+ return;
+ bpf_timer_cancel_and_free(obj + rec->timer_off);
+}
- for (i = 0; i < tab->nr_off; i++) {
- struct bpf_map_value_off_desc *off_desc = &tab->off[i];
- unsigned long old_ptr;
+void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
+{
+ const struct btf_field *fields;
+ int i;
- btf_id_ptr = map_value + off_desc->offset;
- if (off_desc->type == BPF_KPTR_UNREF) {
- u64 *p = (u64 *)btf_id_ptr;
+ if (IS_ERR_OR_NULL(rec))
+ return;
+ fields = rec->fields;
+ for (i = 0; i < rec->cnt; i++) {
+ const struct btf_field *field = &fields[i];
+ void *field_ptr = obj + field->offset;
- WRITE_ONCE(*p, 0);
+ switch (fields[i].type) {
+ case BPF_SPIN_LOCK:
+ break;
+ case BPF_TIMER:
+ bpf_timer_cancel_and_free(field_ptr);
+ break;
+ case BPF_KPTR_UNREF:
+ WRITE_ONCE(*(u64 *)field_ptr, 0);
+ break;
+ case BPF_KPTR_REF:
+ field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0));
+ break;
+ default:
+ WARN_ON_ONCE(1);
continue;
}
- old_ptr = xchg(btf_id_ptr, 0);
- off_desc->kptr.dtor((void *)old_ptr);
}
}
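
The xchg in the BPF_KPTR_REF case mirrors how programs transfer ownership of a referenced kptr. A hedged BPF-program-side sketch (the release kfunc name is illustrative):

	struct prog_test_ref_kfunc *old;

	/* store the new pointer; any previous owner comes back to us */
	old = bpf_kptr_xchg(&v->ptr, p);
	if (old)
		bpf_kfunc_call_test_release(old);	/* illustrative release kfunc */
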
@@ -612,10 +650,10 @@ static void bpf_map_free_deferred(struct work_struct *work)
struct bpf_map *map = container_of(work, struct bpf_map, work);
security_bpf_map_free(map);
- kfree(map->off_arr);
+ kfree(map->field_offs);
bpf_map_release_memcg(map);
/* implementation dependent freeing, map_free callback also does
- * bpf_map_free_kptr_off_tab, if needed.
+ * bpf_map_free_record, if needed.
*/
map->ops->map_free(map);
}
@@ -778,8 +816,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
struct bpf_map *map = filp->private_data;
int err;
- if (!map->ops->map_mmap || map_value_has_spin_lock(map) ||
- map_value_has_timer(map) || map_value_has_kptrs(map))
+ if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record))
return -ENOTSUPP;
if (!(vma->vm_flags & VM_SHARED))
@@ -906,84 +943,6 @@ int map_check_no_btf(const struct bpf_map *map,
return -ENOTSUPP;
}
-static int map_off_arr_cmp(const void *_a, const void *_b, const void *priv)
-{
- const u32 a = *(const u32 *)_a;
- const u32 b = *(const u32 *)_b;
-
- if (a < b)
- return -1;
- else if (a > b)
- return 1;
- return 0;
-}
-
-static void map_off_arr_swap(void *_a, void *_b, int size, const void *priv)
-{
- struct bpf_map *map = (struct bpf_map *)priv;
- u32 *off_base = map->off_arr->field_off;
- u32 *a = _a, *b = _b;
- u8 *sz_a, *sz_b;
-
- sz_a = map->off_arr->field_sz + (a - off_base);
- sz_b = map->off_arr->field_sz + (b - off_base);
-
- swap(*a, *b);
- swap(*sz_a, *sz_b);
-}
-
-static int bpf_map_alloc_off_arr(struct bpf_map *map)
-{
- bool has_spin_lock = map_value_has_spin_lock(map);
- bool has_timer = map_value_has_timer(map);
- bool has_kptrs = map_value_has_kptrs(map);
- struct bpf_map_off_arr *off_arr;
- u32 i;
-
- if (!has_spin_lock && !has_timer && !has_kptrs) {
- map->off_arr = NULL;
- return 0;
- }
-
- off_arr = kmalloc(sizeof(*map->off_arr), GFP_KERNEL | __GFP_NOWARN);
- if (!off_arr)
- return -ENOMEM;
- map->off_arr = off_arr;
-
- off_arr->cnt = 0;
- if (has_spin_lock) {
- i = off_arr->cnt;
-
- off_arr->field_off[i] = map->spin_lock_off;
- off_arr->field_sz[i] = sizeof(struct bpf_spin_lock);
- off_arr->cnt++;
- }
- if (has_timer) {
- i = off_arr->cnt;
-
- off_arr->field_off[i] = map->timer_off;
- off_arr->field_sz[i] = sizeof(struct bpf_timer);
- off_arr->cnt++;
- }
- if (has_kptrs) {
- struct bpf_map_value_off *tab = map->kptr_off_tab;
- u32 *off = &off_arr->field_off[off_arr->cnt];
- u8 *sz = &off_arr->field_sz[off_arr->cnt];
-
- for (i = 0; i < tab->nr_off; i++) {
- *off++ = tab->off[i].offset;
- *sz++ = sizeof(u64);
- }
- off_arr->cnt += tab->nr_off;
- }
-
- if (off_arr->cnt == 1)
- return 0;
- sort_r(off_arr->field_off, off_arr->cnt, sizeof(off_arr->field_off[0]),
- map_off_arr_cmp, map_off_arr_swap, map);
- return 0;
-}
-
static int map_check_btf(struct bpf_map *map, const struct btf *btf,
u32 btf_key_id, u32 btf_value_id)
{
@@ -1006,40 +965,11 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
if (!value_type || value_size != map->value_size)
return -EINVAL;
- map->spin_lock_off = btf_find_spin_lock(btf, value_type);
-
- if (map_value_has_spin_lock(map)) {
- if (map->map_flags & BPF_F_RDONLY_PROG)
- return -EACCES;
- if (map->map_type != BPF_MAP_TYPE_HASH &&
- map->map_type != BPF_MAP_TYPE_ARRAY &&
- map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
- map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
- map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
- map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
- map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
- return -ENOTSUPP;
- if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
- map->value_size) {
- WARN_ONCE(1,
- "verifier bug spin_lock_off %d value_size %d\n",
- map->spin_lock_off, map->value_size);
- return -EFAULT;
- }
- }
-
- map->timer_off = btf_find_timer(btf, value_type);
- if (map_value_has_timer(map)) {
- if (map->map_flags & BPF_F_RDONLY_PROG)
- return -EACCES;
- if (map->map_type != BPF_MAP_TYPE_HASH &&
- map->map_type != BPF_MAP_TYPE_LRU_HASH &&
- map->map_type != BPF_MAP_TYPE_ARRAY)
- return -EOPNOTSUPP;
- }
+ map->record = btf_parse_fields(btf, value_type, BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR,
+ map->value_size);
+ if (!IS_ERR_OR_NULL(map->record)) {
+ int i;
- map->kptr_off_tab = btf_parse_kptrs(btf, value_type);
- if (map_value_has_kptrs(map)) {
if (!bpf_capable()) {
ret = -EPERM;
goto free_map_tab;
@@ -1048,12 +978,45 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
ret = -EACCES;
goto free_map_tab;
}
- if (map->map_type != BPF_MAP_TYPE_HASH &&
- map->map_type != BPF_MAP_TYPE_LRU_HASH &&
- map->map_type != BPF_MAP_TYPE_ARRAY &&
- map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
- ret = -EOPNOTSUPP;
- goto free_map_tab;
+ for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) {
+ switch (map->record->field_mask & (1 << i)) {
+ case 0:
+ continue;
+ case BPF_SPIN_LOCK:
+ if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
+ ret = -EOPNOTSUPP;
+ goto free_map_tab;
+ }
+ break;
+ case BPF_TIMER:
+ if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+ map->map_type != BPF_MAP_TYPE_ARRAY) {
+					ret = -EOPNOTSUPP;
+ goto free_map_tab;
+ }
+ break;
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+ map->map_type != BPF_MAP_TYPE_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
+ ret = -EOPNOTSUPP;
+ goto free_map_tab;
+ }
+ break;
+ default:
+ /* Fail if map_type checks are missing for a field type */
+ ret = -EOPNOTSUPP;
+ goto free_map_tab;
+ }
}
}
@@ -1065,7 +1028,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
return ret;
free_map_tab:
- bpf_map_free_kptr_off_tab(map);
+ bpf_map_free_record(map);
return ret;
}
@@ -1074,6 +1037,7 @@ free_map_tab:
static int map_create(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
+ struct btf_field_offs *foffs;
struct bpf_map *map;
int f_flags;
int err;
@@ -1118,8 +1082,6 @@ static int map_create(union bpf_attr *attr)
mutex_init(&map->freeze_mutex);
spin_lock_init(&map->owner.lock);
- map->spin_lock_off = -EINVAL;
- map->timer_off = -EINVAL;
if (attr->btf_key_type_id || attr->btf_value_type_id ||
/* Even the map's value is a kernel's struct,
* the bpf_prog.o must have BTF to begin with
@@ -1155,13 +1117,17 @@ static int map_create(union bpf_attr *attr)
attr->btf_vmlinux_value_type_id;
}
- err = bpf_map_alloc_off_arr(map);
- if (err)
+
+ foffs = btf_parse_field_offs(map->record);
+ if (IS_ERR(foffs)) {
+ err = PTR_ERR(foffs);
goto free_map;
+ }
+ map->field_offs = foffs;
err = security_bpf_map_alloc(map);
if (err)
- goto free_map_off_arr;
+ goto free_map_field_offs;
err = bpf_map_alloc_id(map);
if (err)
@@ -1185,8 +1151,8 @@ static int map_create(union bpf_attr *attr)
free_map_sec:
security_bpf_map_free(map);
-free_map_off_arr:
- kfree(map->off_arr);
+free_map_field_offs:
+ kfree(map->field_offs);
free_map:
btf_put(map->btf);
map->ops->map_free(map);
@@ -1333,7 +1299,7 @@ static int map_lookup_elem(union bpf_attr *attr)
}
if ((attr->flags & BPF_F_LOCK) &&
- !map_value_has_spin_lock(map)) {
+ !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
err = -EINVAL;
goto err_put;
}
@@ -1406,7 +1372,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
}
if ((attr->flags & BPF_F_LOCK) &&
- !map_value_has_spin_lock(map)) {
+ !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
err = -EINVAL;
goto err_put;
}
@@ -1569,7 +1535,7 @@ int generic_map_delete_batch(struct bpf_map *map,
return -EINVAL;
if ((attr->batch.elem_flags & BPF_F_LOCK) &&
- !map_value_has_spin_lock(map)) {
+ !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
return -EINVAL;
}
@@ -1626,7 +1592,7 @@ int generic_map_update_batch(struct bpf_map *map,
return -EINVAL;
if ((attr->batch.elem_flags & BPF_F_LOCK) &&
- !map_value_has_spin_lock(map)) {
+ !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
return -EINVAL;
}
@@ -1689,7 +1655,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
return -EINVAL;
if ((attr->batch.elem_flags & BPF_F_LOCK) &&
- !map_value_has_spin_lock(map))
+ !btf_record_has_field(map->record, BPF_SPIN_LOCK))
return -EINVAL;
value_size = bpf_map_value_size(map);
@@ -1811,7 +1777,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
}
if ((attr->flags & BPF_F_LOCK) &&
- !map_value_has_spin_lock(map)) {
+ !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
err = -EINVAL;
goto err_put;
}
@@ -1882,8 +1848,7 @@ static int map_freeze(const union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
- if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
- map_value_has_timer(map) || map_value_has_kptrs(map)) {
+ if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) {
fdput(f);
return -ENOTSUPP;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e97917b97b3f..07c0259dfc1a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -262,7 +262,7 @@ struct bpf_call_arg_meta {
struct btf *ret_btf;
u32 ret_btf_id;
u32 subprogno;
- struct bpf_map_value_off_desc *kptr_off_desc;
+ struct btf_field *kptr_field;
u8 uninit_dynptr_regno;
};
@@ -454,14 +454,7 @@ static bool reg_type_not_null(enum bpf_reg_type type)
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
return reg->type == PTR_TO_MAP_VALUE &&
- map_value_has_spin_lock(reg->map_ptr);
-}
-
-static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
-{
- type = base_type(type);
- return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK ||
- type == PTR_TO_MEM || type == PTR_TO_BTF_ID;
+ btf_record_has_field(reg->map_ptr->record, BPF_SPIN_LOCK);
}
static bool type_is_rdonly_mem(u32 type)
@@ -511,6 +504,15 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id)
return func_id == BPF_FUNC_dynptr_data;
}
+static bool is_callback_calling_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_for_each_map_elem ||
+ func_id == BPF_FUNC_timer_set_callback ||
+ func_id == BPF_FUNC_find_vma ||
+ func_id == BPF_FUNC_loop ||
+ func_id == BPF_FUNC_user_ringbuf_drain;
+}
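
For reference, a minimal BPF C fragment exercising one of the helpers listed above; bpf_loop() runs 'cb' as a callback subprog, which is why backtracking must treat such helpers like BPF_PSEUDO_CALL (sketch, not from this patch):

	static long cb(__u64 index, void *ctx)
	{
		return 0;	/* 0 continues the loop, 1 stops it */
	}

	SEC("tc")
	int prog(struct __sk_buff *skb)
	{
		bpf_loop(16, cb, NULL, 0);
		return 0;
	}
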
+
static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
const struct bpf_map *map)
{
@@ -875,7 +877,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (reg->id)
verbose_a("id=%d", reg->id);
- if (reg_type_may_be_refcounted_or_null(t) && reg->ref_obj_id)
+ if (reg->ref_obj_id)
verbose_a("ref_obj_id=%d", reg->ref_obj_id);
if (t != SCALAR_VALUE)
verbose_a("off=%d", reg->off);
@@ -1400,7 +1402,7 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
/* transfer reg's id which is unique for every map_lookup_elem
* as UID of the inner map.
*/
- if (map_value_has_timer(map->inner_map_meta))
+ if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
reg->map_uid = reg->id;
} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
reg->type = PTR_TO_XDP_SOCK;
@@ -1689,7 +1691,7 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env,
reg->type = SCALAR_VALUE;
reg->var_off = tnum_unknown;
reg->frameno = 0;
- reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
+ reg->precise = !env->bpf_capable;
__mark_reg_unbounded(reg);
}
@@ -2658,6 +2660,11 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
if (opcode == BPF_CALL) {
if (insn->src_reg == BPF_PSEUDO_CALL)
return -ENOTSUPP;
+ /* BPF helpers that invoke callback subprogs are
+ * equivalent to BPF_PSEUDO_CALL above
+ */
+ if (insn->src_reg == 0 && is_callback_calling_function(insn->imm))
+ return -ENOTSUPP;
/* regular helper call sets R0 */
*reg_mask &= ~1;
if (*reg_mask & 0x3f) {
@@ -2747,8 +2754,11 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env,
/* big hammer: mark all scalars precise in this path.
* pop_stack may still get !precise scalars.
+	 * We also skip the current state and go straight to the first parent
+	 * state, because precision markings in the current non-checkpointed
+	 * state are not needed. See why in the comment in __mark_chain_precision below.
*/
- for (; st; st = st->parent)
+ for (st = st->parent; st; st = st->parent) {
for (i = 0; i <= st->curframe; i++) {
func = st->frame[i];
for (j = 0; j < BPF_REG_FP; j++) {
@@ -2766,9 +2776,122 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env,
reg->precise = true;
}
}
+ }
}
-static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
+static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ struct bpf_func_state *func;
+ struct bpf_reg_state *reg;
+ int i, j;
+
+ for (i = 0; i <= st->curframe; i++) {
+ func = st->frame[i];
+ for (j = 0; j < BPF_REG_FP; j++) {
+ reg = &func->regs[j];
+ if (reg->type != SCALAR_VALUE)
+ continue;
+ reg->precise = false;
+ }
+ for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
+ if (!is_spilled_reg(&func->stack[j]))
+ continue;
+ reg = &func->stack[j].spilled_ptr;
+ if (reg->type != SCALAR_VALUE)
+ continue;
+ reg->precise = false;
+ }
+ }
+}
+
+/*
+ * __mark_chain_precision() backtracks BPF program instruction sequence and
+ * chain of verifier states making sure that register *regno* (if regno >= 0)
+ * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
+ * SCALARS, as well as any other registers and slots that contribute to
+ * a tracked state of given registers/stack slots, depending on specific BPF
+ * assembly instructions (see backtrack_insns() for exact instruction handling
+ * logic). This backtracking relies on recorded jmp_history and is able to
+ * traverse entire chain of parent states. This process ends only when all the
+ * necessary registers/slots and their transitive dependencies are marked as
+ * precise.
+ *
+ * One important and subtle aspect is that precise marks *do not matter* in
+ * the currently verified state (current state). It is important to understand
+ * why this is the case.
+ *
+ * First, note that current state is the state that is not yet "checkpointed",
+ * i.e., it is not yet put into env->explored_states, and it has no children
+ * states as well. It's ephemeral, and can end up either a) being discarded if
+ * compatible explored state is found at some point or BPF_EXIT instruction is
+ * reached or b) checkpointed and put into env->explored_states, branching out
+ * into one or more children states.
+ *
+ * In the former case, precise markings in current state are completely
+ * ignored by state comparison code (see regsafe() for details). Only
+ * checkpointed ("old") state precise markings are important, and if old
+ * state's register/slot is precise, regsafe() assumes current state's
+ * register/slot as precise and checks value ranges exactly and precisely. If
+ * states turn out to be compatible, current state's necessary precise
+ * markings and any required parent states' precise markings are enforced
+ * after the fact with propagate_precision() logic. But it's
+ * important to realize that in this case, even after marking current state
+ * registers/slots as precise, we immediately discard current state. So what
+ * actually matters is any of the precise markings propagated into current
+ * state's parent states, which are always checkpointed (due to b) case above).
+ * As such, for scenario a) it doesn't matter if current state has precise
+ * markings set or not.
+ *
+ * Now, for the scenario b), checkpointing and forking into child(ren)
+ * state(s). Note that before current state gets to checkpointing step, any
+ * processed instruction always assumes precise SCALAR register/slot
+ * knowledge: if precise value or range is useful to prune jump branch, BPF
+ * verifier takes this opportunity enthusiastically. Similarly, when
+ * register's value is used to calculate offset or memory address, exact
+ * knowledge of SCALAR range is assumed, checked, and enforced. So, just as
+ * state comparison ignores precise markings, the BPF verifier both ignores
+ * and assumes precise markings *at will* during the instruction
+ * verification process. But as the verifier
+ * assumes precision, it also propagates any precision dependencies across
+ * parent states, which are not yet finalized, so can be further restricted
+ * based on new knowledge gained from restrictions enforced by their children
+ * states. This is so that once those parent states are finalized, i.e., when
+ * they have no more active children state, state comparison logic in
+ * is_state_visited() would enforce strict and precise SCALAR ranges, if
+ * required for correctness.
+ *
+ * To build a bit more intuition, note also that once a state is checkpointed,
+ * the path we took to get to that state is not important. This is a crucial
+ * property for state pruning. When a state is checkpointed and finalized at
+ * some instruction index, it can be correctly and safely used to "short
+ * circuit" any *compatible* state that reaches exactly the same instruction
+ * index. I.e., if we jumped to that instruction from a completely different
+ * code path than original finalized state was derived from, it doesn't
+ * matter, current state can be discarded because from that instruction
+ * forward having a compatible state will ensure we will safely reach the
+ * exit. States describe preconditions for further exploration, but completely
+ * forget the history of how we got here.
+ *
+ * This also means that even if we needed precise SCALAR range to get to
+ * finalized state, but from that point forward *that same* SCALAR register is
+ * never used in a precise context (i.e., its precise value is not needed for
+ * correctness), it's correct and safe to mark such register as "imprecise"
+ * (i.e., precise marking set to false). This is what we rely on when we do
+ * not set precise marking in current state. If no child state requires
+ * precision for any given SCALAR register, it's safe to dictate that it can
+ * be imprecise. If any child state does require this register to be precise,
+ * we'll mark it precise later retroactively during precise markings
+ * propagation from child state to parent states.
+ *
+ * Skipping precise marking setting in current state is a mild version of
+ * relying on the above observation. But we can utilize this property even
+ * more aggressively by proactively forgetting any precise marking in the
+ * current state (which we inherited from the parent state), right before we
+ * checkpoint it and branch off into new child state. This is done by
+ * mark_all_scalars_imprecise() to hopefully get more permissive and generic
+ * finalized states which help in short circuiting more future states.
+ */
+static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno,
int spi)
{
struct bpf_verifier_state *st = env->cur_state;
@@ -2785,18 +2908,18 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
if (!env->bpf_capable)
return 0;
- func = st->frame[st->curframe];
+ /* Do sanity checks against current state of register and/or stack
+ * slot, but don't set precise flag in current state, as precision
+ * tracking in the current state is unnecessary.
+ */
+ func = st->frame[frame];
if (regno >= 0) {
reg = &func->regs[regno];
if (reg->type != SCALAR_VALUE) {
WARN_ONCE(1, "backtracing misuse");
return -EFAULT;
}
- if (!reg->precise)
- new_marks = true;
- else
- reg_mask = 0;
- reg->precise = true;
+ new_marks = true;
}
while (spi >= 0) {
@@ -2809,11 +2932,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
stack_mask = 0;
break;
}
- if (!reg->precise)
- new_marks = true;
- else
- stack_mask = 0;
- reg->precise = true;
+ new_marks = true;
break;
}
@@ -2821,12 +2940,42 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
return 0;
if (!reg_mask && !stack_mask)
return 0;
+
for (;;) {
DECLARE_BITMAP(mask, 64);
u32 history = st->jmp_history_cnt;
if (env->log.level & BPF_LOG_LEVEL2)
verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
+
+ if (last_idx < 0) {
+ /* we are at the entry into subprog, which
+ * is expected for global funcs, but only if
+ * requested precise registers are R1-R5
+ * (which are global func's input arguments)
+ */
+ if (st->curframe == 0 &&
+ st->frame[0]->subprogno > 0 &&
+ st->frame[0]->callsite == BPF_MAIN_FUNC &&
+ stack_mask == 0 && (reg_mask & ~0x3e) == 0) {
+ bitmap_from_u64(mask, reg_mask);
+ for_each_set_bit(i, mask, 32) {
+ reg = &st->frame[0]->regs[i];
+ if (reg->type != SCALAR_VALUE) {
+ reg_mask &= ~(1u << i);
+ continue;
+ }
+ reg->precise = true;
+ }
+ return 0;
+ }
+
+ verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n",
+ st->frame[0]->subprogno, reg_mask, stack_mask);
+ WARN_ONCE(1, "verifier backtracking bug");
+ return -EFAULT;
+ }
+
for (i = last_idx;;) {
if (skip_first) {
err = 0;
@@ -2866,7 +3015,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
break;
new_marks = false;
- func = st->frame[st->curframe];
+ func = st->frame[frame];
bitmap_from_u64(mask, reg_mask);
for_each_set_bit(i, mask, 32) {
reg = &func->regs[i];
@@ -2932,12 +3081,17 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
int mark_chain_precision(struct bpf_verifier_env *env, int regno)
{
- return __mark_chain_precision(env, regno, -1);
+ return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
+}
+
+static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
+{
+ return __mark_chain_precision(env, frame, regno, -1);
}
-static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
+static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
{
- return __mark_chain_precision(env, -1, spi);
+ return __mark_chain_precision(env, frame, -1, spi);
}
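
To make the precision machinery concrete, a hedged sketch of the BPF C pattern that forces backtracking to mark a scalar precise ('buf' is an assumed bounded array):

	__u32 i = bpf_get_prandom_u32();	/* SCALAR with unknown value */

	if (i >= 16)				/* narrows i to [0, 15] */
		return 0;
	/* i now forms a memory offset: its exact range matters for safety,
	 * so __mark_chain_precision() walks jmp_history backwards and marks
	 * i (and anything it was derived from) precise.
	 */
	return buf[i];
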
static bool is_spillable_regtype(enum bpf_reg_type type)
@@ -3186,14 +3340,17 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
mark_stack_slot_scratched(env, spi);
- if (!env->allow_ptr_leaks
- && *stype != NOT_INIT
- && *stype != SCALAR_VALUE) {
- /* Reject the write if there's are spilled pointers in
- * range. If we didn't reject here, the ptr status
- * would be erased below (even though not all slots are
- * actually overwritten), possibly opening the door to
- * leaks.
+ if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
+ /* Reject the write if range we may write to has not
+ * been initialized beforehand. If we didn't reject
+ * here, the ptr status would be erased below (even
+ * though not all slots are actually overwritten),
+ * possibly opening the door to leaks.
+ *
+ * We do however catch STACK_INVALID case below, and
+ * only allow reading possibly uninitialized memory
+ * later for CAP_PERFMON, as the write may not happen to
+ * that slot.
*/
verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
insn_idx, i);
@@ -3683,15 +3840,15 @@ int check_ptr_off_reg(struct bpf_verifier_env *env,
}
static int map_kptr_match_type(struct bpf_verifier_env *env,
- struct bpf_map_value_off_desc *off_desc,
+ struct btf_field *kptr_field,
struct bpf_reg_state *reg, u32 regno)
{
- const char *targ_name = kernel_type_name(off_desc->kptr.btf, off_desc->kptr.btf_id);
+ const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
int perm_flags = PTR_MAYBE_NULL;
const char *reg_name = "";
/* Only unreferenced case accepts untrusted pointers */
- if (off_desc->type == BPF_KPTR_UNREF)
+ if (kptr_field->type == BPF_KPTR_UNREF)
perm_flags |= PTR_UNTRUSTED;
if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
@@ -3738,15 +3895,15 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
* strict mode to true for type match.
*/
if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
- off_desc->kptr.btf, off_desc->kptr.btf_id,
- off_desc->type == BPF_KPTR_REF))
+ kptr_field->kptr.btf, kptr_field->kptr.btf_id,
+ kptr_field->type == BPF_KPTR_REF))
goto bad_type;
return 0;
bad_type:
verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
reg_type_str(env, reg->type), reg_name);
verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
- if (off_desc->type == BPF_KPTR_UNREF)
+ if (kptr_field->type == BPF_KPTR_UNREF)
verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
targ_name);
else
@@ -3756,7 +3913,7 @@ bad_type:
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
int value_regno, int insn_idx,
- struct bpf_map_value_off_desc *off_desc)
+ struct btf_field *kptr_field)
{
struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
int class = BPF_CLASS(insn->code);
@@ -3766,7 +3923,7 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
* - Reject cases where variable offset may touch kptr
* - size of access (must be BPF_DW)
* - tnum_is_const(reg->var_off)
- * - off_desc->offset == off + reg->var_off.value
+ * - kptr_field->offset == off + reg->var_off.value
*/
/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
if (BPF_MODE(insn->code) != BPF_MEM) {
@@ -3777,7 +3934,7 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
/* We only allow loading referenced kptr, since it will be marked as
* untrusted, similar to unreferenced kptr.
*/
- if (class != BPF_LDX && off_desc->type == BPF_KPTR_REF) {
+ if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
verbose(env, "store to referenced kptr disallowed\n");
return -EACCES;
}
@@ -3787,19 +3944,19 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
/* We can simply mark the value_regno receiving the pointer
* value from map as PTR_TO_BTF_ID, with the correct type.
*/
- mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, off_desc->kptr.btf,
- off_desc->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
+ mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
+ kptr_field->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
/* For mark_ptr_or_null_reg */
val_reg->id = ++env->id_gen;
} else if (class == BPF_STX) {
val_reg = reg_state(env, value_regno);
if (!register_is_null(val_reg) &&
- map_kptr_match_type(env, off_desc, val_reg, value_regno))
+ map_kptr_match_type(env, kptr_field, val_reg, value_regno))
return -EACCES;
} else if (class == BPF_ST) {
if (insn->imm) {
verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
- off_desc->offset);
+ kptr_field->offset);
return -EACCES;
}
} else {
@@ -3818,45 +3975,30 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
struct bpf_func_state *state = vstate->frame[vstate->curframe];
struct bpf_reg_state *reg = &state->regs[regno];
struct bpf_map *map = reg->map_ptr;
- int err;
+ struct btf_record *rec;
+ int err, i;
err = check_mem_region_access(env, regno, off, size, map->value_size,
zero_size_allowed);
if (err)
return err;
- if (map_value_has_spin_lock(map)) {
- u32 lock = map->spin_lock_off;
+ if (IS_ERR_OR_NULL(map->record))
+ return 0;
+ rec = map->record;
+ for (i = 0; i < rec->cnt; i++) {
+ struct btf_field *field = &rec->fields[i];
+ u32 p = field->offset;
- /* if any part of struct bpf_spin_lock can be touched by
- * load/store reject this program.
- * To check that [x1, x2) overlaps with [y1, y2)
+ /* If any part of a field can be touched by load/store, reject
+ * this program. To check that [x1, x2) overlaps with [y1, y2),
* it is sufficient to check x1 < y2 && y1 < x2.
*/
- if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
- lock < reg->umax_value + off + size) {
- verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
- return -EACCES;
- }
- }
- if (map_value_has_timer(map)) {
- u32 t = map->timer_off;
-
- if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
- t < reg->umax_value + off + size) {
- verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
- return -EACCES;
- }
- }
- if (map_value_has_kptrs(map)) {
- struct bpf_map_value_off *tab = map->kptr_off_tab;
- int i;
-
- for (i = 0; i < tab->nr_off; i++) {
- u32 p = tab->off[i].offset;
-
- if (reg->smin_value + off < p + sizeof(u64) &&
- p < reg->umax_value + off + size) {
+ if (reg->smin_value + off < p + btf_field_type_size(field->type) &&
+ p < reg->umax_value + off + size) {
+ switch (field->type) {
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
if (src != ACCESS_DIRECT) {
verbose(env, "kptr cannot be accessed indirectly by helper\n");
return -EACCES;
@@ -3875,10 +4017,14 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
}
break;
+ default:
+ verbose(env, "%s cannot be accessed directly by load/store\n",
+ btf_field_type_name(field->type));
+ return -EACCES;
}
}
}
- return err;
+ return 0;
}
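
The interval test the loop above relies on, restated as a standalone predicate for clarity:

	/* half-open ranges [x1, x2) and [y1, y2) intersect iff x1 < y2 && y1 < x2 */
	static bool ranges_overlap(u64 x1, u64 x2, u64 y1, u64 y2)
	{
		return x1 < y2 && y1 < x2;
	}
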
#define MAX_PACKET_OFF 0xffff
@@ -4751,7 +4897,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_MAP_VALUE) {
- struct bpf_map_value_off_desc *kptr_off_desc = NULL;
+ struct btf_field *kptr_field = NULL;
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
@@ -4765,11 +4911,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err)
return err;
if (tnum_is_const(reg->var_off))
- kptr_off_desc = bpf_map_kptr_off_contains(reg->map_ptr,
- off + reg->var_off.value);
- if (kptr_off_desc) {
- err = check_map_kptr_access(env, regno, value_regno, insn_idx,
- kptr_off_desc);
+ kptr_field = btf_record_find(reg->map_ptr->record,
+ off + reg->var_off.value, BPF_KPTR);
+ if (kptr_field) {
+ err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
} else if (t == BPF_READ && value_regno >= 0) {
struct bpf_map *map = reg->map_ptr;
@@ -5160,10 +5305,6 @@ static int check_stack_range_initialized(
}
if (is_spilled_reg(&state->stack[spi]) &&
- base_type(state->stack[spi].spilled_ptr.type) == PTR_TO_BTF_ID)
- goto mark;
-
- if (is_spilled_reg(&state->stack[spi]) &&
(state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
env->allow_ptr_leaks)) {
if (clobber) {
@@ -5193,6 +5334,11 @@ mark:
mark_reg_read(env, &state->stack[spi].spilled_ptr,
state->stack[spi].spilled_ptr.parent,
REG_LIVE_READ64);
+	/* We do not set REG_LIVE_WRITTEN for the stack slot, as we cannot
+	 * be sure whether the stack slot is written to or not. Hence,
+ * we must still conservatively propagate reads upwards even if
+ * helper may write to the entire memory range.
+ */
}
return update_stack_depth(env, state, min_off);
}
@@ -5442,24 +5588,13 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
map->name);
return -EINVAL;
}
- if (!map_value_has_spin_lock(map)) {
- if (map->spin_lock_off == -E2BIG)
- verbose(env,
- "map '%s' has more than one 'struct bpf_spin_lock'\n",
- map->name);
- else if (map->spin_lock_off == -ENOENT)
- verbose(env,
- "map '%s' doesn't have 'struct bpf_spin_lock'\n",
- map->name);
- else
- verbose(env,
- "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
- map->name);
+ if (!btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
+ verbose(env, "map '%s' has no valid bpf_spin_lock\n", map->name);
return -EINVAL;
}
- if (map->spin_lock_off != val + reg->off) {
- verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
- val + reg->off);
+ if (map->record->spin_lock_off != val + reg->off) {
+ verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
+ val + reg->off, map->record->spin_lock_off);
return -EINVAL;
}
if (is_lock) {
@@ -5502,24 +5637,13 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno,
map->name);
return -EINVAL;
}
- if (!map_value_has_timer(map)) {
- if (map->timer_off == -E2BIG)
- verbose(env,
- "map '%s' has more than one 'struct bpf_timer'\n",
- map->name);
- else if (map->timer_off == -ENOENT)
- verbose(env,
- "map '%s' doesn't have 'struct bpf_timer'\n",
- map->name);
- else
- verbose(env,
- "map '%s' is not a struct type or bpf_timer is mangled\n",
- map->name);
+ if (!btf_record_has_field(map->record, BPF_TIMER)) {
+ verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
return -EINVAL;
}
- if (map->timer_off != val + reg->off) {
+ if (map->record->timer_off != val + reg->off) {
verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
- val + reg->off, map->timer_off);
+ val + reg->off, map->record->timer_off);
return -EINVAL;
}
if (meta->map_ptr) {
@@ -5535,10 +5659,9 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- struct bpf_map_value_off_desc *off_desc;
struct bpf_map *map_ptr = reg->map_ptr;
+ struct btf_field *kptr_field;
u32 kptr_off;
- int ret;
if (!tnum_is_const(reg->var_off)) {
verbose(env,
@@ -5551,30 +5674,23 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
map_ptr->name);
return -EINVAL;
}
- if (!map_value_has_kptrs(map_ptr)) {
- ret = PTR_ERR_OR_ZERO(map_ptr->kptr_off_tab);
- if (ret == -E2BIG)
- verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name,
- BPF_MAP_VALUE_OFF_MAX);
- else if (ret == -EEXIST)
- verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name);
- else
- verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
+ if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
+ verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
return -EINVAL;
}
meta->map_ptr = map_ptr;
kptr_off = reg->off + reg->var_off.value;
- off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off);
- if (!off_desc) {
+ kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
+ if (!kptr_field) {
verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
return -EACCES;
}
- if (off_desc->type != BPF_KPTR_REF) {
+ if (kptr_field->type != BPF_KPTR_REF) {
verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
return -EACCES;
}
- meta->kptr_off_desc = off_desc;
+ meta->kptr_field = kptr_field;
return 0;
}
@@ -5796,7 +5912,7 @@ found:
}
if (meta->func_id == BPF_FUNC_kptr_xchg) {
- if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno))
+ if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
return -EACCES;
} else {
if (arg_btf_id == BPF_PTR_POISON) {
@@ -6651,6 +6767,10 @@ typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
struct bpf_func_state *callee,
int insn_idx);
+static int set_callee_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee, int insn_idx);
+
static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx, int subprog,
set_callee_state_fn set_callee_state_cb)
@@ -6701,6 +6821,16 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
}
+ /* set_callee_state is used for direct subprog calls, but we are
+ * interested in validating only BPF helpers that can call subprogs as
+ * callbacks
+ */
+ if (set_callee_state_cb != set_callee_state && !is_callback_calling_function(insn->imm)) {
+ verbose(env, "verifier bug: helper %s#%d is not marked as callback-calling\n",
+ func_id_name(insn->imm), insn->imm);
+ return -EFAULT;
+ }
+
if (insn->code == (BPF_JMP | BPF_CALL) &&
insn->src_reg == 0 &&
insn->imm == BPF_FUNC_timer_set_callback) {
@@ -7484,7 +7614,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].map_uid = meta.map_uid;
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
if (!type_may_be_null(ret_type) &&
- map_value_has_spin_lock(meta.map_ptr)) {
+ btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
regs[BPF_REG_0].id = ++env->id_gen;
}
break;
@@ -7548,8 +7678,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
if (func_id == BPF_FUNC_kptr_xchg) {
- ret_btf = meta.kptr_off_desc->kptr.btf;
- ret_btf_id = meta.kptr_off_desc->kptr.btf_id;
+ ret_btf = meta.kptr_field->kptr.btf;
+ ret_btf_id = meta.kptr_field->kptr.btf_id;
} else {
if (fn->ret_btf_id == BPF_PTR_POISON) {
verbose(env, "verifier internal error:");
@@ -9207,6 +9337,11 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
return err;
return adjust_ptr_min_max_vals(env, insn,
dst_reg, src_reg);
+ } else if (dst_reg->precise) {
+ /* if dst_reg is precise, src_reg should be precise as well */
+ err = mark_chain_precision(env, insn->src_reg);
+ if (err)
+ return err;
}
} else {
/* Pretend the src is a reg with a known value, since we only
@@ -10395,7 +10530,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
dst_reg->type = PTR_TO_MAP_VALUE;
dst_reg->off = aux->map_off;
- if (map_value_has_spin_lock(map))
+ if (btf_record_has_field(map->record, BPF_SPIN_LOCK))
dst_reg->id = ++env->id_gen;
} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
insn->src_reg == BPF_PSEUDO_MAP_IDX) {
@@ -11520,7 +11655,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
if (env->explore_alu_limits)
return false;
if (rcur->type == SCALAR_VALUE) {
- if (!rold->precise && !rcur->precise)
+ if (!rold->precise)
return true;
/* new val must satisfy old val knowledge */
return range_within(rold, rcur) &&
@@ -11843,34 +11978,36 @@ static int propagate_precision(struct bpf_verifier_env *env,
{
struct bpf_reg_state *state_reg;
struct bpf_func_state *state;
- int i, err = 0;
+ int i, err = 0, fr;
- state = old->frame[old->curframe];
- state_reg = state->regs;
- for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
- if (state_reg->type != SCALAR_VALUE ||
- !state_reg->precise)
- continue;
- if (env->log.level & BPF_LOG_LEVEL2)
- verbose(env, "propagating r%d\n", i);
- err = mark_chain_precision(env, i);
- if (err < 0)
- return err;
- }
+ for (fr = old->curframe; fr >= 0; fr--) {
+ state = old->frame[fr];
+ state_reg = state->regs;
+ for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
+ if (state_reg->type != SCALAR_VALUE ||
+ !state_reg->precise)
+ continue;
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "frame %d: propagating r%d\n", i, fr);
+ err = mark_chain_precision_frame(env, fr, i);
+ if (err < 0)
+ return err;
+ }
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (!is_spilled_reg(&state->stack[i]))
- continue;
- state_reg = &state->stack[i].spilled_ptr;
- if (state_reg->type != SCALAR_VALUE ||
- !state_reg->precise)
- continue;
- if (env->log.level & BPF_LOG_LEVEL2)
- verbose(env, "propagating fp%d\n",
- (-i - 1) * BPF_REG_SIZE);
- err = mark_chain_precision_stack(env, i);
- if (err < 0)
- return err;
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (!is_spilled_reg(&state->stack[i]))
+ continue;
+ state_reg = &state->stack[i].spilled_ptr;
+ if (state_reg->type != SCALAR_VALUE ||
+ !state_reg->precise)
+ continue;
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "frame %d: propagating fp%d\n",
+ (-i - 1) * BPF_REG_SIZE, fr);
+ err = mark_chain_precision_stack_frame(env, fr, i);
+ if (err < 0)
+ return err;
+ }
}
return 0;
}
@@ -12065,6 +12202,10 @@ next:
env->prev_jmps_processed = env->jmps_processed;
env->prev_insn_processed = env->insn_processed;
+ /* forget precise markings we inherited, see __mark_chain_precision */
+ if (env->bpf_capable)
+ mark_all_scalars_imprecise(env, cur);
+
/* add new state to the head of linked list */
new = &new_sl->state;
err = copy_verifier_state(new, cur);
@@ -12673,7 +12814,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
{
enum bpf_prog_type prog_type = resolve_prog_type(prog);
- if (map_value_has_spin_lock(map)) {
+ if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
return -EINVAL;
@@ -12690,7 +12831,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
}
}
- if (map_value_has_timer(map)) {
+ if (btf_record_has_field(map->record, BPF_TIMER)) {
if (is_tracing_prog_type(prog_type)) {
verbose(env, "tracing progs cannot use bpf_timer yet\n");
return -EINVAL;
@@ -14613,6 +14754,8 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
BPF_MAIN_FUNC /* callsite */,
0 /* frameno */,
subprog);
+ state->first_insn_idx = env->subprog_info[subprog].start;
+ state->last_insn_idx = -1;
regs = state->frame[state->curframe]->regs;
if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 49884e7de080..9d2288c0736e 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -147,7 +147,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
if (!copy_selem)
return NULL;
- if (map_value_has_spin_lock(&smap->map))
+ if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
SDATA(selem)->data, true);
else
@@ -566,7 +566,7 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
if (!nla_value)
goto errout;
- if (map_value_has_spin_lock(&smap->map))
+ if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
copy_map_value_locked(&smap->map, nla_data(nla_value),
sdata->data, true);
else
diff --git a/net/core/filter.c b/net/core/filter.c
index 358d5e70671a..6dd2baf5eeb2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2126,6 +2126,10 @@ static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
if (mlen) {
__skb_pull(skb, mlen);
+ if (unlikely(!skb->len)) {
+ kfree_skb(skb);
+ return -ERANGE;
+ }
/* At ingress, the mac header has already been pulled once.
* At egress, skb_pospull_rcsum has to be done in case that
@@ -8921,6 +8925,10 @@ static bool sock_ops_is_valid_access(int off, int size,
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size,
size_default);
+ case offsetof(struct bpf_sock_ops, skb_hwtstamp):
+ if (size != sizeof(__u64))
+ return false;
+ break;
default:
if (size != size_default)
return false;
@@ -9104,21 +9112,21 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
return insn;
}
-static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
+static struct bpf_insn *bpf_convert_shinfo_access(__u8 dst_reg, __u8 skb_reg,
struct bpf_insn *insn)
{
/* si->dst_reg = skb_shinfo(SKB); */
#ifdef NET_SKBUFF_DATA_USES_OFFSET
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
- BPF_REG_AX, si->src_reg,
+ BPF_REG_AX, skb_reg,
offsetof(struct sk_buff, end));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
- si->dst_reg, si->src_reg,
+ dst_reg, skb_reg,
offsetof(struct sk_buff, head));
- *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+ *insn++ = BPF_ALU64_REG(BPF_ADD, dst_reg, BPF_REG_AX);
#else
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
- si->dst_reg, si->src_reg,
+ dst_reg, skb_reg,
offsetof(struct sk_buff, end));
#endif
@@ -9509,7 +9517,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, gso_segs):
- insn = bpf_convert_shinfo_access(si, insn);
+ insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
si->dst_reg, si->dst_reg,
bpf_target_off(struct skb_shared_info,
@@ -9517,7 +9525,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
target_size));
break;
case offsetof(struct __sk_buff, gso_size):
- insn = bpf_convert_shinfo_access(si, insn);
+ insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size),
si->dst_reg, si->dst_reg,
bpf_target_off(struct skb_shared_info,
@@ -9544,7 +9552,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8);
BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0);
- insn = bpf_convert_shinfo_access(si, insn);
+ insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn);
*insn++ = BPF_LDX_MEM(BPF_DW,
si->dst_reg, si->dst_reg,
bpf_target_off(struct skb_shared_info,
@@ -10394,6 +10402,25 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
tcp_flags),
si->dst_reg, si->dst_reg, off);
break;
+ case offsetof(struct bpf_sock_ops, skb_hwtstamp): {
+ struct bpf_insn *jmp_on_null_skb;
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+ skb),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ skb));
+ /* Reserve one insn to test skb == NULL */
+ jmp_on_null_skb = insn++;
+ insn = bpf_convert_shinfo_access(si->dst_reg, si->dst_reg, insn);
+ *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct skb_shared_info,
+ hwtstamps, 8,
+ target_size));
+ *jmp_on_null_skb = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0,
+ insn - jmp_on_null_skb - 1);
+ break;
+ }
}
return insn - insn_buf;
}
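A minimal sketch of a program consuming the new field (hypothetical program and names; assumes the updated UAPI header from this series and the usual libbpf helpers):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int observe_hwtstamp(struct bpf_sock_ops *skops)
{
	/* Must be loaded as a full __u64: sock_ops_is_valid_access()
	 * rejects narrower accesses. Reads as 0 when skops->skb is NULL,
	 * thanks to the jmp_on_null_skb guard emitted above.
	 */
	__u64 ts = skops->skb_hwtstamp;

	if (ts)
		bpf_printk("skb hw tstamp: %llu", ts);
	return 1;
}

char _license[] SEC("license") = "GPL";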
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index b363503357e5..822c13242251 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -17,48 +17,11 @@
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
-#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
-
-struct {
- __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
- __uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(u32));
- __uint(max_entries, 8);
-} jmp_table SEC(".maps");
-
#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4
-/* Protocol dispatch routine. It tail-calls next BPF program depending
- * on eth proto. Note, we could have used ...
- *
- * bpf_tail_call(skb, &jmp_table, proto);
- *
- * ... but it would need large prog_array and cannot be optimised given
- * the map key is not static.
- */
-static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
-{
- switch (proto) {
- case ETH_P_8021Q:
- case ETH_P_8021AD:
- bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
- break;
- case ETH_P_MPLS_UC:
- case ETH_P_MPLS_MC:
- bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
- break;
- case ETH_P_IP:
- bpf_tail_call(skb, &jmp_table, PARSE_IP);
- break;
- case ETH_P_IPV6:
- bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
- break;
- }
-}
-
struct vlan_hdr {
__be16 h_vlan_TCI;
__be16 h_vlan_encapsulated_proto;
@@ -74,6 +37,8 @@ struct flow_key_record {
__u32 ip_proto;
};
+static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto);
+
static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
{
return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
@@ -189,7 +154,8 @@ static __always_inline void parse_ip_proto(struct __sk_buff *skb,
}
}
-PROG(PARSE_IP)(struct __sk_buff *skb)
+SEC("socket")
+int bpf_func_ip(struct __sk_buff *skb)
{
struct globals *g = this_cpu_globals();
__u32 nhoff, verlen, ip_proto;
@@ -217,7 +183,8 @@ PROG(PARSE_IP)(struct __sk_buff *skb)
return 0;
}
-PROG(PARSE_IPV6)(struct __sk_buff *skb)
+SEC("socket")
+int bpf_func_ipv6(struct __sk_buff *skb)
{
struct globals *g = this_cpu_globals();
__u32 nhoff, ip_proto;
@@ -240,7 +207,8 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb)
return 0;
}
-PROG(PARSE_VLAN)(struct __sk_buff *skb)
+SEC("socket")
+int bpf_func_vlan(struct __sk_buff *skb)
{
__u32 nhoff, proto;
@@ -256,7 +224,8 @@ PROG(PARSE_VLAN)(struct __sk_buff *skb)
return 0;
}
-PROG(PARSE_MPLS)(struct __sk_buff *skb)
+SEC("socket")
+int bpf_func_mpls(struct __sk_buff *skb)
{
__u32 nhoff, label;
@@ -279,7 +248,49 @@ PROG(PARSE_MPLS)(struct __sk_buff *skb)
return 0;
}
-SEC("socket/0")
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(max_entries, 8);
+ __array(values, u32 (void *));
+} prog_array_init SEC(".maps") = {
+ .values = {
+ [PARSE_VLAN] = (void *)&bpf_func_vlan,
+ [PARSE_IP] = (void *)&bpf_func_ip,
+ [PARSE_IPV6] = (void *)&bpf_func_ipv6,
+ [PARSE_MPLS] = (void *)&bpf_func_mpls,
+ },
+};
+
+/* Protocol dispatch routine. It tail-calls next BPF program depending
+ * on eth proto. Note, we could have used ...
+ *
+ * bpf_tail_call(skb, &prog_array_init, proto);
+ *
+ * ... but it would need large prog_array and cannot be optimised given
+ * the map key is not static.
+ */
+static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
+{
+ switch (proto) {
+ case ETH_P_8021Q:
+ case ETH_P_8021AD:
+ bpf_tail_call(skb, &prog_array_init, PARSE_VLAN);
+ break;
+ case ETH_P_MPLS_UC:
+ case ETH_P_MPLS_MC:
+ bpf_tail_call(skb, &prog_array_init, PARSE_MPLS);
+ break;
+ case ETH_P_IP:
+ bpf_tail_call(skb, &prog_array_init, PARSE_IP);
+ break;
+ case ETH_P_IPV6:
+ bpf_tail_call(skb, &prog_array_init, PARSE_IPV6);
+ break;
+ }
+}
+
+SEC("socket")
int main_prog(struct __sk_buff *skb)
{
__u32 nhoff = ETH_HLEN;
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index cd6fa79df900..56044acbd25d 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -24,10 +24,9 @@ struct pair {
int main(int argc, char **argv)
{
- int i, sock, key, fd, main_prog_fd, jmp_table_fd, hash_map_fd;
+ int i, sock, fd, main_prog_fd, hash_map_fd;
struct bpf_program *prog;
struct bpf_object *obj;
- const char *section;
char filename[256];
FILE *f;
@@ -45,26 +44,24 @@ int main(int argc, char **argv)
goto cleanup;
}
- jmp_table_fd = bpf_object__find_map_fd_by_name(obj, "jmp_table");
hash_map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map");
- if (jmp_table_fd < 0 || hash_map_fd < 0) {
+ if (hash_map_fd < 0) {
fprintf(stderr, "ERROR: finding a map in obj file failed\n");
goto cleanup;
}
+ /* find BPF main program */
+ main_prog_fd = 0;
bpf_object__for_each_program(prog, obj) {
fd = bpf_program__fd(prog);
- section = bpf_program__section_name(prog);
- if (sscanf(section, "socket/%d", &key) != 1) {
- fprintf(stderr, "ERROR: finding prog failed\n");
- goto cleanup;
- }
-
- if (key == 0)
+ if (!strcmp(bpf_program__name(prog), "main_prog"))
main_prog_fd = fd;
- else
- bpf_map_update_elem(jmp_table_fd, &key, &fd, BPF_ANY);
+ }
+
+ if (main_prog_fd == 0) {
+ fprintf(stderr, "ERROR: can't find main_prog\n");
+ goto cleanup;
}
sock = open_raw_sock("lo");
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index 5bc696bac27d..93e0b7680b4f 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -22,14 +22,14 @@ struct {
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
*/
-SEC("kprobe/kfree_skb")
+SEC("kprobe/kfree_skb_reason")
int bpf_prog2(struct pt_regs *ctx)
{
long loc = 0;
long init_val = 1;
long *value;
- /* read ip of kfree_skb caller.
+ /* read ip of kfree_skb_reason caller.
* non-portable version of __builtin_return_address(0)
*/
BPF_KPROBE_READ_RET_IP(loc, ctx);
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index dd6205c6b6a7..089e408abd7a 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -146,7 +146,9 @@ int main(int ac, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- /* start 'ping' in the background to have some kfree_skb events */
+ /* start 'ping' in the background to have some kfree_skb_reason
+ * events
+ */
f = popen("ping -4 -c5 localhost", "r");
(void) f;
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 68a70ac03c80..b87e4a7fd689 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -815,8 +815,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type,
if (!btf_id)
continue;
- err = hashmap__append(tab, u32_as_hash_field(btf_id),
- u32_as_hash_field(id));
+ err = hashmap__append(tab, btf_id, id);
if (err) {
p_err("failed to append entry to hashmap for BTF ID %u, object ID %u: %s",
btf_id, id, strerror(-err));
@@ -875,17 +874,13 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
printf("size %uB", info->btf_size);
n = 0;
- hashmap__for_each_key_entry(btf_prog_table, entry,
- u32_as_hash_field(info->id)) {
- printf("%s%u", n++ == 0 ? " prog_ids " : ",",
- hash_field_as_u32(entry->value));
+ hashmap__for_each_key_entry(btf_prog_table, entry, info->id) {
+ printf("%s%lu", n++ == 0 ? " prog_ids " : ",", entry->value);
}
n = 0;
- hashmap__for_each_key_entry(btf_map_table, entry,
- u32_as_hash_field(info->id)) {
- printf("%s%u", n++ == 0 ? " map_ids " : ",",
- hash_field_as_u32(entry->value));
+ hashmap__for_each_key_entry(btf_map_table, entry, info->id) {
+ printf("%s%lu", n++ == 0 ? " map_ids " : ",", entry->value);
}
emit_obj_refs_plain(refs_table, info->id, "\n\tpids ");
@@ -907,17 +902,15 @@ show_btf_json(struct bpf_btf_info *info, int fd,
jsonw_name(json_wtr, "prog_ids");
jsonw_start_array(json_wtr); /* prog_ids */
- hashmap__for_each_key_entry(btf_prog_table, entry,
- u32_as_hash_field(info->id)) {
- jsonw_uint(json_wtr, hash_field_as_u32(entry->value));
+ hashmap__for_each_key_entry(btf_prog_table, entry, info->id) {
+ jsonw_uint(json_wtr, entry->value);
}
jsonw_end_array(json_wtr); /* prog_ids */
jsonw_name(json_wtr, "map_ids");
jsonw_start_array(json_wtr); /* map_ids */
- hashmap__for_each_key_entry(btf_map_table, entry,
- u32_as_hash_field(info->id)) {
- jsonw_uint(json_wtr, hash_field_as_u32(entry->value));
+ hashmap__for_each_key_entry(btf_map_table, entry, info->id) {
+ jsonw_uint(json_wtr, entry->value);
}
jsonw_end_array(json_wtr); /* map_ids */
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 653c130a0aaa..c90b756945e3 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -497,7 +497,7 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb,
goto out_close;
}
- err = hashmap__append(build_fn_table, u32_as_hash_field(pinned_info.id), path);
+ err = hashmap__append(build_fn_table, pinned_info.id, path);
if (err) {
p_err("failed to append entry to hashmap for ID %u, path '%s': %s",
pinned_info.id, path, strerror(errno));
@@ -548,7 +548,7 @@ void delete_pinned_obj_table(struct hashmap *map)
return;
hashmap__for_each_entry(map, entry, bkt)
- free(entry->value);
+ free(entry->pvalue);
hashmap__free(map);
}
@@ -1044,12 +1044,12 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
return fd;
}
-size_t hash_fn_for_key_as_id(const void *key, void *ctx)
+size_t hash_fn_for_key_as_id(long key, void *ctx)
{
- return (size_t)key;
+ return key;
}
-bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx)
+bool equal_fn_for_key_as_id(long k1, long k2, void *ctx)
{
return k1 == k2;
}
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index cf8b4e525c88..01bb8d8f5568 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -1660,21 +1660,16 @@ struct btfgen_info {
struct btf *marked_btf; /* btf structure used to mark used types */
};
-static size_t btfgen_hash_fn(const void *key, void *ctx)
+static size_t btfgen_hash_fn(long key, void *ctx)
{
- return (size_t)key;
+ return key;
}
-static bool btfgen_equal_fn(const void *k1, const void *k2, void *ctx)
+static bool btfgen_equal_fn(long k1, long k2, void *ctx)
{
return k1 == k2;
}
-static void *u32_as_hash_key(__u32 x)
-{
- return (void *)(uintptr_t)x;
-}
-
static void btfgen_free_info(struct btfgen_info *info)
{
if (!info)
@@ -2086,18 +2081,18 @@ static int btfgen_record_obj(struct btfgen_info *info, const char *obj_path)
struct bpf_core_spec specs_scratch[3] = {};
struct bpf_core_relo_res targ_res = {};
struct bpf_core_cand_list *cands = NULL;
- const void *type_key = u32_as_hash_key(relo->type_id);
const char *sec_name = btf__name_by_offset(btf, sec->sec_name_off);
if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
- !hashmap__find(cand_cache, type_key, (void **)&cands)) {
+ !hashmap__find(cand_cache, relo->type_id, &cands)) {
cands = btfgen_find_cands(btf, info->src_btf, relo->type_id);
if (!cands) {
err = -errno;
goto out;
}
- err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
+ err = hashmap__set(cand_cache, relo->type_id, cands,
+ NULL, NULL);
if (err)
goto out;
}
@@ -2120,7 +2115,7 @@ out:
if (!IS_ERR_OR_NULL(cand_cache)) {
hashmap__for_each_entry(cand_cache, entry, i) {
- bpf_core_free_cands(entry->value);
+ bpf_core_free_cands(entry->pvalue);
}
hashmap__free(cand_cache);
}
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 2863639706dd..6f4cfe01cad4 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -204,9 +204,8 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
jsonw_name(json_wtr, "pinned");
jsonw_start_array(json_wtr);
- hashmap__for_each_key_entry(link_table, entry,
- u32_as_hash_field(info->id))
- jsonw_string(json_wtr, entry->value);
+ hashmap__for_each_key_entry(link_table, entry, info->id)
+ jsonw_string(json_wtr, entry->pvalue);
jsonw_end_array(json_wtr);
}
@@ -309,9 +308,8 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
if (!hashmap__empty(link_table)) {
struct hashmap_entry *entry;
- hashmap__for_each_key_entry(link_table, entry,
- u32_as_hash_field(info->id))
- printf("\n\tpinned %s", (char *)entry->value);
+ hashmap__for_each_key_entry(link_table, entry, info->id)
+ printf("\n\tpinned %s", (char *)entry->pvalue);
}
emit_obj_refs_plain(refs_table, info->id, "\n\tpids ");
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 467d8472df0c..d4e8a1aef787 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -240,8 +240,8 @@ int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind,
int print_all_levels(__maybe_unused enum libbpf_print_level level,
const char *format, va_list args);
-size_t hash_fn_for_key_as_id(const void *key, void *ctx);
-bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx);
+size_t hash_fn_for_key_as_id(long key, void *ctx);
+bool equal_fn_for_key_as_id(long k1, long k2, void *ctx);
/* bpf_attach_type_input_str - convert the provided attach type value into a
* textual representation that we accept for input purposes.
@@ -257,16 +257,6 @@ bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx);
*/
const char *bpf_attach_type_input_str(enum bpf_attach_type t);
-static inline void *u32_as_hash_field(__u32 x)
-{
- return (void *)(uintptr_t)x;
-}
-
-static inline __u32 hash_field_as_u32(const void *x)
-{
- return (__u32)(uintptr_t)x;
-}
-
static inline bool hashmap__empty(struct hashmap *map)
{
return map ? hashmap__size(map) == 0 : true;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index f941ac5c7b73..d884070a2314 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -518,9 +518,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
jsonw_name(json_wtr, "pinned");
jsonw_start_array(json_wtr);
- hashmap__for_each_key_entry(map_table, entry,
- u32_as_hash_field(info->id))
- jsonw_string(json_wtr, entry->value);
+ hashmap__for_each_key_entry(map_table, entry, info->id)
+ jsonw_string(json_wtr, entry->pvalue);
jsonw_end_array(json_wtr);
}
@@ -595,9 +594,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
if (!hashmap__empty(map_table)) {
struct hashmap_entry *entry;
- hashmap__for_each_key_entry(map_table, entry,
- u32_as_hash_field(info->id))
- printf("\n\tpinned %s", (char *)entry->value);
+ hashmap__for_each_key_entry(map_table, entry, info->id)
+ printf("\n\tpinned %s", (char *)entry->pvalue);
}
if (frozen_str) {
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index bb6c969a114a..00c77edb6331 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -36,8 +36,8 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e)
int err, i;
void *tmp;
- hashmap__for_each_key_entry(map, entry, u32_as_hash_field(e->id)) {
- refs = entry->value;
+ hashmap__for_each_key_entry(map, entry, e->id) {
+ refs = entry->pvalue;
for (i = 0; i < refs->ref_cnt; i++) {
if (refs->refs[i].pid == e->pid)
@@ -81,7 +81,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e)
refs->has_bpf_cookie = e->has_bpf_cookie;
refs->bpf_cookie = e->bpf_cookie;
- err = hashmap__append(map, u32_as_hash_field(e->id), refs);
+ err = hashmap__append(map, e->id, refs);
if (err)
p_err("failed to append entry to hashmap for ID %u: %s",
e->id, strerror(errno));
@@ -183,7 +183,7 @@ void delete_obj_refs_table(struct hashmap *map)
return;
hashmap__for_each_entry(map, entry, bkt) {
- struct obj_refs *refs = entry->value;
+ struct obj_refs *refs = entry->pvalue;
free(refs->refs);
free(refs);
@@ -200,8 +200,8 @@ void emit_obj_refs_json(struct hashmap *map, __u32 id,
if (hashmap__empty(map))
return;
- hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) {
- struct obj_refs *refs = entry->value;
+ hashmap__for_each_key_entry(map, entry, id) {
+ struct obj_refs *refs = entry->pvalue;
int i;
if (refs->ref_cnt == 0)
@@ -232,8 +232,8 @@ void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix)
if (hashmap__empty(map))
return;
- hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) {
- struct obj_refs *refs = entry->value;
+ hashmap__for_each_key_entry(map, entry, id) {
+ struct obj_refs *refs = entry->pvalue;
int i;
if (refs->ref_cnt == 0)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index a858b907da16..9d32ffb9f22e 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -486,9 +486,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
jsonw_name(json_wtr, "pinned");
jsonw_start_array(json_wtr);
- hashmap__for_each_key_entry(prog_table, entry,
- u32_as_hash_field(info->id))
- jsonw_string(json_wtr, entry->value);
+ hashmap__for_each_key_entry(prog_table, entry, info->id)
+ jsonw_string(json_wtr, entry->pvalue);
jsonw_end_array(json_wtr);
}
@@ -561,9 +560,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
if (!hashmap__empty(prog_table)) {
struct hashmap_entry *entry;
- hashmap__for_each_key_entry(prog_table, entry,
- u32_as_hash_field(info->id))
- printf("\n\tpinned %s", (char *)entry->value);
+ hashmap__for_each_key_entry(prog_table, entry, info->id)
+ printf("\n\tpinned %s", (char *)entry->pvalue);
}
if (info->btf_id)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 94659f6b3395..fb4c911d2a03 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6445,6 +6445,7 @@ struct bpf_sock_ops {
* the outgoing header has not
* been written yet.
*/
+ __u64 skb_hwtstamp;
};
/* Definitions for bpf_sock_ops_cb_flags */
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 675a0df5c840..3bd812bf88ff 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1559,15 +1559,15 @@ struct btf_pipe {
static int btf_rewrite_str(__u32 *str_off, void *ctx)
{
struct btf_pipe *p = ctx;
- void *mapped_off;
+ long mapped_off;
int off, err;
if (!*str_off) /* nothing to do for empty strings */
return 0;
if (p->str_off_map &&
- hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) {
- *str_off = (__u32)(long)mapped_off;
+ hashmap__find(p->str_off_map, *str_off, &mapped_off)) {
+ *str_off = mapped_off;
return 0;
}
@@ -1579,7 +1579,7 @@ static int btf_rewrite_str(__u32 *str_off, void *ctx)
* performing expensive string comparisons.
*/
if (p->str_off_map) {
- err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off);
+ err = hashmap__append(p->str_off_map, *str_off, off);
if (err)
return err;
}
@@ -1630,8 +1630,8 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
return 0;
}
-static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx);
-static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx);
+static size_t btf_dedup_identity_hash_fn(long key, void *ctx);
+static bool btf_dedup_equal_fn(long k1, long k2, void *ctx);
int btf__add_btf(struct btf *btf, const struct btf *src_btf)
{
@@ -2881,6 +2881,7 @@ static int btf_dedup_strings(struct btf_dedup *d);
static int btf_dedup_prim_types(struct btf_dedup *d);
static int btf_dedup_struct_types(struct btf_dedup *d);
static int btf_dedup_ref_types(struct btf_dedup *d);
+static int btf_dedup_resolve_fwds(struct btf_dedup *d);
static int btf_dedup_compact_types(struct btf_dedup *d);
static int btf_dedup_remap_types(struct btf_dedup *d);
@@ -2988,15 +2989,16 @@ static int btf_dedup_remap_types(struct btf_dedup *d);
* Algorithm summary
* =================
*
- * Algorithm completes its work in 6 separate passes:
+ * Algorithm completes its work in 7 separate passes:
*
* 1. Strings deduplication.
* 2. Primitive types deduplication (int, enum, fwd).
* 3. Struct/union types deduplication.
- * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
+ * 4. Resolve unambiguous forward declarations.
+ * 5. Reference types deduplication (pointers, typedefs, arrays, funcs, func
* protos, and const/volatile/restrict modifiers).
- * 5. Types compaction.
- * 6. Types remapping.
+ * 6. Types compaction.
+ * 7. Types remapping.
*
* Algorithm determines canonical type descriptor, which is a single
* representative type for each truly unique type. This canonical type is the
@@ -3060,6 +3062,11 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
pr_debug("btf_dedup_struct_types failed:%d\n", err);
goto done;
}
+ err = btf_dedup_resolve_fwds(d);
+ if (err < 0) {
+ pr_debug("btf_dedup_resolve_fwds failed:%d\n", err);
+ goto done;
+ }
err = btf_dedup_ref_types(d);
if (err < 0) {
pr_debug("btf_dedup_ref_types failed:%d\n", err);
@@ -3126,12 +3133,11 @@ static long hash_combine(long h, long value)
}
#define for_each_dedup_cand(d, node, hash) \
- hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
+ hashmap__for_each_key_entry(d->dedup_table, node, hash)
static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
{
- return hashmap__append(d->dedup_table,
- (void *)hash, (void *)(long)type_id);
+ return hashmap__append(d->dedup_table, hash, type_id);
}
static int btf_dedup_hypot_map_add(struct btf_dedup *d,
@@ -3178,17 +3184,17 @@ static void btf_dedup_free(struct btf_dedup *d)
free(d);
}
-static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
+static size_t btf_dedup_identity_hash_fn(long key, void *ctx)
{
- return (size_t)key;
+ return key;
}
-static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
+static size_t btf_dedup_collision_hash_fn(long key, void *ctx)
{
return 0;
}
-static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
+static bool btf_dedup_equal_fn(long k1, long k2, void *ctx)
{
return k1 == k2;
}
@@ -3404,23 +3410,17 @@ static long btf_hash_enum(struct btf_type *t)
{
long h;
- /* don't hash vlen and enum members to support enum fwd resolving */
+ /* don't hash vlen, enum members and size to support enum fwd resolving */
h = hash_combine(0, t->name_off);
- h = hash_combine(h, t->info & ~0xffff);
- h = hash_combine(h, t->size);
return h;
}
-/* Check structural equality of two ENUMs. */
-static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
+static bool btf_equal_enum_members(struct btf_type *t1, struct btf_type *t2)
{
const struct btf_enum *m1, *m2;
__u16 vlen;
int i;
- if (!btf_equal_common(t1, t2))
- return false;
-
vlen = btf_vlen(t1);
m1 = btf_enum(t1);
m2 = btf_enum(t2);
@@ -3433,15 +3433,12 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
return true;
}
-static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2)
+static bool btf_equal_enum64_members(struct btf_type *t1, struct btf_type *t2)
{
const struct btf_enum64 *m1, *m2;
__u16 vlen;
int i;
- if (!btf_equal_common(t1, t2))
- return false;
-
vlen = btf_vlen(t1);
m1 = btf_enum64(t1);
m2 = btf_enum64(t2);
@@ -3455,6 +3452,19 @@ static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2)
return true;
}
+/* Check structural equality of two ENUMs or ENUM64s. */
+static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
+{
+ if (!btf_equal_common(t1, t2))
+ return false;
+
+ /* t1 & t2 kinds are identical because of btf_equal_common */
+ if (btf_kind(t1) == BTF_KIND_ENUM)
+ return btf_equal_enum_members(t1, t2);
+ else
+ return btf_equal_enum64_members(t1, t2);
+}
+
static inline bool btf_is_enum_fwd(struct btf_type *t)
{
return btf_is_any_enum(t) && btf_vlen(t) == 0;
@@ -3464,21 +3474,14 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
{
if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
return btf_equal_enum(t1, t2);
- /* ignore vlen when comparing */
- return t1->name_off == t2->name_off &&
- (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
- t1->size == t2->size;
-}
-
-static bool btf_compat_enum64(struct btf_type *t1, struct btf_type *t2)
-{
- if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
- return btf_equal_enum64(t1, t2);
-
- /* ignore vlen when comparing */
+ /* At this point either t1 or t2 or both are forward declarations, thus:
+ * - skip comparing vlen because it is zero for forward declarations;
+ * - skip comparing size to allow enum forward declarations
+ * to be compatible with enum64 full declarations;
+ * - skip comparing kind for the same reason.
+ */
return t1->name_off == t2->name_off &&
- (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
- t1->size == t2->size;
+ btf_is_any_enum(t1) && btf_is_any_enum(t2);
}
/*
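To illustrate, the relaxed btf_compat_enum() now lets a 32-bit enum forward declaration resolve against an ENUM64 definition. A sketch of source producing such a pair (type and enumerator names are hypothetical; forward enum declarations and 64-bit enumerators are GCC/Clang extensions):

/* CU #1: type only declared -> BTF ENUM with vlen=0 (a fwd) */
enum my_flags;
struct a { enum my_flags *f; };

/* CU #2: full definition with a >32-bit value -> BTF ENUM64 */
enum my_flags { F_BIG = 0x100000000ULL };
struct b { enum my_flags *f; };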
@@ -3753,7 +3756,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_INT:
h = btf_hash_int_decl_tag(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_int_tag(t, cand)) {
new_id = cand_id;
@@ -3763,9 +3766,10 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
h = btf_hash_enum(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_enum(t, cand)) {
new_id = cand_id;
@@ -3783,32 +3787,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
}
break;
- case BTF_KIND_ENUM64:
- h = btf_hash_enum(t);
- for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
- cand = btf_type_by_id(d->btf, cand_id);
- if (btf_equal_enum64(t, cand)) {
- new_id = cand_id;
- break;
- }
- if (btf_compat_enum64(t, cand)) {
- if (btf_is_enum_fwd(t)) {
- /* resolve fwd to full enum */
- new_id = cand_id;
- break;
- }
- /* resolve canonical enum fwd to full enum */
- d->map[cand_id] = type_id;
- }
- }
- break;
-
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_common(t, cand)) {
new_id = cand_id;
@@ -4099,10 +4082,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
return btf_equal_int_tag(cand_type, canon_type);
case BTF_KIND_ENUM:
- return btf_compat_enum(cand_type, canon_type);
-
case BTF_KIND_ENUM64:
- return btf_compat_enum64(cand_type, canon_type);
+ return btf_compat_enum(cand_type, canon_type);
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
@@ -4313,7 +4294,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_struct(t);
for_each_dedup_cand(d, hash_entry, h) {
- __u32 cand_id = (__u32)(long)hash_entry->value;
+ __u32 cand_id = hash_entry->value;
int eq;
/*
@@ -4418,7 +4399,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_common(t, cand)) {
new_id = cand_id;
@@ -4435,7 +4416,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_int_decl_tag(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_int_tag(t, cand)) {
new_id = cand_id;
@@ -4459,7 +4440,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_array(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_array(t, cand)) {
new_id = cand_id;
@@ -4491,7 +4472,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_fnproto(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_fnproto(t, cand)) {
new_id = cand_id;
@@ -4528,6 +4509,134 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
}
/*
+ * Collect a map from type names to type ids for all canonical structs
+ * and unions. If the same name is shared by several canonical types,
+ * the special value 0 is used to indicate this fact.
+ */
+static int btf_dedup_fill_unique_names_map(struct btf_dedup *d, struct hashmap *names_map)
+{
+ __u32 nr_types = btf__type_cnt(d->btf);
+ struct btf_type *t;
+ __u32 type_id;
+ __u16 kind;
+ int err;
+
+ /*
+ * Iterate over base and split module ids in order to get all
+ * available structs in the map.
+ */
+ for (type_id = 1; type_id < nr_types; ++type_id) {
+ t = btf_type_by_id(d->btf, type_id);
+ kind = btf_kind(t);
+
+ if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
+ continue;
+
+ /* Skip non-canonical types */
+ if (type_id != d->map[type_id])
+ continue;
+
+ err = hashmap__add(names_map, t->name_off, type_id);
+ if (err == -EEXIST)
+ err = hashmap__set(names_map, t->name_off, 0, NULL, NULL);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int btf_dedup_resolve_fwd(struct btf_dedup *d, struct hashmap *names_map, __u32 type_id)
+{
+ struct btf_type *t = btf_type_by_id(d->btf, type_id);
+ enum btf_fwd_kind fwd_kind = btf_kflag(t);
+ __u16 cand_kind, kind = btf_kind(t);
+ struct btf_type *cand_t;
+ uintptr_t cand_id;
+
+ if (kind != BTF_KIND_FWD)
+ return 0;
+
+ /* Skip if this FWD already has a mapping */
+ if (type_id != d->map[type_id])
+ return 0;
+
+ if (!hashmap__find(names_map, t->name_off, &cand_id))
+ return 0;
+
+ /* Zero is a special value indicating that name is not unique */
+ if (!cand_id)
+ return 0;
+
+ cand_t = btf_type_by_id(d->btf, cand_id);
+ cand_kind = btf_kind(cand_t);
+ if ((cand_kind == BTF_KIND_STRUCT && fwd_kind != BTF_FWD_STRUCT) ||
+ (cand_kind == BTF_KIND_UNION && fwd_kind != BTF_FWD_UNION))
+ return 0;
+
+ d->map[type_id] = cand_id;
+
+ return 0;
+}
+
+/*
+ * Resolve unambiguous forward declarations.
+ *
+ * The lion's share of all FWD declarations is resolved during the
+ * `btf_dedup_struct_types` phase, when different type graphs are
+ * compared against each other. However, if a FWD declaration in some
+ * compilation unit is never part of a type graph that gets compared
+ * against another type graph, that declaration's canonical type is
+ * never updated. Example:
+ *
+ * CU #1:
+ *
+ * struct foo;
+ * struct foo *some_global;
+ *
+ * CU #2:
+ *
+ * struct foo { int u; };
+ * struct foo *another_global;
+ *
+ * After `btf_dedup_struct_types` the BTF looks as follows:
+ *
+ * [1] STRUCT 'foo' size=4 vlen=1 ...
+ * [2] INT 'int' size=4 ...
+ * [3] PTR '(anon)' type_id=1
+ * [4] FWD 'foo' fwd_kind=struct
+ * [5] PTR '(anon)' type_id=4
+ *
+ * This pass maps each such FWD declaration to the struct or union
+ * bearing the identical name, provided that the name is not
+ * ambiguous.
+ */
+static int btf_dedup_resolve_fwds(struct btf_dedup *d)
+{
+ int i, err;
+ struct hashmap *names_map;
+
+ names_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+ if (IS_ERR(names_map))
+ return PTR_ERR(names_map);
+
+ err = btf_dedup_fill_unique_names_map(d, names_map);
+ if (err < 0)
+ goto exit;
+
+ for (i = 0; i < d->btf->nr_types; i++) {
+ err = btf_dedup_resolve_fwd(d, names_map, d->btf->start_id + i);
+ if (err < 0)
+ break;
+ }
+
+exit:
+ hashmap__free(names_map);
+ return err;
+}
+
+/*
* Compact types.
*
* After we established for each type its corresponding canonical representative
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index bf0cc0e986dd..12f7039e0ab2 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -117,14 +117,14 @@ struct btf_dump {
struct btf_dump_data *typed_dump;
};
-static size_t str_hash_fn(const void *key, void *ctx)
+static size_t str_hash_fn(long key, void *ctx)
{
- return str_hash(key);
+ return str_hash((void *)key);
}
-static bool str_equal_fn(const void *a, const void *b, void *ctx)
+static bool str_equal_fn(long a, long b, void *ctx)
{
- return strcmp(a, b) == 0;
+ return strcmp((void *)a, (void *)b) == 0;
}
static const char *btf_name_of(const struct btf_dump *d, __u32 name_off)
@@ -225,7 +225,7 @@ static void btf_dump_free_names(struct hashmap *map)
struct hashmap_entry *cur;
hashmap__for_each_entry(map, cur, bkt)
- free((void *)cur->key);
+ free((void *)cur->pkey);
hashmap__free(map);
}
@@ -1543,11 +1543,10 @@ static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
if (!new_name)
return 1;
- hashmap__find(name_map, orig_name, (void **)&dup_cnt);
+ hashmap__find(name_map, orig_name, &dup_cnt);
dup_cnt++;
- err = hashmap__set(name_map, new_name, (void *)dup_cnt,
- (const void **)&old_name, NULL);
+ err = hashmap__set(name_map, new_name, dup_cnt, &old_name, NULL);
if (err)
free(new_name);
diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
index aeb09c288716..140ee4055676 100644
--- a/tools/lib/bpf/hashmap.c
+++ b/tools/lib/bpf/hashmap.c
@@ -128,7 +128,7 @@ static int hashmap_grow(struct hashmap *map)
}
static bool hashmap_find_entry(const struct hashmap *map,
- const void *key, size_t hash,
+ const long key, size_t hash,
struct hashmap_entry ***pprev,
struct hashmap_entry **entry)
{
@@ -151,18 +151,18 @@ static bool hashmap_find_entry(const struct hashmap *map,
return false;
}
-int hashmap__insert(struct hashmap *map, const void *key, void *value,
- enum hashmap_insert_strategy strategy,
- const void **old_key, void **old_value)
+int hashmap_insert(struct hashmap *map, long key, long value,
+ enum hashmap_insert_strategy strategy,
+ long *old_key, long *old_value)
{
struct hashmap_entry *entry;
size_t h;
int err;
if (old_key)
- *old_key = NULL;
+ *old_key = 0;
if (old_value)
- *old_value = NULL;
+ *old_value = 0;
h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
if (strategy != HASHMAP_APPEND &&
@@ -203,7 +203,7 @@ int hashmap__insert(struct hashmap *map, const void *key, void *value,
return 0;
}
-bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+bool hashmap_find(const struct hashmap *map, long key, long *value)
{
struct hashmap_entry *entry;
size_t h;
@@ -217,8 +217,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value)
return true;
}
-bool hashmap__delete(struct hashmap *map, const void *key,
- const void **old_key, void **old_value)
+bool hashmap_delete(struct hashmap *map, long key,
+ long *old_key, long *old_value)
{
struct hashmap_entry **pprev, *entry;
size_t h;
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index 10a4c4cd13cf..0a5bf1937a7c 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -40,12 +40,32 @@ static inline size_t str_hash(const char *s)
return h;
}
-typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
-typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+typedef size_t (*hashmap_hash_fn)(long key, void *ctx);
+typedef bool (*hashmap_equal_fn)(long key1, long key2, void *ctx);
+/*
+ * The hashmap interface is polymorphic: keys and values can be either
+ * long-sized integers or pointers. This is achieved as follows:
+ * - interface functions that operate on keys and values are hidden
+ * behind auxiliary macros, e.g. hashmap_insert <-> hashmap__insert;
+ * - these auxiliary macros cast the key and value parameters as
+ * long or long *, so the user does not have to specify the casts explicitly;
+ * - for pointer parameters (e.g. old_key) the size of the pointed-to
+ * type is verified by hashmap_cast_ptr using _Static_assert;
+ * - when iterating using the hashmap__for_each_* forms,
+ * hashmap_entry->key should be used for integer keys and
+ * hashmap_entry->pkey should be used for pointer keys;
+ * the same goes for values.
+ */
struct hashmap_entry {
- const void *key;
- void *value;
+ union {
+ long key;
+ const void *pkey;
+ };
+ union {
+ long value;
+ void *pvalue;
+ };
struct hashmap_entry *next;
};
@@ -102,6 +122,13 @@ enum hashmap_insert_strategy {
HASHMAP_APPEND,
};
+#define hashmap_cast_ptr(p) ({ \
+ _Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || \
+ sizeof(*(p)) == sizeof(long), \
+ #p " pointee should be a long-sized integer or a pointer"); \
+ (long *)(p); \
+})
+
/*
* hashmap__insert() adds key/value entry w/ various semantics, depending on
* provided strategy value. If a given key/value pair replaced already
@@ -109,42 +136,38 @@ enum hashmap_insert_strategy {
* through old_key and old_value to allow calling code do proper memory
* management.
*/
-int hashmap__insert(struct hashmap *map, const void *key, void *value,
- enum hashmap_insert_strategy strategy,
- const void **old_key, void **old_value);
+int hashmap_insert(struct hashmap *map, long key, long value,
+ enum hashmap_insert_strategy strategy,
+ long *old_key, long *old_value);
-static inline int hashmap__add(struct hashmap *map,
- const void *key, void *value)
-{
- return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
-}
+#define hashmap__insert(map, key, value, strategy, old_key, old_value) \
+ hashmap_insert((map), (long)(key), (long)(value), (strategy), \
+ hashmap_cast_ptr(old_key), \
+ hashmap_cast_ptr(old_value))
-static inline int hashmap__set(struct hashmap *map,
- const void *key, void *value,
- const void **old_key, void **old_value)
-{
- return hashmap__insert(map, key, value, HASHMAP_SET,
- old_key, old_value);
-}
+#define hashmap__add(map, key, value) \
+ hashmap__insert((map), (key), (value), HASHMAP_ADD, NULL, NULL)
-static inline int hashmap__update(struct hashmap *map,
- const void *key, void *value,
- const void **old_key, void **old_value)
-{
- return hashmap__insert(map, key, value, HASHMAP_UPDATE,
- old_key, old_value);
-}
+#define hashmap__set(map, key, value, old_key, old_value) \
+ hashmap__insert((map), (key), (value), HASHMAP_SET, (old_key), (old_value))
-static inline int hashmap__append(struct hashmap *map,
- const void *key, void *value)
-{
- return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
-}
+#define hashmap__update(map, key, value, old_key, old_value) \
+ hashmap__insert((map), (key), (value), HASHMAP_UPDATE, (old_key), (old_value))
+
+#define hashmap__append(map, key, value) \
+ hashmap__insert((map), (key), (value), HASHMAP_APPEND, NULL, NULL)
+
+bool hashmap_delete(struct hashmap *map, long key, long *old_key, long *old_value);
+
+#define hashmap__delete(map, key, old_key, old_value) \
+ hashmap_delete((map), (long)(key), \
+ hashmap_cast_ptr(old_key), \
+ hashmap_cast_ptr(old_value))
-bool hashmap__delete(struct hashmap *map, const void *key,
- const void **old_key, void **old_value);
+bool hashmap_find(const struct hashmap *map, long key, long *value);
-bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+#define hashmap__find(map, key, value) \
+ hashmap_find((map), (long)(key), hashmap_cast_ptr(value))
/*
* hashmap__for_each_entry - iterate over all entries in hashmap
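Putting the reworked interface together, usage now looks like this (standalone sketch; the map and names are hypothetical, and pointer keys/values are assumed to be long-sized, which hashmap_cast_ptr enforces at compile time):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hashmap.h"

static size_t id_hash_fn(long key, void *ctx)
{
	return key;			/* identity hash for integer ids */
}

static bool id_equal_fn(long k1, long k2, void *ctx)
{
	return k1 == k2;
}

void demo(void)
{
	struct hashmap *m = hashmap__new(id_hash_fn, id_equal_fn, NULL);
	struct hashmap_entry *cur;
	char *name = NULL;
	size_t bkt;

	/* error handling for hashmap__new (ERR_PTR on failure) elided */
	hashmap__add(m, 42, strdup("prog_a"));	/* integer key, pointer value */
	if (hashmap__find(m, 42, &name))	/* &name goes through hashmap_cast_ptr */
		printf("42 -> %s\n", name);

	hashmap__for_each_entry(m, cur, bkt)
		free(cur->pvalue);		/* pvalue: values are pointers */
	hashmap__free(m);
}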
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5d7819edf074..1d263885d635 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -5601,21 +5601,16 @@ int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
}
-static size_t bpf_core_hash_fn(const void *key, void *ctx)
+static size_t bpf_core_hash_fn(const long key, void *ctx)
{
- return (size_t)key;
+ return key;
}
-static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
+static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
{
return k1 == k2;
}
-static void *u32_as_hash_key(__u32 x)
-{
- return (void *)(uintptr_t)x;
-}
-
static int record_relo_core(struct bpf_program *prog,
const struct bpf_core_relo *core_relo, int insn_idx)
{
@@ -5658,7 +5653,6 @@ static int bpf_core_resolve_relo(struct bpf_program *prog,
struct bpf_core_relo_res *targ_res)
{
struct bpf_core_spec specs_scratch[3] = {};
- const void *type_key = u32_as_hash_key(relo->type_id);
struct bpf_core_cand_list *cands = NULL;
const char *prog_name = prog->name;
const struct btf_type *local_type;
@@ -5675,7 +5669,7 @@ static int bpf_core_resolve_relo(struct bpf_program *prog,
return -EINVAL;
if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
- !hashmap__find(cand_cache, type_key, (void **)&cands)) {
+ !hashmap__find(cand_cache, local_id, &cands)) {
cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
if (IS_ERR(cands)) {
pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
@@ -5683,7 +5677,7 @@ static int bpf_core_resolve_relo(struct bpf_program *prog,
local_name, PTR_ERR(cands));
return PTR_ERR(cands);
}
- err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
+ err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
if (err) {
bpf_core_free_cands(cands);
return err;
@@ -5806,7 +5800,7 @@ out:
if (!IS_ERR_OR_NULL(cand_cache)) {
hashmap__for_each_entry(cand_cache, entry, i) {
- bpf_core_free_cands(entry->value);
+ bpf_core_free_cands(entry->pvalue);
}
hashmap__free(cand_cache);
}
diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c
index ea655318153f..2464bcbd04e0 100644
--- a/tools/lib/bpf/strset.c
+++ b/tools/lib/bpf/strset.c
@@ -19,19 +19,19 @@ struct strset {
struct hashmap *strs_hash;
};
-static size_t strset_hash_fn(const void *key, void *ctx)
+static size_t strset_hash_fn(long key, void *ctx)
{
const struct strset *s = ctx;
- const char *str = s->strs_data + (long)key;
+ const char *str = s->strs_data + key;
return str_hash(str);
}
-static bool strset_equal_fn(const void *key1, const void *key2, void *ctx)
+static bool strset_equal_fn(long key1, long key2, void *ctx)
{
const struct strset *s = ctx;
- const char *str1 = s->strs_data + (long)key1;
- const char *str2 = s->strs_data + (long)key2;
+ const char *str1 = s->strs_data + key1;
+ const char *str2 = s->strs_data + key2;
return strcmp(str1, str2) == 0;
}
@@ -67,7 +67,7 @@ struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t ini
/* hashmap__add() returns EEXIST if string with the same
* content already is in the hash map
*/
- err = hashmap__add(hash, (void *)off, (void *)off);
+ err = hashmap__add(hash, off, off);
if (err == -EEXIST)
continue; /* duplicate */
if (err)
@@ -127,7 +127,7 @@ int strset__find_str(struct strset *set, const char *s)
new_off = set->strs_data_len;
memcpy(p, s, len);
- if (hashmap__find(set->strs_hash, (void *)new_off, (void **)&old_off))
+ if (hashmap__find(set->strs_hash, new_off, &old_off))
return old_off;
return -ENOENT;
@@ -165,8 +165,8 @@ int strset__add_str(struct strset *set, const char *s)
* contents doesn't exist already (HASHMAP_ADD strategy). If such
* string exists, we'll get its offset in old_off (that's old_key).
*/
- err = hashmap__insert(set->strs_hash, (void *)new_off, (void *)new_off,
- HASHMAP_ADD, (const void **)&old_off, NULL);
+ err = hashmap__insert(set->strs_hash, new_off, new_off,
+ HASHMAP_ADD, &old_off, NULL);
if (err == -EEXIST)
return old_off; /* duplicated string, return existing offset */
if (err)
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 28fa1b2283de..b8daae265f99 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -873,31 +873,27 @@ static void bpf_link_usdt_dealloc(struct bpf_link *link)
free(usdt_link);
}
-static size_t specs_hash_fn(const void *key, void *ctx)
+static size_t specs_hash_fn(long key, void *ctx)
{
- const char *s = key;
-
- return str_hash(s);
+ return str_hash((char *)key);
}
-static bool specs_equal_fn(const void *key1, const void *key2, void *ctx)
+static bool specs_equal_fn(long key1, long key2, void *ctx)
{
- const char *s1 = key1;
- const char *s2 = key2;
-
- return strcmp(s1, s2) == 0;
+ return strcmp((char *)key1, (char *)key2) == 0;
}
static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash,
struct bpf_link_usdt *link, struct usdt_target *target,
int *spec_id, bool *is_new)
{
- void *tmp;
+ long tmp;
+ void *new_ids;
int err;
/* check if we already allocated spec ID for this spec string */
if (hashmap__find(specs_hash, target->spec_str, &tmp)) {
- *spec_id = (long)tmp;
+ *spec_id = tmp;
*is_new = false;
return 0;
}
@@ -905,17 +901,17 @@ static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash
/* otherwise it's a new ID that needs to be set up in specs map and
* returned back to usdt_manager when USDT link is detached
*/
- tmp = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids));
- if (!tmp)
+ new_ids = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids));
+ if (!new_ids)
return -ENOMEM;
- link->spec_ids = tmp;
+ link->spec_ids = new_ids;
/* get next free spec ID, giving preference to free list, if not empty */
if (man->free_spec_cnt) {
*spec_id = man->free_spec_ids[man->free_spec_cnt - 1];
/* cache spec ID for current spec string for future lookups */
- err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id);
+ err = hashmap__add(specs_hash, target->spec_str, *spec_id);
if (err)
return err;
@@ -928,7 +924,7 @@ static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash
*spec_id = man->next_free_spec_id;
/* cache spec ID for current spec string for future lookups */
- err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id);
+ err = hashmap__add(specs_hash, target->spec_str, *spec_id);
if (err)
return err;
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 6512f5e22045..c598f95aebf3 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -130,12 +130,9 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
expr__find_ids("FOO + BAR + BAZ + BOZO", "FOO",
ctx) == 0);
TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 3);
- TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAR",
- (void **)&val_ptr));
- TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAZ",
- (void **)&val_ptr));
- TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BOZO",
- (void **)&val_ptr));
+ TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAR", &val_ptr));
+ TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAZ", &val_ptr));
+ TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BOZO", &val_ptr));
expr__ctx_clear(ctx);
ctx->sctx.runtime = 3;
@@ -143,20 +140,16 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
expr__find_ids("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@",
NULL, ctx) == 0);
TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2);
- TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3@",
- (void **)&val_ptr));
- TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@",
- (void **)&val_ptr));
+ TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3@", &val_ptr));
+ TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@", &val_ptr));
expr__ctx_clear(ctx);
TEST_ASSERT_VAL("find ids",
expr__find_ids("dash\\-event1 - dash\\-event2",
NULL, ctx) == 0);
TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2);
- TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event1",
- (void **)&val_ptr));
- TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event2",
- (void **)&val_ptr));
+ TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event1", &val_ptr));
+ TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event2", &val_ptr));
/* Only EVENT1 or EVENT2 need be measured depending on the value of smt_on. */
{
@@ -174,7 +167,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1);
TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids,
smton ? "EVENT1" : "EVENT2",
- (void **)&val_ptr));
+ &val_ptr));
expr__ctx_clear(ctx);
TEST_ASSERT_VAL("find ids",
@@ -183,7 +176,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1);
TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids,
corewide ? "EVENT1" : "EVENT2",
- (void **)&val_ptr));
+ &val_ptr));
}
/* The expression is a constant 1.0 without needing to evaluate EVENT1. */
@@ -220,8 +213,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
expr__find_ids("source_count(EVENT1)",
NULL, ctx) == 0);
TEST_ASSERT_VAL("source count", hashmap__size(ctx->ids) == 1);
- TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1",
- (void **)&val_ptr));
+ TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1", &val_ptr));
expr__ctx_free(ctx);
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index 097e05c796ab..3c2ee55e75c7 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -986,10 +986,10 @@ static int metric_parse_fake(const char *str)
*/
i = 1;
hashmap__for_each_entry(ctx->ids, cur, bkt)
- expr__add_id_val(ctx, strdup(cur->key), i++);
+ expr__add_id_val(ctx, strdup(cur->pkey), i++);
hashmap__for_each_entry(ctx->ids, cur, bkt) {
- if (check_parse_fake(cur->key)) {
+ if (check_parse_fake(cur->pkey)) {
pr_err("check_parse_fake failed\n");
goto out;
}
@@ -1003,7 +1003,7 @@ static int metric_parse_fake(const char *str)
*/
i = 1024;
hashmap__for_each_entry(ctx->ids, cur, bkt)
- expr__add_id_val(ctx, strdup(cur->key), i--);
+ expr__add_id_val(ctx, strdup(cur->pkey), i--);
if (expr__parse(&result, ctx, str)) {
pr_err("expr__parse failed\n");
ret = -1;
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f4adeccdbbcb..a5dbd71cb9ab 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -318,7 +318,7 @@ static void bpf_program_hash_free(void)
return;
hashmap__for_each_entry(bpf_program_hash, cur, bkt)
- clear_prog_priv(cur->key, cur->value);
+ clear_prog_priv(cur->pkey, cur->pvalue);
hashmap__free(bpf_program_hash);
bpf_program_hash = NULL;
@@ -339,13 +339,12 @@ void bpf__clear(void)
bpf_map_hash_free();
}
-static size_t ptr_hash(const void *__key, void *ctx __maybe_unused)
+static size_t ptr_hash(const long __key, void *ctx __maybe_unused)
{
- return (size_t) __key;
+ return __key;
}
-static bool ptr_equal(const void *key1, const void *key2,
- void *ctx __maybe_unused)
+static bool ptr_equal(long key1, long key2, void *ctx __maybe_unused)
{
return key1 == key2;
}
@@ -1185,7 +1184,7 @@ static void bpf_map_hash_free(void)
return;
hashmap__for_each_entry(bpf_map_hash, cur, bkt)
- bpf_map_priv__clear(cur->key, cur->value);
+ bpf_map_priv__clear(cur->pkey, cur->pvalue);
hashmap__free(bpf_map_hash);
bpf_map_hash = NULL;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 76605fde3507..9e8a1294c981 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3123,7 +3123,7 @@ void evsel__zero_per_pkg(struct evsel *evsel)
if (evsel->per_pkg_mask) {
hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
- free((char *)cur->key);
+ free((void *)cur->pkey);
hashmap__clear(evsel->per_pkg_mask);
}
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index aaacf514dc09..2f05ecdcfe9a 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -46,7 +46,7 @@ struct expr_id_data {
} kind;
};
-static size_t key_hash(const void *key, void *ctx __maybe_unused)
+static size_t key_hash(long key, void *ctx __maybe_unused)
{
const char *str = (const char *)key;
size_t hash = 0;
@@ -59,8 +59,7 @@ static size_t key_hash(const void *key, void *ctx __maybe_unused)
return hash;
}
-static bool key_equal(const void *key1, const void *key2,
- void *ctx __maybe_unused)
+static bool key_equal(long key1, long key2, void *ctx __maybe_unused)
{
return !strcmp((const char *)key1, (const char *)key2);
}
@@ -84,8 +83,8 @@ void ids__free(struct hashmap *ids)
return;
hashmap__for_each_entry(ids, cur, bkt) {
- free((char *)cur->key);
- free(cur->value);
+ free((void *)cur->pkey);
+ free((void *)cur->pvalue);
}
hashmap__free(ids);
@@ -97,8 +96,7 @@ int ids__insert(struct hashmap *ids, const char *id)
char *old_key = NULL;
int ret;
- ret = hashmap__set(ids, id, data_ptr,
- (const void **)&old_key, (void **)&old_data);
+ ret = hashmap__set(ids, id, data_ptr, &old_key, &old_data);
if (ret)
free(data_ptr);
free(old_key);
@@ -127,8 +125,7 @@ struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2)
ids2 = tmp;
}
hashmap__for_each_entry(ids2, cur, bkt) {
- ret = hashmap__set(ids1, cur->key, cur->value,
- (const void **)&old_key, (void **)&old_data);
+ ret = hashmap__set(ids1, cur->key, cur->value, &old_key, &old_data);
free(old_key);
free(old_data);
@@ -169,8 +166,7 @@ int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id,
data_ptr->val.source_count = source_count;
data_ptr->kind = EXPR_ID_DATA__VALUE;
- ret = hashmap__set(ctx->ids, id, data_ptr,
- (const void **)&old_key, (void **)&old_data);
+ ret = hashmap__set(ctx->ids, id, data_ptr, &old_key, &old_data);
if (ret)
free(data_ptr);
free(old_key);
@@ -205,8 +201,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
data_ptr->ref.metric_expr = ref->metric_expr;
data_ptr->kind = EXPR_ID_DATA__REF;
- ret = hashmap__set(ctx->ids, name, data_ptr,
- (const void **)&old_key, (void **)&old_data);
+ ret = hashmap__set(ctx->ids, name, data_ptr, &old_key, &old_data);
if (ret)
free(data_ptr);
@@ -221,7 +216,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
struct expr_id_data **data)
{
- return hashmap__find(ctx->ids, id, (void **)data) ? 0 : -1;
+ return hashmap__find(ctx->ids, id, data) ? 0 : -1;
}
bool expr__subset_of_ids(struct expr_parse_ctx *haystack,
@@ -232,7 +227,7 @@ bool expr__subset_of_ids(struct expr_parse_ctx *haystack,
struct expr_id_data *data;
hashmap__for_each_entry(needles->ids, cur, bkt) {
- if (expr__get_id(haystack, cur->key, &data))
+ if (expr__get_id(haystack, cur->pkey, &data))
return false;
}
return true;
@@ -282,8 +277,7 @@ void expr__del_id(struct expr_parse_ctx *ctx, const char *id)
struct expr_id_data *old_val = NULL;
char *old_key = NULL;
- hashmap__delete(ctx->ids, id,
- (const void **)&old_key, (void **)&old_val);
+ hashmap__delete(ctx->ids, id, &old_key, &old_val);
free(old_key);
free(old_val);
}
@@ -314,8 +308,8 @@ void expr__ctx_clear(struct expr_parse_ctx *ctx)
size_t bkt;
hashmap__for_each_entry(ctx->ids, cur, bkt) {
- free((char *)cur->key);
- free(cur->value);
+ free((void *)cur->pkey);
+ free(cur->pvalue);
}
hashmap__clear(ctx->ids);
}
@@ -330,8 +324,8 @@ void expr__ctx_free(struct expr_parse_ctx *ctx)
free(ctx->sctx.user_requested_cpu_list);
hashmap__for_each_entry(ctx->ids, cur, bkt) {
- free((char *)cur->key);
- free(cur->value);
+ free((void *)cur->pkey);
+ free(cur->pvalue);
}
hashmap__free(ctx->ids);
free(ctx);
diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c
index aeb09c288716..140ee4055676 100644
--- a/tools/perf/util/hashmap.c
+++ b/tools/perf/util/hashmap.c
@@ -128,7 +128,7 @@ static int hashmap_grow(struct hashmap *map)
}
static bool hashmap_find_entry(const struct hashmap *map,
- const void *key, size_t hash,
+ const long key, size_t hash,
struct hashmap_entry ***pprev,
struct hashmap_entry **entry)
{
@@ -151,18 +151,18 @@ static bool hashmap_find_entry(const struct hashmap *map,
return false;
}
-int hashmap__insert(struct hashmap *map, const void *key, void *value,
- enum hashmap_insert_strategy strategy,
- const void **old_key, void **old_value)
+int hashmap_insert(struct hashmap *map, long key, long value,
+ enum hashmap_insert_strategy strategy,
+ long *old_key, long *old_value)
{
struct hashmap_entry *entry;
size_t h;
int err;
if (old_key)
- *old_key = NULL;
+ *old_key = 0;
if (old_value)
- *old_value = NULL;
+ *old_value = 0;
h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
if (strategy != HASHMAP_APPEND &&
@@ -203,7 +203,7 @@ int hashmap__insert(struct hashmap *map, const void *key, void *value,
return 0;
}
-bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+bool hashmap_find(const struct hashmap *map, long key, long *value)
{
struct hashmap_entry *entry;
size_t h;
@@ -217,8 +217,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value)
return true;
}
-bool hashmap__delete(struct hashmap *map, const void *key,
- const void **old_key, void **old_value)
+bool hashmap_delete(struct hashmap *map, long key,
+ long *old_key, long *old_value)
{
struct hashmap_entry **pprev, *entry;
size_t h;
diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h
index 10a4c4cd13cf..0a5bf1937a7c 100644
--- a/tools/perf/util/hashmap.h
+++ b/tools/perf/util/hashmap.h
@@ -40,12 +40,32 @@ static inline size_t str_hash(const char *s)
return h;
}
-typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
-typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+typedef size_t (*hashmap_hash_fn)(long key, void *ctx);
+typedef bool (*hashmap_equal_fn)(long key1, long key2, void *ctx);
+/*
+ * The hashmap interface is polymorphic: keys and values can be either
+ * long-sized integers or pointers. This is achieved as follows:
+ * - interface functions that operate on keys and values are hidden
+ * behind auxiliary macros, e.g. hashmap_insert <-> hashmap__insert;
+ * - these auxiliary macros cast the key and value parameters as
+ * long or long *, so the user does not have to specify the casts explicitly;
+ * - for pointer parameters (e.g. old_key) the size of the pointed
+ * type is verified by hashmap_cast_ptr using _Static_assert;
+ * - when iterating using hashmap__for_each_* forms,
+ *   hashmap_entry->key should be used for integer keys and
+ *   hashmap_entry->pkey should be used for pointer keys;
+ *   the same goes for values.
+ */
struct hashmap_entry {
- const void *key;
- void *value;
+ union {
+ long key;
+ const void *pkey;
+ };
+ union {
+ long value;
+ void *pvalue;
+ };
struct hashmap_entry *next;
};
@@ -102,6 +122,13 @@ enum hashmap_insert_strategy {
HASHMAP_APPEND,
};
+#define hashmap_cast_ptr(p) ({ \
+ _Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || \
+ sizeof(*(p)) == sizeof(long), \
+ #p " pointee should be a long-sized integer or a pointer"); \
+ (long *)(p); \
+})
+
/*
* hashmap__insert() adds key/value entry w/ various semantics, depending on
* provided strategy value. If a given key/value pair replaced already
@@ -109,42 +136,38 @@ enum hashmap_insert_strategy {
* through old_key and old_value to allow calling code do proper memory
* management.
*/
-int hashmap__insert(struct hashmap *map, const void *key, void *value,
- enum hashmap_insert_strategy strategy,
- const void **old_key, void **old_value);
+int hashmap_insert(struct hashmap *map, long key, long value,
+ enum hashmap_insert_strategy strategy,
+ long *old_key, long *old_value);
-static inline int hashmap__add(struct hashmap *map,
- const void *key, void *value)
-{
- return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
-}
+#define hashmap__insert(map, key, value, strategy, old_key, old_value) \
+ hashmap_insert((map), (long)(key), (long)(value), (strategy), \
+ hashmap_cast_ptr(old_key), \
+ hashmap_cast_ptr(old_value))
-static inline int hashmap__set(struct hashmap *map,
- const void *key, void *value,
- const void **old_key, void **old_value)
-{
- return hashmap__insert(map, key, value, HASHMAP_SET,
- old_key, old_value);
-}
+#define hashmap__add(map, key, value) \
+ hashmap__insert((map), (key), (value), HASHMAP_ADD, NULL, NULL)
-static inline int hashmap__update(struct hashmap *map,
- const void *key, void *value,
- const void **old_key, void **old_value)
-{
- return hashmap__insert(map, key, value, HASHMAP_UPDATE,
- old_key, old_value);
-}
+#define hashmap__set(map, key, value, old_key, old_value) \
+ hashmap__insert((map), (key), (value), HASHMAP_SET, (old_key), (old_value))
-static inline int hashmap__append(struct hashmap *map,
- const void *key, void *value)
-{
- return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
-}
+#define hashmap__update(map, key, value, old_key, old_value) \
+ hashmap__insert((map), (key), (value), HASHMAP_UPDATE, (old_key), (old_value))
+
+#define hashmap__append(map, key, value) \
+ hashmap__insert((map), (key), (value), HASHMAP_APPEND, NULL, NULL)
+
+bool hashmap_delete(struct hashmap *map, long key, long *old_key, long *old_value);
+
+#define hashmap__delete(map, key, old_key, old_value) \
+ hashmap_delete((map), (long)(key), \
+ hashmap_cast_ptr(old_key), \
+ hashmap_cast_ptr(old_value))
-bool hashmap__delete(struct hashmap *map, const void *key,
- const void **old_key, void **old_value);
+bool hashmap_find(const struct hashmap *map, long key, long *value);
-bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+#define hashmap__find(map, key, value) \
+ hashmap_find((map), (long)(key), hashmap_cast_ptr(value))
/*
* hashmap__for_each_entry - iterate over all entries in hashmap
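A minimal sketch, not from the patch itself, of how the reworked interface reads from caller code; identity_hash() and long_equal() are illustrative helpers, and hashmap__new() is assumed to keep its existing signature:

#include <stdio.h>
#include "hashmap.h"

static size_t identity_hash(long key, void *ctx) { return key; }
static bool long_equal(long a, long b, void *ctx) { return a == b; }

void demo(void)
{
	struct hashmap *map = hashmap__new(identity_hash, long_equal, NULL);
	struct hashmap_entry *cur;
	long old_val = 0, val = 0;
	size_t bkt;

	hashmap__add(map, 1, 100);                 /* no (void *) casts needed */
	hashmap__set(map, 1, 200, NULL, &old_val); /* old_val becomes 100 */
	hashmap__find(map, 1, &val);               /* val becomes 200 */

	/* With `int v;`, hashmap__find(map, 1, &v) would trip the
	 * hashmap_cast_ptr() _Static_assert: pointees must be long-sized.
	 */
	hashmap__for_each_entry(map, cur, bkt)
		printf("%ld = %ld\n", cur->key, cur->value);

	hashmap__free(map);
}

Pointer keys mix freely with integer values, as the kprobe_multi selftest below does with hashmap__add(map, name, 0).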
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 4c98ac29ee13..6b3505b1b6ac 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -288,7 +288,7 @@ static int setup_metric_events(struct hashmap *ids,
* combined or shared groups, this metric may not care
* about this event.
*/
- if (hashmap__find(ids, metric_id, (void **)&val_ptr)) {
+ if (hashmap__find(ids, metric_id, &val_ptr)) {
metric_events[matched_events++] = ev;
if (matched_events >= ids_size)
@@ -764,7 +764,7 @@ static int metricgroup__build_event_string(struct strbuf *events,
#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
hashmap__for_each_entry(ctx->ids, cur, bkt) {
- const char *sep, *rsep, *id = cur->key;
+ const char *sep, *rsep, *id = cur->pkey;
enum perf_tool_event ev;
pr_debug("found event %s\n", id);
@@ -945,14 +945,14 @@ static int resolve_metric(struct list_head *metric_list,
hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) {
struct pmu_event pe;
- if (metricgroup__find_metric(cur->key, table, &pe)) {
+ if (metricgroup__find_metric(cur->pkey, table, &pe)) {
pending = realloc(pending,
(pending_cnt + 1) * sizeof(struct to_resolve));
if (!pending)
return -ENOMEM;
memcpy(&pending[pending_cnt].pe, &pe, sizeof(pe));
- pending[pending_cnt].key = cur->key;
+ pending[pending_cnt].key = cur->pkey;
pending_cnt++;
}
}
@@ -1433,7 +1433,7 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
list_for_each_entry(m, metric_list, nd) {
if (m->has_constraint && !m->modifier) {
hashmap__for_each_entry(m->pctx->ids, cur, bkt) {
- dup = strdup(cur->key);
+ dup = strdup(cur->pkey);
if (!dup) {
ret = -ENOMEM;
goto err_out;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 07b29fe272c7..0bf71b02aa06 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -398,7 +398,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
i = 0;
hashmap__for_each_entry(ctx->ids, cur, bkt) {
- const char *metric_name = (const char *)cur->key;
+ const char *metric_name = cur->pkey;
found = false;
if (leader) {
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 8ec8bb4a9912..c0656f85bfa5 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -278,15 +278,14 @@ void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
}
}
-static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
+static size_t pkg_id_hash(long __key, void *ctx __maybe_unused)
{
uint64_t *key = (uint64_t *) __key;
return *key & 0xffffffff;
}
-static bool pkg_id_equal(const void *__key1, const void *__key2,
- void *ctx __maybe_unused)
+static bool pkg_id_equal(long __key1, long __key2, void *ctx __maybe_unused)
{
uint64_t *key1 = (uint64_t *) __key1;
uint64_t *key2 = (uint64_t *) __key2;
@@ -347,11 +346,11 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
return -ENOMEM;
*key = (uint64_t)d << 32 | s;
- if (hashmap__find(mask, (void *)key, NULL)) {
+ if (hashmap__find(mask, key, NULL)) {
*skip = true;
free(key);
} else
- ret = hashmap__add(mask, (void *)key, (void *)1);
+ ret = hashmap__add(mask, key, 1);
return ret;
}
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 79edef1dbda4..f3cd17026ee5 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -182,14 +182,15 @@ endif
$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
$(call msg,LIB,,$@)
$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \
- -fuse-ld=$(LLD) -Wl,-znoseparate-code -fPIC -shared -o $@
+ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
+ -fPIC -shared -o $@
$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
$(call msg,BINARY,,$@)
$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
liburandom_read.so $(LDLIBS) \
- -fuse-ld=$(LLD) -Wl,-znoseparate-code \
- -Wl,-rpath=. -Wl,--build-id=sha1 -o $@
+ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
+ -Wl,-rpath=. -o $@
$(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
$(call msg,SIGN-FILE,,$@)
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index a3352a64c067..10587a29b967 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -20,6 +20,25 @@ static inline unsigned int bpf_num_possible_cpus(void)
return possible_cpus;
}
+/* Copy up to sz - 1 bytes from the zero-terminated src string and ensure that
+ * dst is a zero-terminated string no matter what (unless sz == 0, in which
+ * case it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but
+ * differs in what is returned. Given that this is an internal helper, it's
+ * trivial to extend when necessary. Use this instead of strncpy.
+ */
+static inline void bpf_strlcpy(char *dst, const char *src, size_t sz)
+{
+ size_t i;
+
+ if (sz == 0)
+ return;
+
+ sz--;
+ for (i = 0; i < sz && src[i]; i++)
+ dst[i] = src[i];
+ dst[i] = '\0';
+}
+
#define __bpf_percpu_val_align __attribute__((__aligned__(8)))
#define BPF_DECLARE_PERCPU(type, name) \
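A quick illustration, not from the patch, of the truncation guarantee described above:

	char buf[4];

	bpf_strlcpy(buf, "cpu,io,memory", sizeof(buf));
	/* buf now holds "cpu": at most sizeof(buf) - 1 bytes are copied
	 * and the result is always NUL-terminated, unlike with strncpy().
	 */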
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index e914cc45b766..dd1aa5afcf5a 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -13,6 +13,7 @@
#include <ftw.h>
#include "cgroup_helpers.h"
+#include "bpf_util.h"
/*
* To avoid relying on the system setup, when setup_cgroup_env is called
@@ -77,7 +78,7 @@ static int __enable_controllers(const char *cgroup_path, const char *controllers
enable[len] = 0;
close(fd);
} else {
- strncpy(enable, controllers, sizeof(enable));
+ bpf_strlcpy(enable, controllers, sizeof(enable));
}
snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 970f09156eb4..4666f88f2bb4 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -2,7 +2,7 @@
#include <test_progs.h>
#define MAX_INSNS 512
-#define MAX_MATCHES 16
+#define MAX_MATCHES 24
struct bpf_reg_match {
unsigned int line;
@@ -267,6 +267,7 @@ static struct bpf_align_test tests[] = {
*/
BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
@@ -280,6 +281,7 @@ static struct bpf_align_test tests[] = {
BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
@@ -311,44 +313,52 @@ static struct bpf_align_test tests[] = {
{15, "R4=pkt(id=1,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
{15, "R5=pkt(id=1,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
/* Variable offset is added to R5 packet pointer,
- * resulting in auxiliary alignment of 4.
+		 * resulting in auxiliary alignment of 4. To keep the
+		 * BPF verifier's precision backtracking logging from
+		 * interfering, we also add a no-op R4 = R5
+		 * instruction to validate R5's state. We also check
+		 * that R4 is what it should be in such a case.
*/
- {17, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+ {18, "R4_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+ {18, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- {18, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+ {19, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
* aligned, so the total offset is 4-byte aligned and
* meets the load's requirements.
*/
- {23, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
- {23, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
+ {24, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
+ {24, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {25, "R5_w=pkt(off=14,r=8"},
+ {26, "R5_w=pkt(off=14,r=8"},
/* Variable offset is added to R5, resulting in a
- * variable offset of (4n).
+		 * variable offset of (4n). See the comment for insn #18
+		 * for the R4 = R5 trick.
*/
- {26, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+ {28, "R4_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+ {28, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
/* Constant is added to R5 again, setting reg->off to 18. */
- {27, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+ {29, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {28, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"},
+ {31, "R4_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"},
+ {31, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {33, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"},
- {33, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"},
+ {35, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"},
+ {35, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"},
},
},
{
@@ -681,6 +691,6 @@ void test_align(void)
if (!test__start_subtest(test->descr))
continue;
- CHECK_FAIL(do_test_single(test));
+ ASSERT_OK(do_test_single(test), test->descr);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 24dd6214394e..95a2b80f0d17 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -7133,7 +7133,7 @@ static struct btf_dedup_test dedup_tests[] = {
BTF_ENUM_ENC(NAME_NTH(4), 456),
/* [4] fwd enum 'e2' after full enum */
BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
- /* [5] incompatible fwd enum with different size */
+		/* [5] fwd enum with different size; size does not matter for fwds */
BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
/* [6] incompatible full enum with different value */
BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
@@ -7150,9 +7150,7 @@ static struct btf_dedup_test dedup_tests[] = {
/* [2] full enum 'e2' */
BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
BTF_ENUM_ENC(NAME_NTH(4), 456),
- /* [3] incompatible fwd enum with different size */
- BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
- /* [4] incompatible full enum with different value */
+ /* [3] incompatible full enum with different value */
BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
BTF_ENUM_ENC(NAME_NTH(2), 321),
BTF_END_RAW,
@@ -7611,7 +7609,263 @@ static struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0e1\0e1_val"),
},
},
-
+{
+ .descr = "dedup: enum of different size: no dedup",
+ .input = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+},
+{
+ .descr = "dedup: enum fwd to enum64",
+ .input = {
+ .raw_types = {
+ /* [1] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ /* [2] enum 'e1' fwd */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
+ /* [3] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ /* [2] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+},
+{
+ .descr = "dedup: enum64 fwd to enum",
+ .input = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum64 'e1' fwd */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ /* [3] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration struct",
+ /*
+ * Verify that CU1:foo and CU2:foo would be unified and that
+ * typedef/ptr would be updated to point to CU1:foo.
+ *
+ * // CU 1:
+ * struct foo { int x; };
+ *
+ * // CU 2:
+ * struct foo;
+ * typedef struct foo *foo_ptr;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ BTF_PTR_ENC(1), /* [3] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration union",
+ /*
+ * Verify that CU1:foo and CU2:foo would be unified and that
+ * typedef/ptr would be updated to point to CU1:foo.
+ * Same as "dedup: standalone fwd declaration struct" but for unions.
+ *
+ * // CU 1:
+ * union foo { int x; };
+ *
+ * // CU 2:
+ * union foo;
+ * typedef union foo *foo_ptr;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_TBD, 1), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ BTF_PTR_ENC(1), /* [3] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration wrong kind",
+ /*
+ * Negative test for btf_dedup_resolve_fwds:
+	 * - CU1:foo is a struct, CU2:foo is a union, thus CU2:foo is not deduped;
+ * - typedef/ptr should remain unchanged as well.
+ *
+ * // CU 1:
+ * struct foo { int x; };
+ *
+ * // CU 2:
+ * union foo;
+ * typedef union foo *foo_ptr;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration name conflict",
+ /*
+ * Negative test for btf_dedup_resolve_fwds:
+ * - two candidates for CU2:foo dedup, thus it is unchanged;
+ * - typedef/ptr should remain unchanged as well.
+ *
+ * // CU 1:
+ * struct foo { int x; };
+ *
+ * // CU 2:
+ * struct foo;
+ * typedef struct foo *foo_ptr;
+ *
+ * // CU 3:
+ * struct foo { int x; int y; };
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */
+ /* CU 3 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */
+ /* CU 3 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"),
+ },
+},
};
static int btf_type_size(const struct btf_type *t)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
index 90aac437576d..d9024c7a892a 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
@@ -143,6 +143,10 @@ static void test_split_fwd_resolve() {
btf__add_struct(btf1, "s2", 4); /* [5] struct s2 { */
btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
/* } */
+	/* keep this out of the type graph to test btf_dedup_resolve_fwds */
+ btf__add_struct(btf1, "s3", 4); /* [6] struct s3 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
VALIDATE_RAW_BTF(
btf1,
@@ -153,20 +157,24 @@ static void test_split_fwd_resolve() {
"\t'f1' type_id=2 bits_offset=0\n"
"\t'f2' type_id=3 bits_offset=64",
"[5] STRUCT 's2' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[6] STRUCT 's3' size=4 vlen=1\n"
"\t'f1' type_id=1 bits_offset=0");
btf2 = btf__new_empty_split(btf1);
if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
goto cleanup;
- btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [6] int */
- btf__add_ptr(btf2, 10); /* [7] ptr to struct s1 */
- btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [8] fwd for struct s2 */
- btf__add_ptr(btf2, 8); /* [9] ptr to fwd struct s2 */
- btf__add_struct(btf2, "s1", 16); /* [10] struct s1 { */
- btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */
- btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */
+ btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [7] int */
+ btf__add_ptr(btf2, 11); /* [8] ptr to struct s1 */
+ btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [9] fwd for struct s2 */
+ btf__add_ptr(btf2, 9); /* [10] ptr to fwd struct s2 */
+ btf__add_struct(btf2, "s1", 16); /* [11] struct s1 { */
+ btf__add_field(btf2, "f1", 8, 0, 0); /* struct s1 *f1; */
+ btf__add_field(btf2, "f2", 10, 64, 0); /* struct s2 *f2; */
/* } */
+ btf__add_fwd(btf2, "s3", BTF_FWD_STRUCT); /* [12] fwd for struct s3 */
+ btf__add_ptr(btf2, 12); /* [13] ptr to struct s1 */
VALIDATE_RAW_BTF(
btf2,
@@ -178,13 +186,17 @@ static void test_split_fwd_resolve() {
"\t'f2' type_id=3 bits_offset=64",
"[5] STRUCT 's2' size=4 vlen=1\n"
"\t'f1' type_id=1 bits_offset=0",
- "[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
- "[7] PTR '(anon)' type_id=10",
- "[8] FWD 's2' fwd_kind=struct",
- "[9] PTR '(anon)' type_id=8",
- "[10] STRUCT 's1' size=16 vlen=2\n"
- "\t'f1' type_id=7 bits_offset=0\n"
- "\t'f2' type_id=9 bits_offset=64");
+ "[6] STRUCT 's3' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[7] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[8] PTR '(anon)' type_id=11",
+ "[9] FWD 's2' fwd_kind=struct",
+ "[10] PTR '(anon)' type_id=9",
+ "[11] STRUCT 's1' size=16 vlen=2\n"
+ "\t'f1' type_id=8 bits_offset=0\n"
+ "\t'f2' type_id=10 bits_offset=64",
+ "[12] FWD 's3' fwd_kind=struct",
+ "[13] PTR '(anon)' type_id=12");
err = btf__dedup(btf2, NULL);
if (!ASSERT_OK(err, "btf_dedup"))
@@ -199,7 +211,10 @@ static void test_split_fwd_resolve() {
"\t'f1' type_id=2 bits_offset=0\n"
"\t'f2' type_id=3 bits_offset=64",
"[5] STRUCT 's2' size=4 vlen=1\n"
- "\t'f1' type_id=1 bits_offset=0");
+ "\t'f1' type_id=1 bits_offset=0",
+ "[6] STRUCT 's3' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[7] PTR '(anon)' type_id=6");
cleanup:
btf__free(btf2);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 24da335482d4..0ba2e8b9c6ac 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -791,11 +791,11 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
sizeof(struct bpf_sock_ops) - 1,
"(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
- { .op = 1, .skb_tcp_flags = 2});
+ { .op = 1, .skb_hwtstamp = 2});
TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
sizeof(struct bpf_sock_ops) - 1,
"(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
- { .op = 1, .skb_tcp_flags = 0});
+ { .op = 1, .skb_hwtstamp = 0});
}
static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c
index 4747ab18f97f..d358a223fd2d 100644
--- a/tools/testing/selftests/bpf/prog_tests/hashmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c
@@ -7,17 +7,18 @@
*/
#include "test_progs.h"
#include "bpf/hashmap.h"
+#include <stddef.h>
static int duration = 0;
-static size_t hash_fn(const void *k, void *ctx)
+static size_t hash_fn(long k, void *ctx)
{
- return (long)k;
+ return k;
}
-static bool equal_fn(const void *a, const void *b, void *ctx)
+static bool equal_fn(long a, long b, void *ctx)
{
- return (long)a == (long)b;
+ return a == b;
}
static inline size_t next_pow_2(size_t n)
@@ -52,8 +53,8 @@ static void test_hashmap_generic(void)
return;
for (i = 0; i < ELEM_CNT; i++) {
- const void *oldk, *k = (const void *)(long)i;
- void *oldv, *v = (void *)(long)(1024 + i);
+ long oldk, k = i;
+ long oldv, v = 1024 + i;
err = hashmap__update(map, k, v, &oldk, &oldv);
if (CHECK(err != -ENOENT, "hashmap__update",
@@ -64,20 +65,18 @@ static void test_hashmap_generic(void)
err = hashmap__add(map, k, v);
} else {
err = hashmap__set(map, k, v, &oldk, &oldv);
- if (CHECK(oldk != NULL || oldv != NULL, "check_kv",
- "unexpected k/v: %p=%p\n", oldk, oldv))
+ if (CHECK(oldk != 0 || oldv != 0, "check_kv",
+ "unexpected k/v: %ld=%ld\n", oldk, oldv))
goto cleanup;
}
- if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n",
- (long)k, (long)v, err))
+ if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n", k, v, err))
goto cleanup;
if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
- "failed to find key %ld\n", (long)k))
+ "failed to find key %ld\n", k))
goto cleanup;
- if (CHECK(oldv != v, "elem_val",
- "found value is wrong: %ld\n", (long)oldv))
+ if (CHECK(oldv != v, "elem_val", "found value is wrong: %ld\n", oldv))
goto cleanup;
}
@@ -91,8 +90,8 @@ static void test_hashmap_generic(void)
found_msk = 0;
hashmap__for_each_entry(map, entry, bkt) {
- long k = (long)entry->key;
- long v = (long)entry->value;
+ long k = entry->key;
+ long v = entry->value;
found_msk |= 1ULL << k;
if (CHECK(v - k != 1024, "check_kv",
@@ -104,8 +103,8 @@ static void test_hashmap_generic(void)
goto cleanup;
for (i = 0; i < ELEM_CNT; i++) {
- const void *oldk, *k = (const void *)(long)i;
- void *oldv, *v = (void *)(long)(256 + i);
+ long oldk, k = i;
+ long oldv, v = 256 + i;
err = hashmap__add(map, k, v);
if (CHECK(err != -EEXIST, "hashmap__add",
@@ -119,13 +118,13 @@ static void test_hashmap_generic(void)
if (CHECK(err, "elem_upd",
"failed to update k/v %ld = %ld: %d\n",
- (long)k, (long)v, err))
+ k, v, err))
goto cleanup;
if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
- "failed to find key %ld\n", (long)k))
+ "failed to find key %ld\n", k))
goto cleanup;
if (CHECK(oldv != v, "elem_val",
- "found value is wrong: %ld\n", (long)oldv))
+ "found value is wrong: %ld\n", oldv))
goto cleanup;
}
@@ -139,8 +138,8 @@ static void test_hashmap_generic(void)
found_msk = 0;
hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
- long k = (long)entry->key;
- long v = (long)entry->value;
+ long k = entry->key;
+ long v = entry->value;
found_msk |= 1ULL << k;
if (CHECK(v - k != 256, "elem_check",
@@ -152,7 +151,7 @@ static void test_hashmap_generic(void)
goto cleanup;
found_cnt = 0;
- hashmap__for_each_key_entry(map, entry, (void *)0) {
+ hashmap__for_each_key_entry(map, entry, 0) {
found_cnt++;
}
if (CHECK(!found_cnt, "found_cnt",
@@ -161,27 +160,25 @@ static void test_hashmap_generic(void)
found_msk = 0;
found_cnt = 0;
- hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) {
- const void *oldk, *k;
- void *oldv, *v;
+ hashmap__for_each_key_entry_safe(map, entry, tmp, 0) {
+ long oldk, k;
+ long oldv, v;
k = entry->key;
v = entry->value;
found_cnt++;
- found_msk |= 1ULL << (long)k;
+ found_msk |= 1ULL << k;
if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "failed to delete k/v %ld = %ld\n",
- (long)k, (long)v))
+ "failed to delete k/v %ld = %ld\n", k, v))
goto cleanup;
if (CHECK(oldk != k || oldv != v, "check_old",
"invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n",
- (long)k, (long)v, (long)oldk, (long)oldv))
+ k, v, oldk, oldv))
goto cleanup;
if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "unexpectedly deleted k/v %ld = %ld\n",
- (long)oldk, (long)oldv))
+ "unexpectedly deleted k/v %ld = %ld\n", oldk, oldv))
goto cleanup;
}
@@ -198,26 +195,24 @@ static void test_hashmap_generic(void)
goto cleanup;
hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
- const void *oldk, *k;
- void *oldv, *v;
+ long oldk, k;
+ long oldv, v;
k = entry->key;
v = entry->value;
found_cnt++;
- found_msk |= 1ULL << (long)k;
+ found_msk |= 1ULL << k;
if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "failed to delete k/v %ld = %ld\n",
- (long)k, (long)v))
+ "failed to delete k/v %ld = %ld\n", k, v))
goto cleanup;
if (CHECK(oldk != k || oldv != v, "elem_check",
"invalid old k/v: expect %ld = %ld, got %ld = %ld\n",
- (long)k, (long)v, (long)oldk, (long)oldv))
+ k, v, oldk, oldv))
goto cleanup;
if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "unexpectedly deleted k/v %ld = %ld\n",
- (long)k, (long)v))
+ "unexpectedly deleted k/v %ld = %ld\n", k, v))
goto cleanup;
}
@@ -235,7 +230,7 @@ static void test_hashmap_generic(void)
hashmap__for_each_entry(map, entry, bkt) {
CHECK(false, "elem_exists",
"unexpected map entries left: %ld = %ld\n",
- (long)entry->key, (long)entry->value);
+ entry->key, entry->value);
goto cleanup;
}
@@ -243,22 +238,107 @@ static void test_hashmap_generic(void)
hashmap__for_each_entry(map, entry, bkt) {
CHECK(false, "elem_exists",
"unexpected map entries left: %ld = %ld\n",
- (long)entry->key, (long)entry->value);
+ entry->key, entry->value);
+ goto cleanup;
+ }
+
+cleanup:
+ hashmap__free(map);
+}
+
+static size_t str_hash_fn(long a, void *ctx)
+{
+ return str_hash((char *)a);
+}
+
+static bool str_equal_fn(long a, long b, void *ctx)
+{
+ return strcmp((char *)a, (char *)b) == 0;
+}
+
+/* Verify that hashmap interface works with pointer keys and values */
+static void test_hashmap_ptr_iface(void)
+{
+ const char *key, *value, *old_key, *old_value;
+ struct hashmap_entry *cur;
+ struct hashmap *map;
+ int err, i, bkt;
+
+ map = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+ if (CHECK(!map, "hashmap__new", "can't allocate hashmap\n"))
goto cleanup;
+
+#define CHECK_STR(fn, var, expected) \
+ CHECK(strcmp(var, (expected)), (fn), \
+ "wrong value of " #var ": '%s' instead of '%s'\n", var, (expected))
+
+ err = hashmap__insert(map, "a", "apricot", HASHMAP_ADD, NULL, NULL);
+ if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err))
+ goto cleanup;
+
+ err = hashmap__insert(map, "a", "apple", HASHMAP_SET, &old_key, &old_value);
+ if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err))
+ goto cleanup;
+ CHECK_STR("hashmap__update", old_key, "a");
+ CHECK_STR("hashmap__update", old_value, "apricot");
+
+ err = hashmap__add(map, "b", "banana");
+ if (CHECK(err, "hashmap__add", "unexpected error: %d\n", err))
+ goto cleanup;
+
+ err = hashmap__set(map, "b", "breadfruit", &old_key, &old_value);
+ if (CHECK(err, "hashmap__set", "unexpected error: %d\n", err))
+ goto cleanup;
+ CHECK_STR("hashmap__set", old_key, "b");
+ CHECK_STR("hashmap__set", old_value, "banana");
+
+ err = hashmap__update(map, "b", "blueberry", &old_key, &old_value);
+ if (CHECK(err, "hashmap__update", "unexpected error: %d\n", err))
+ goto cleanup;
+ CHECK_STR("hashmap__update", old_key, "b");
+ CHECK_STR("hashmap__update", old_value, "breadfruit");
+
+ err = hashmap__append(map, "c", "cherry");
+ if (CHECK(err, "hashmap__append", "unexpected error: %d\n", err))
+ goto cleanup;
+
+ if (CHECK(!hashmap__delete(map, "c", &old_key, &old_value),
+ "hashmap__delete", "expected to have entry for 'c'\n"))
+ goto cleanup;
+ CHECK_STR("hashmap__delete", old_key, "c");
+ CHECK_STR("hashmap__delete", old_value, "cherry");
+
+ CHECK(!hashmap__find(map, "b", &value), "hashmap__find", "can't find value for 'b'\n");
+ CHECK_STR("hashmap__find", value, "blueberry");
+
+ if (CHECK(!hashmap__delete(map, "b", NULL, NULL),
+ "hashmap__delete", "expected to have entry for 'b'\n"))
+ goto cleanup;
+
+ i = 0;
+ hashmap__for_each_entry(map, cur, bkt) {
+ if (CHECK(i != 0, "hashmap__for_each_entry", "too many entries"))
+ goto cleanup;
+ key = cur->pkey;
+ value = cur->pvalue;
+ CHECK_STR("entry", key, "a");
+ CHECK_STR("entry", value, "apple");
+ i++;
}
+#undef CHECK_STR
cleanup:
hashmap__free(map);
}
-static size_t collision_hash_fn(const void *k, void *ctx)
+static size_t collision_hash_fn(long k, void *ctx)
{
return 0;
}
static void test_hashmap_multimap(void)
{
- void *k1 = (void *)0, *k2 = (void *)1;
+ long k1 = 0, k2 = 1;
struct hashmap_entry *entry;
struct hashmap *map;
long found_msk;
@@ -273,23 +353,23 @@ static void test_hashmap_multimap(void)
* [0] -> 1, 2, 4;
* [1] -> 8, 16, 32;
*/
- err = hashmap__append(map, k1, (void *)1);
+ err = hashmap__append(map, k1, 1);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k1, (void *)2);
+ err = hashmap__append(map, k1, 2);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k1, (void *)4);
+ err = hashmap__append(map, k1, 4);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k2, (void *)8);
+ err = hashmap__append(map, k2, 8);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k2, (void *)16);
+ err = hashmap__append(map, k2, 16);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k2, (void *)32);
+ err = hashmap__append(map, k2, 32);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
@@ -300,7 +380,7 @@ static void test_hashmap_multimap(void)
/* verify global iteration still works and sees all values */
found_msk = 0;
hashmap__for_each_entry(map, entry, bkt) {
- found_msk |= (long)entry->value;
+ found_msk |= entry->value;
}
if (CHECK(found_msk != (1 << 6) - 1, "found_msk",
"not all keys iterated: %lx\n", found_msk))
@@ -309,7 +389,7 @@ static void test_hashmap_multimap(void)
/* iterate values for key 1 */
found_msk = 0;
hashmap__for_each_key_entry(map, entry, k1) {
- found_msk |= (long)entry->value;
+ found_msk |= entry->value;
}
if (CHECK(found_msk != (1 | 2 | 4), "found_msk",
"invalid k1 values: %lx\n", found_msk))
@@ -318,7 +398,7 @@ static void test_hashmap_multimap(void)
/* iterate values for key 2 */
found_msk = 0;
hashmap__for_each_key_entry(map, entry, k2) {
- found_msk |= (long)entry->value;
+ found_msk |= entry->value;
}
if (CHECK(found_msk != (8 | 16 | 32), "found_msk",
"invalid k2 values: %lx\n", found_msk))
@@ -333,7 +413,7 @@ static void test_hashmap_empty()
struct hashmap_entry *entry;
int bkt;
struct hashmap *map;
- void *k = (void *)0;
+ long k = 0;
/* force collisions */
map = hashmap__new(hash_fn, equal_fn, NULL);
@@ -374,4 +454,6 @@ void test_hashmap()
test_hashmap_multimap();
if (test__start_subtest("empty"))
test_hashmap_empty();
+ if (test__start_subtest("ptr_iface"))
+ test_hashmap_ptr_iface();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 287b3ac40227..eedbf1937fc4 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -312,12 +312,12 @@ static inline __u64 get_time_ns(void)
return (__u64) t.tv_sec * 1000000000 + t.tv_nsec;
}
-static size_t symbol_hash(const void *key, void *ctx __maybe_unused)
+static size_t symbol_hash(long key, void *ctx __maybe_unused)
{
return str_hash((const char *) key);
}
-static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
+static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
{
return strcmp((const char *) key1, (const char *) key2) == 0;
}
@@ -372,7 +372,7 @@ static int get_syms(char ***symsp, size_t *cntp)
sizeof("__ftrace_invalid_address__") - 1))
continue;
- err = hashmap__add(map, name, NULL);
+ err = hashmap__add(map, name, 0);
if (err == -EEXIST)
continue;
if (err)
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index 617bbce6ef8f..5cf85d0f9827 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -485,7 +485,7 @@ static void misc(void)
goto check_linum;
ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg));
- if (ASSERT_EQ(ret, sizeof(send_msg), "read(msg)"))
+ if (!ASSERT_EQ(ret, sizeof(send_msg), "read(msg)"))
goto check_linum;
}
@@ -505,6 +505,8 @@ static void misc(void)
ASSERT_EQ(misc_skel->bss->nr_fin, 1, "unexpected nr_fin");
+ ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp");
+
check_linum:
ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum");
sk_fds_close(&sk_fds);
@@ -539,7 +541,7 @@ void test_tcp_hdr_options(void)
goto skel_destroy;
cg_fd = test__join_cgroup(CG_NAME);
- if (ASSERT_GE(cg_fd, 0, "join_cgroup"))
+ if (!ASSERT_GE(cg_fd, 0, "join_cgroup"))
goto skel_destroy;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
index 2c121c5d66a7..d487153a839d 100644
--- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -27,6 +27,7 @@ unsigned int nr_pure_ack = 0;
unsigned int nr_data = 0;
unsigned int nr_syn = 0;
unsigned int nr_fin = 0;
+unsigned int nr_hwtstamp = 0;
/* Check the header received from the active side */
static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
@@ -146,6 +147,9 @@ static int check_active_hdr_in(struct bpf_sock_ops *skops)
if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len)
nr_pure_ack++;
+ if (skops->skb_hwtstamp)
+ nr_hwtstamp++;
+
return CG_OK;
}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 0e9a47f97890..c34f37d7a523 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -222,6 +222,26 @@ static char *test_result(bool failed, bool skipped)
return failed ? "FAIL" : (skipped ? "SKIP" : "OK");
}
+#define TEST_NUM_WIDTH 7
+
+static void print_test_result(const struct prog_test_def *test, const struct test_state *test_state)
+{
+ int skipped_cnt = test_state->skip_cnt;
+ int subtests_cnt = test_state->subtest_num;
+
+ fprintf(env.stdout, "#%-*d %s:", TEST_NUM_WIDTH, test->test_num, test->test_name);
+ if (test_state->error_cnt)
+ fprintf(env.stdout, "FAIL");
+ else if (!skipped_cnt)
+ fprintf(env.stdout, "OK");
+ else if (skipped_cnt == subtests_cnt || !subtests_cnt)
+ fprintf(env.stdout, "SKIP");
+ else
+ fprintf(env.stdout, "OK (SKIP: %d/%d)", skipped_cnt, subtests_cnt);
+
+ fprintf(env.stdout, "\n");
+}
+
static void print_test_log(char *log_buf, size_t log_cnt)
{
log_buf[log_cnt] = '\0';
@@ -230,18 +250,6 @@ static void print_test_log(char *log_buf, size_t log_cnt)
fprintf(env.stdout, "\n");
}
-#define TEST_NUM_WIDTH 7
-
-static void print_test_name(int test_num, const char *test_name, char *result)
-{
- fprintf(env.stdout, "#%-*d %s", TEST_NUM_WIDTH, test_num, test_name);
-
- if (result)
- fprintf(env.stdout, ":%s", result);
-
- fprintf(env.stdout, "\n");
-}
-
static void print_subtest_name(int test_num, int subtest_num,
const char *test_name, char *subtest_name,
char *result)
@@ -307,8 +315,7 @@ static void dump_test_log(const struct prog_test_def *test,
subtest_state->skipped));
}
- print_test_name(test->test_num, test->test_name,
- test_result(test_failed, test_state->skip_cnt));
+ print_test_result(test, test_state);
}
static void stdio_restore(void);
@@ -1070,8 +1077,7 @@ static void run_one_test(int test_num)
state->tested = true;
if (verbose() && env.worker_id == -1)
- print_test_name(test_num + 1, test->test_name,
- test_result(state->error_cnt, state->skip_cnt));
+ print_test_result(test, state);
reset_affinity();
restore_netns();
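For illustration (sample test names; TEST_NUM_WIDTH is 7 as defined above), the consolidated print_test_result() emits lines like:

#1       attach_probe:OK
#2       align:FAIL
#3       autoload:OK (SKIP: 1/3)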
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 973cbf6af323..f961b49b8ef4 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -17,6 +17,7 @@
#include <bpf/libbpf.h>
#include <libelf.h>
#include <gelf.h>
+#include <float.h>
enum stat_id {
VERDICT,
@@ -34,6 +35,45 @@ enum stat_id {
NUM_STATS_CNT = FILE_NAME - VERDICT,
};
+/* In comparison mode each stat can take up to four different values:
+ * - A side value;
+ * - B side value;
+ * - absolute diff value;
+ * - relative (percentage) diff value.
+ *
+ * When specifying stat specs in comparison mode, the user can use one of the
+ * following variant suffixes to specify which exact variant should be used for
+ * ordering or filtering:
+ * - `_a` for A side value;
+ * - `_b` for B side value;
+ * - `_diff` for absolute diff value;
+ * - `_pct` for relative (percentage) diff value.
+ *
+ * If no variant suffix is provided, then `_b` (control data) is assumed.
+ *
+ * As an example, let's say instructions stat has the following output:
+ *
+ * Insns (A) Insns (B) Insns (DIFF)
+ * --------- --------- --------------
+ * 21547 20920 -627 (-2.91%)
+ *
+ * Then:
+ * - 21547 is A side value (insns_a);
+ * - 20920 is B side value (insns_b);
+ * - -627 is absolute diff value (insns_diff);
+ * - -2.91% is relative diff value (insns_pct).
+ *
+ * For verdict there is no verdict_pct variant.
+ * For file and program name, _a and _b variants are equivalent and there are
+ * no _diff or _pct variants.
+ */
+enum stat_variant {
+ VARIANT_A,
+ VARIANT_B,
+ VARIANT_DIFF,
+ VARIANT_PCT,
+};
+
struct verif_stats {
char *file_name;
char *prog_name;
@@ -41,9 +81,19 @@ struct verif_stats {
long stats[NUM_STATS_CNT];
};
+/* joined comparison mode stats */
+struct verif_stats_join {
+ char *file_name;
+ char *prog_name;
+
+ const struct verif_stats *stats_a;
+ const struct verif_stats *stats_b;
+};
+
struct stat_specs {
int spec_cnt;
enum stat_id ids[ALL_STATS_CNT];
+ enum stat_variant variants[ALL_STATS_CNT];
bool asc[ALL_STATS_CNT];
int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
@@ -54,9 +104,31 @@ enum resfmt {
RESFMT_CSV,
};
+enum filter_kind {
+ FILTER_NAME,
+ FILTER_STAT,
+};
+
+enum operator_kind {
+ OP_EQ, /* == or = */
+ OP_NEQ, /* != or <> */
+ OP_LT, /* < */
+ OP_LE, /* <= */
+ OP_GT, /* > */
+ OP_GE, /* >= */
+};
+
struct filter {
+ enum filter_kind kind;
+ /* FILTER_NAME */
+ char *any_glob;
char *file_glob;
char *prog_glob;
+ /* FILTER_STAT */
+ enum operator_kind op;
+ int stat_id;
+ enum stat_variant stat_var;
+ long value;
};
static struct env {
@@ -67,6 +139,7 @@ static struct env {
int log_level;
enum resfmt out_fmt;
bool comparison_mode;
+ bool replay_mode;
struct verif_stats *prog_stats;
int prog_stat_cnt;
@@ -75,6 +148,9 @@ static struct env {
struct verif_stats *baseline_stats;
int baseline_stat_cnt;
+ struct verif_stats_join *join_stats;
+ int join_stat_cnt;
+
struct stat_specs output_spec;
struct stat_specs sort_spec;
@@ -115,6 +191,7 @@ static const struct argp_option opts[] = {
{ "sort", 's', "SPEC", 0, "Specify sort order" },
{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
{ "compare", 'C', NULL, 0, "Comparison mode" },
+ { "replay", 'R', NULL, 0, "Replay mode" },
{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
{},
};
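As a sketch of how the new modes and filters compose (hypothetical invocations; stat names such as insns_diff and verdict follow the comments below):

	veristat -C -s insns_pct -f 'insns_diff!=0' baseline.csv updated.csv
	veristat -R -f 'verdict=failure' results.csv

The first compares two previously saved result sets, sorts by relative instruction-count change, and keeps only programs whose instruction count changed; the second replays a saved result set, showing only programs that failed verification.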
@@ -169,6 +246,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 'C':
env.comparison_mode = true;
break;
+ case 'R':
+ env.replay_mode = true;
+ break;
case 'f':
if (arg[0] == '@')
err = append_filter_file(arg + 1);
@@ -226,28 +306,6 @@ static bool glob_matches(const char *str, const char *pat)
return !*str && !*pat;
}
-static bool should_process_file(const char *filename)
-{
- int i;
-
- if (env.deny_filter_cnt > 0) {
- for (i = 0; i < env.deny_filter_cnt; i++) {
- if (glob_matches(filename, env.deny_filters[i].file_glob))
- return false;
- }
- }
-
- if (env.allow_filter_cnt == 0)
- return true;
-
- for (i = 0; i < env.allow_filter_cnt; i++) {
- if (glob_matches(filename, env.allow_filters[i].file_glob))
- return true;
- }
-
- return false;
-}
-
static bool is_bpf_obj_file(const char *path) {
Elf64_Ehdr *ehdr;
int fd, err = -EINVAL;
@@ -280,45 +338,84 @@ cleanup:
return err == 0;
}
-static bool should_process_prog(const char *path, const char *prog_name)
+static bool should_process_file_prog(const char *filename, const char *prog_name)
{
- const char *filename = basename(path);
- int i;
+ struct filter *f;
+ int i, allow_cnt = 0;
- if (env.deny_filter_cnt > 0) {
- for (i = 0; i < env.deny_filter_cnt; i++) {
- if (glob_matches(filename, env.deny_filters[i].file_glob))
- return false;
- if (!env.deny_filters[i].prog_glob)
- continue;
- if (glob_matches(prog_name, env.deny_filters[i].prog_glob))
- return false;
- }
- }
+ for (i = 0; i < env.deny_filter_cnt; i++) {
+ f = &env.deny_filters[i];
+ if (f->kind != FILTER_NAME)
+ continue;
- if (env.allow_filter_cnt == 0)
- return true;
+ if (f->any_glob && glob_matches(filename, f->any_glob))
+ return false;
+ if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
+ return false;
+ if (f->file_glob && glob_matches(filename, f->file_glob))
+ return false;
+ if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
+ return false;
+ }
for (i = 0; i < env.allow_filter_cnt; i++) {
- if (!glob_matches(filename, env.allow_filters[i].file_glob))
+ f = &env.allow_filters[i];
+ if (f->kind != FILTER_NAME)
continue;
- /* if filter specifies only filename glob part, it implicitly
- * allows all progs within that file
- */
- if (!env.allow_filters[i].prog_glob)
- return true;
- if (glob_matches(prog_name, env.allow_filters[i].prog_glob))
+
+ allow_cnt++;
+ if (f->any_glob) {
+ if (glob_matches(filename, f->any_glob))
+ return true;
+			/* If we don't know the program name yet, an any_glob
+			 * filter has to assume that the current BPF object
+			 * file might be relevant; we'll check again after
+			 * opening the BPF object file, at which point the
+			 * program name will finally be known.
+ */
+ if (!prog_name || glob_matches(prog_name, f->any_glob))
+ return true;
+ } else {
+ if (f->file_glob && !glob_matches(filename, f->file_glob))
+ continue;
+ if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
+ continue;
return true;
+ }
}
- return false;
+ /* if there are no file/prog name allow filters, allow all progs,
+	 * unless they were explicitly denied earlier
+ */
+ return allow_cnt == 0;
}
+static struct {
+ enum operator_kind op_kind;
+ const char *op_str;
+} operators[] = {
+	/* The order of these definitions matters: substrings should go
+	 * last, to avoid situations like '<' matching part of what is
+	 * actually a '<>' operator.
+	 */
+ { OP_EQ, "==" },
+ { OP_NEQ, "!=" },
+ { OP_NEQ, "<>" },
+ { OP_LE, "<=" },
+ { OP_LT, "<" },
+ { OP_GE, ">=" },
+ { OP_GT, ">" },
+ { OP_EQ, "=" },
+};
+
+static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var);
+
static int append_filter(struct filter **filters, int *cnt, const char *str)
{
struct filter *f;
void *tmp;
const char *p;
+ int i;
tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
if (!tmp)
@@ -326,26 +423,108 @@ static int append_filter(struct filter **filters, int *cnt, const char *str)
*filters = tmp;
f = &(*filters)[*cnt];
- f->file_glob = f->prog_glob = NULL;
+ memset(f, 0, sizeof(*f));
+
+ /* First, let's check if it's a stats filter of the following form:
+	 * <stat><op><value>, where:
+	 * - <stat> is one of the supported numerical stats (verdict is also
+	 *   considered numerical, failure == 0, success == 1);
+	 * - <op> is a comparison operator (see `operators` definitions);
+	 * - <value> is an integer (or failure/success, or false/true as
+	 *   special aliases for 0 and 1, respectively).
+	 * If the form doesn't match what the user provided, we assume
+	 * a file/prog glob filter.
+ */
+ for (i = 0; i < ARRAY_SIZE(operators); i++) {
+ enum stat_variant var;
+ int id;
+ long val;
+ const char *end = str;
+ const char *op_str;
- /* filter can be specified either as "<obj-glob>" or "<obj-glob>/<prog-glob>" */
+ op_str = operators[i].op_str;
+ p = strstr(str, op_str);
+ if (!p)
+ continue;
+
+ if (!parse_stat_id_var(str, p - str, &id, &var)) {
+ fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
+ return -EINVAL;
+ }
+ if (id >= FILE_NAME) {
+ fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
+ return -EINVAL;
+ }
+
+ p += strlen(op_str);
+
+ if (strcasecmp(p, "true") == 0 ||
+ strcasecmp(p, "t") == 0 ||
+ strcasecmp(p, "success") == 0 ||
+ strcasecmp(p, "succ") == 0 ||
+ strcasecmp(p, "s") == 0 ||
+ strcasecmp(p, "match") == 0 ||
+ strcasecmp(p, "m") == 0) {
+ val = 1;
+ } else if (strcasecmp(p, "false") == 0 ||
+ strcasecmp(p, "f") == 0 ||
+ strcasecmp(p, "failure") == 0 ||
+ strcasecmp(p, "fail") == 0 ||
+ strcasecmp(p, "mismatch") == 0 ||
+ strcasecmp(p, "mis") == 0) {
+ val = 0;
+ } else {
+ errno = 0;
+ val = strtol(p, (char **)&end, 10);
+			if (errno || end == p || *end != '\0') {
+ fprintf(stderr, "Invalid integer value in '%s'!\n", str);
+ return -EINVAL;
+ }
+ }
+
+ f->kind = FILTER_STAT;
+ f->stat_id = id;
+ f->stat_var = var;
+ f->op = operators[i].op_kind;
+ f->value = val;
+
+ *cnt += 1;
+ return 0;
+ }
+
+	/* File/prog filter can be specified either as '<glob>' or
+	 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
+	 * both file and program names, which tends to be the most useful and
+	 * ergonomic form in practice. If the user needs full control, they
+	 * can use the '/<prog-glob>' form to glob just the program name, or
+	 * '<file-glob>/' to glob only the file name.
+	 */
+ f->kind = FILTER_NAME;
p = strchr(str, '/');
if (!p) {
- f->file_glob = strdup(str);
- if (!f->file_glob)
+ f->any_glob = strdup(str);
+ if (!f->any_glob)
return -ENOMEM;
} else {
- f->file_glob = strndup(str, p - str);
- f->prog_glob = strdup(p + 1);
- if (!f->file_glob || !f->prog_glob) {
- free(f->file_glob);
- free(f->prog_glob);
- f->file_glob = f->prog_glob = NULL;
- return -ENOMEM;
+ if (str != p) {
+ /* non-empty file glob */
+ f->file_glob = strndup(str, p - str);
+ if (!f->file_glob)
+ return -ENOMEM;
+ }
+ if (strlen(p + 1) > 0) {
+ /* non-empty prog glob */
+ f->prog_glob = strdup(p + 1);
+ if (!f->prog_glob) {
+ free(f->file_glob);
+ f->file_glob = NULL;
+ return -ENOMEM;
+ }
}
}
- *cnt = *cnt + 1;
+ *cnt += 1;
return 0;
}
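As a minimal standalone sketch of the stat-filter tokenization in append_filter() above (stripped-down operator table, hypothetical demo_* names, not part of the patch):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* substrings ('<', '>', '=') deliberately come after their supersets */
static const char *demo_ops[] = { "==", "!=", "<>", "<=", "<", ">=", ">", "=" };

static int demo_parse(const char *expr)
{
	size_t i;

	for (i = 0; i < sizeof(demo_ops) / sizeof(demo_ops[0]); i++) {
		const char *p = strstr(expr, demo_ops[i]);
		char *end;
		long val;

		if (!p)
			continue;

		errno = 0;
		val = strtol(p + strlen(demo_ops[i]), &end, 10);
		if (errno || *end != '\0')
			return -EINVAL;

		printf("stat='%.*s' op='%s' value=%ld\n",
		       (int)(p - expr), expr, demo_ops[i], val);
		return 0;
	}
	return -EINVAL; /* no operator found -> treat as file/prog glob */
}

int main(void)
{
	demo_parse("total_insns<=100"); /* '<=' matches before '<' */
	demo_parse("verdict!=0");
	return 0;
}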
@@ -388,6 +567,15 @@ static const struct stat_specs default_output_spec = {
},
};
+static const struct stat_specs default_csv_output_spec = {
+ .spec_cnt = 9,
+ .ids = {
+ FILE_NAME, PROG_NAME, VERDICT, DURATION,
+ TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
+ MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
+ },
+};
+
static const struct stat_specs default_sort_spec = {
.spec_cnt = 2,
.ids = {
@@ -396,48 +584,123 @@ static const struct stat_specs default_sort_spec = {
.asc = { true, true, },
};
+/* sorting for comparison mode to join two data sets */
+static const struct stat_specs join_sort_spec = {
+ .spec_cnt = 2,
+ .ids = {
+ FILE_NAME, PROG_NAME,
+ },
+ .asc = { true, true, },
+};
+
static struct stat_def {
const char *header;
const char *names[4];
bool asc_by_default;
+ bool left_aligned;
} stat_defs[] = {
- [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */ },
- [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */ },
- [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */ },
+ [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
+ [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
+ [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
[DURATION] = { "Duration (us)", {"duration", "dur"}, },
- [TOTAL_INSNS] = { "Total insns", {"total_insns", "insns"}, },
- [TOTAL_STATES] = { "Total states", {"total_states", "states"}, },
+ [TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
+ [TOTAL_STATES] = { "States", {"total_states", "states"}, },
[PEAK_STATES] = { "Peak states", {"peak_states"}, },
[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
};
+static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var)
+{
+ static const char *var_sfxs[] = {
+ [VARIANT_A] = "_a",
+ [VARIANT_B] = "_b",
+ [VARIANT_DIFF] = "_diff",
+ [VARIANT_PCT] = "_pct",
+ };
+ int i, j, k;
+
+ for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
+ struct stat_def *def = &stat_defs[i];
+ size_t alias_len, sfx_len;
+ const char *alias;
+
+ for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
+ alias = def->names[j];
+ if (!alias)
+ continue;
+
+ alias_len = strlen(alias);
+ if (strncmp(name, alias, alias_len) != 0)
+ continue;
+
+ if (alias_len == len) {
+				/* If no variant suffix is specified, we
+				 * assume the control group (just in case we
+				 * are in comparison mode); the variant is
+				 * ignored in non-comparison mode.
+				 */
+ *var = VARIANT_B;
+ *id = i;
+ return true;
+ }
+
+ for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
+ sfx_len = strlen(var_sfxs[k]);
+ if (alias_len + sfx_len != len)
+ continue;
+
+ if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
+ *var = (enum stat_variant)k;
+ *id = i;
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
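Per parse_stat_id_var() above, a stat may be referred to by any of its aliases plus an optional variant suffix; for instance, using the "insns" alias for TOTAL_INSNS:

    insns       -> TOTAL_INSNS, VARIANT_B (control group, the default)
    insns_a     -> TOTAL_INSNS, VARIANT_A
    insns_b     -> TOTAL_INSNS, VARIANT_B
    insns_diff  -> TOTAL_INSNS, VARIANT_DIFF
    insns_pct   -> TOTAL_INSNS, VARIANT_PCT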
+static bool is_asc_sym(char c)
+{
+ return c == '^';
+}
+
+static bool is_desc_sym(char c)
+{
+ return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_';
+}
+
static int parse_stat(const char *stat_name, struct stat_specs *specs)
{
- int id, i;
+ int id;
+ bool has_order = false, is_asc = false;
+ size_t len = strlen(stat_name);
+ enum stat_variant var;
if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
return -E2BIG;
}
- for (id = 0; id < ARRAY_SIZE(stat_defs); id++) {
- struct stat_def *def = &stat_defs[id];
-
- for (i = 0; i < ARRAY_SIZE(stat_defs[id].names); i++) {
- if (!def->names[i] || strcmp(def->names[i], stat_name) != 0)
- continue;
-
- specs->ids[specs->spec_cnt] = id;
- specs->asc[specs->spec_cnt] = def->asc_by_default;
- specs->spec_cnt++;
+ if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
+ has_order = true;
+ is_asc = is_asc_sym(stat_name[len - 1]);
+ len -= 1;
+ }
- return 0;
- }
+ if (!parse_stat_id_var(stat_name, len, &id, &var)) {
+ fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
+ return -ESRCH;
}
- fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
- return -ESRCH;
+ specs->ids[specs->spec_cnt] = id;
+ specs->variants[specs->spec_cnt] = var;
+ specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
+ specs->spec_cnt++;
+
+ return 0;
}
static int parse_stats(const char *stats_str, struct stat_specs *specs)
@@ -540,7 +803,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
int err = 0;
void *tmp;
- if (!should_process_prog(filename, bpf_program__name(prog))) {
+ if (!should_process_file_prog(basename(filename), bpf_program__name(prog))) {
env.progs_skipped++;
return 0;
}
@@ -596,7 +859,7 @@ static int process_obj(const char *filename)
LIBBPF_OPTS(bpf_object_open_opts, opts);
int err = 0, prog_cnt = 0;
- if (!should_process_file(basename(filename))) {
+ if (!should_process_file_prog(basename(filename), NULL)) {
if (env.verbose)
printf("Skipping '%s' due to filters...\n", filename);
env.files_skipped++;
@@ -716,7 +979,106 @@ static int cmp_prog_stats(const void *v1, const void *v2)
return cmp;
}
- return 0;
+ /* always disambiguate with file+prog, which are unique */
+ cmp = strcmp(s1->file_name, s2->file_name);
+ if (cmp != 0)
+ return cmp;
+ return strcmp(s1->prog_name, s2->prog_name);
+}
+
+static void fetch_join_stat_value(const struct verif_stats_join *s,
+ enum stat_id id, enum stat_variant var,
+ const char **str_val,
+ double *num_val)
+{
+ long v1, v2;
+
+ if (id == FILE_NAME) {
+ *str_val = s->file_name;
+ return;
+ }
+ if (id == PROG_NAME) {
+ *str_val = s->prog_name;
+ return;
+ }
+
+ v1 = s->stats_a ? s->stats_a->stats[id] : 0;
+ v2 = s->stats_b ? s->stats_b->stats[id] : 0;
+
+ switch (var) {
+ case VARIANT_A:
+ if (!s->stats_a)
+ *num_val = -DBL_MAX;
+ else
+ *num_val = s->stats_a->stats[id];
+ return;
+ case VARIANT_B:
+ if (!s->stats_b)
+ *num_val = -DBL_MAX;
+ else
+ *num_val = s->stats_b->stats[id];
+ return;
+ case VARIANT_DIFF:
+ if (!s->stats_a || !s->stats_b)
+ *num_val = -DBL_MAX;
+ else if (id == VERDICT)
+ *num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
+ else
+ *num_val = (double)(v2 - v1);
+ return;
+ case VARIANT_PCT:
+ if (!s->stats_a || !s->stats_b) {
+ *num_val = -DBL_MAX;
+ } else if (v1 == 0) {
+ if (v1 == v2)
+ *num_val = 0.0;
+ else
+ *num_val = v2 < v1 ? -100.0 : 100.0;
+ } else {
+ *num_val = (v2 - v1) * 100.0 / v1;
+ }
+ return;
+ }
+}
+
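A standalone sketch of the VARIANT_PCT arithmetic above (hypothetical demo_pct() name): percent change of v2 relative to v1, with v1 == 0 handled specially so division by zero degenerates to +/-100% (or 0% on an exact match):

#include <stdio.h>

static double demo_pct(long v1, long v2)
{
	if (v1 == 0)
		return v1 == v2 ? 0.0 : (v2 < v1 ? -100.0 : 100.0);
	return (v2 - v1) * 100.0 / v1;
}

int main(void)
{
	printf("%+.2f%%\n", demo_pct(1000, 1200)); /* +20.00% */
	printf("%+.2f%%\n", demo_pct(1200, 1000)); /* -16.67% */
	printf("%+.2f%%\n", demo_pct(0, 7));       /* +100.00% (special case) */
	return 0;
}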
+static int cmp_join_stat(const struct verif_stats_join *s1,
+ const struct verif_stats_join *s2,
+ enum stat_id id, enum stat_variant var, bool asc)
+{
+ const char *str1 = NULL, *str2 = NULL;
+ double v1, v2;
+ int cmp = 0;
+
+ fetch_join_stat_value(s1, id, var, &str1, &v1);
+ fetch_join_stat_value(s2, id, var, &str2, &v2);
+
+ if (str1)
+ cmp = strcmp(str1, str2);
+ else if (v1 != v2)
+ cmp = v1 < v2 ? -1 : 1;
+
+ return asc ? cmp : -cmp;
+}
+
+static int cmp_join_stats(const void *v1, const void *v2)
+{
+ const struct verif_stats_join *s1 = v1, *s2 = v2;
+ int i, cmp;
+
+ for (i = 0; i < env.sort_spec.spec_cnt; i++) {
+ cmp = cmp_join_stat(s1, s2,
+ env.sort_spec.ids[i],
+ env.sort_spec.variants[i],
+ env.sort_spec.asc[i]);
+ if (cmp != 0)
+ return cmp;
+ }
+
+ /* always disambiguate with file+prog, which are unique */
+ cmp = strcmp(s1->file_name, s2->file_name);
+ if (cmp != 0)
+ return cmp;
+ return strcmp(s1->prog_name, s2->prog_name);
}
#define HEADER_CHAR '-'
@@ -738,6 +1100,7 @@ static void output_header_underlines(void)
static void output_headers(enum resfmt fmt)
{
+ const char *fmt_str;
int i, len;
for (i = 0; i < env.output_spec.spec_cnt; i++) {
@@ -751,7 +1114,8 @@ static void output_headers(enum resfmt fmt)
*max_len = len;
break;
case RESFMT_TABLE:
- printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header);
+ fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
+ printf(fmt_str, i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header);
if (i == env.output_spec.spec_cnt - 1)
printf("\n");
break;
@@ -772,13 +1136,16 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id,
{
switch (id) {
case FILE_NAME:
- *str = s->file_name;
+ *str = s ? s->file_name : "N/A";
break;
case PROG_NAME:
- *str = s->prog_name;
+ *str = s ? s->prog_name : "N/A";
break;
case VERDICT:
- *str = s->stats[VERDICT] ? "success" : "failure";
+ if (!s)
+ *str = "N/A";
+ else
+ *str = s->stats[VERDICT] ? "success" : "failure";
break;
case DURATION:
case TOTAL_INSNS:
@@ -786,7 +1153,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id,
case PEAK_STATES:
case MAX_STATES_PER_INSN:
case MARK_READ_MAX_LEN:
- *val = s->stats[id];
+ *val = s ? s->stats[id] : 0;
break;
default:
fprintf(stderr, "Unrecognized stat #%d\n", id);
@@ -841,42 +1208,6 @@ static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last
}
}
-static int handle_verif_mode(void)
-{
- int i, err;
-
- if (env.filename_cnt == 0) {
- fprintf(stderr, "Please provide path to BPF object file!\n");
- argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
- return -EINVAL;
- }
-
- for (i = 0; i < env.filename_cnt; i++) {
- err = process_obj(env.filenames[i]);
- if (err) {
- fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
- return err;
- }
- }
-
- qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
-
- if (env.out_fmt == RESFMT_TABLE) {
- /* calculate column widths */
- output_headers(RESFMT_TABLE_CALCLEN);
- for (i = 0; i < env.prog_stat_cnt; i++)
- output_stats(&env.prog_stats[i], RESFMT_TABLE_CALCLEN, false);
- }
-
- /* actually output the table */
- output_headers(env.out_fmt);
- for (i = 0; i < env.prog_stat_cnt; i++) {
- output_stats(&env.prog_stats[i], env.out_fmt, i == env.prog_stat_cnt - 1);
- }
-
- return 0;
-}
-
static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
{
switch (id) {
@@ -1008,7 +1339,7 @@ static int parse_stats_csv(const char *filename, struct stat_specs *specs,
* parsed entire line; if row should be ignored we pretend we
* never parsed it
*/
- if (!should_process_prog(st->file_name, st->prog_name)) {
+ if (!should_process_file_prog(st->file_name, st->prog_name)) {
free(st->file_name);
free(st->prog_name);
*stat_cntp -= 1;
@@ -1097,9 +1428,11 @@ static void output_comp_headers(enum resfmt fmt)
output_comp_header_underlines();
}
-static void output_comp_stats(const struct verif_stats *base, const struct verif_stats *comp,
+static void output_comp_stats(const struct verif_stats_join *join_stats,
enum resfmt fmt, bool last)
{
+ const struct verif_stats *base = join_stats->stats_a;
+ const struct verif_stats *comp = join_stats->stats_b;
char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
int i;
@@ -1117,33 +1450,45 @@ static void output_comp_stats(const struct verif_stats *base, const struct verif
/* normalize all the outputs to be in string buffers for simplicity */
if (is_key_stat(id)) {
/* key stats (file and program name) are always strings */
- if (base != &fallback_stats)
+ if (base)
snprintf(base_buf, sizeof(base_buf), "%s", base_str);
else
snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
} else if (base_str) {
snprintf(base_buf, sizeof(base_buf), "%s", base_str);
snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
- if (strcmp(base_str, comp_str) == 0)
+ if (!base || !comp)
+ snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
+ else if (strcmp(base_str, comp_str) == 0)
snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
else
snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
} else {
double p = 0.0;
- snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
- snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
+ if (base)
+ snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
+ else
+ snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
+ if (comp)
+ snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
+ else
+ snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
diff_val = comp_val - base_val;
- if (base == &fallback_stats || comp == &fallback_stats || base_val == 0) {
- if (comp_val == base_val)
- p = 0.0; /* avoid +0 (+100%) case */
- else
- p = comp_val < base_val ? -100.0 : 100.0;
+ if (!base || !comp) {
+ snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
} else {
- p = diff_val * 100.0 / base_val;
+ if (base_val == 0) {
+ if (comp_val == base_val)
+ p = 0.0; /* avoid +0 (+100%) case */
+ else
+ p = comp_val < base_val ? -100.0 : 100.0;
+ } else {
+ p = diff_val * 100.0 / base_val;
+ }
+ snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
}
- snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
}
switch (fmt) {
@@ -1199,14 +1544,64 @@ static int cmp_stats_key(const struct verif_stats *base, const struct verif_stat
return strcmp(base->prog_name, comp->prog_name);
}
+static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
+{
+ static const double eps = 1e-9;
+ const char *str = NULL;
+ double value = 0.0;
+
+ fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
+
+ switch (f->op) {
+ case OP_EQ: return value > f->value - eps && value < f->value + eps;
+ case OP_NEQ: return value < f->value - eps || value > f->value + eps;
+ case OP_LT: return value < f->value - eps;
+ case OP_LE: return value <= f->value + eps;
+ case OP_GT: return value > f->value + eps;
+ case OP_GE: return value >= f->value - eps;
+ }
+
+ fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
+ return false;
+}
+
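A standalone sketch of why the epsilon band above is needed (hypothetical demo_eq() name): joined stat values are doubles (diffs and percentages), so exact '==' is unreliable and equality is widened to a +/- eps band:

#include <stdbool.h>
#include <stdio.h>

static bool demo_eq(double a, double b)
{
	static const double eps = 1e-9;

	return a > b - eps && a < b + eps;
}

int main(void)
{
	double pct = (120 - 100) * 100.0 / 100; /* 20.0 */

	printf("%d\n", demo_eq(pct, 20.0)); /* 1 */
	printf("%d\n", 0.1 + 0.2 == 0.3);   /* typically 0: why eps is needed */
	return 0;
}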
+static bool should_output_join_stats(const struct verif_stats_join *stats)
+{
+ struct filter *f;
+ int i, allow_cnt = 0;
+
+ for (i = 0; i < env.deny_filter_cnt; i++) {
+ f = &env.deny_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+
+ if (is_join_stat_filter_matched(f, stats))
+ return false;
+ }
+
+ for (i = 0; i < env.allow_filter_cnt; i++) {
+ f = &env.allow_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+ allow_cnt++;
+
+ if (is_join_stat_filter_matched(f, stats))
+ return true;
+ }
+
+	/* if there are no stat allow filters, pass everything through */
+ return allow_cnt == 0;
+}
+
static int handle_comparison_mode(void)
{
struct stat_specs base_specs = {}, comp_specs = {};
+ struct stat_specs tmp_sort_spec;
enum resfmt cur_fmt;
- int err, i, j;
+	int err, i, j, last_idx = 0;
if (env.filename_cnt != 2) {
- fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n");
+ fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
return -EINVAL;
}
@@ -1244,31 +1639,26 @@ static int handle_comparison_mode(void)
}
}
+ /* Replace user-specified sorting spec with file+prog sorting rule to
+ * be able to join two datasets correctly. Once we are done, we will
+ * restore the original sort spec.
+ */
+ tmp_sort_spec = env.sort_spec;
+ env.sort_spec = join_sort_spec;
qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
+ env.sort_spec = tmp_sort_spec;
- /* for human-readable table output we need to do extra pass to
- * calculate column widths, so we substitute current output format
- * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
- * and do everything again.
- */
- if (env.out_fmt == RESFMT_TABLE)
- cur_fmt = RESFMT_TABLE_CALCLEN;
- else
- cur_fmt = env.out_fmt;
-
-one_more_time:
- output_comp_headers(cur_fmt);
-
- /* If baseline and comparison datasets have different subset of rows
- * (we match by 'object + prog' as a unique key) then assume
- * empty/missing/zero value for rows that are missing in the opposite
- * data set
+	/* Join the two datasets together. If the baseline and comparison
+	 * datasets have different subsets of rows (we match by 'object +
+	 * prog' as a unique key), then assume an empty/missing/zero value
+	 * for rows that are missing in the opposite data set.
+	 */
i = j = 0;
while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
- bool last = (i == env.baseline_stat_cnt - 1) || (j == env.prog_stat_cnt - 1);
const struct verif_stats *base, *comp;
+ struct verif_stats_join *join;
+ void *tmp;
int r;
base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
@@ -1285,18 +1675,64 @@ one_more_time:
return -EINVAL;
}
+ tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
+ if (!tmp)
+ return -ENOMEM;
+ env.join_stats = tmp;
+
+ join = &env.join_stats[env.join_stat_cnt];
+ memset(join, 0, sizeof(*join));
+
r = cmp_stats_key(base, comp);
if (r == 0) {
- output_comp_stats(base, comp, cur_fmt, last);
+ join->file_name = base->file_name;
+ join->prog_name = base->prog_name;
+ join->stats_a = base;
+ join->stats_b = comp;
i++;
j++;
} else if (comp == &fallback_stats || r < 0) {
- output_comp_stats(base, &fallback_stats, cur_fmt, last);
+ join->file_name = base->file_name;
+ join->prog_name = base->prog_name;
+ join->stats_a = base;
+ join->stats_b = NULL;
i++;
} else {
- output_comp_stats(&fallback_stats, comp, cur_fmt, last);
+ join->file_name = comp->file_name;
+ join->prog_name = comp->prog_name;
+ join->stats_a = NULL;
+ join->stats_b = comp;
j++;
}
+ env.join_stat_cnt += 1;
+ }
+
+	/* now sort joined results according to the sort spec */
+ qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
+
+ /* for human-readable table output we need to do extra pass to
+ * calculate column widths, so we substitute current output format
+ * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
+ * and do everything again.
+ */
+ if (env.out_fmt == RESFMT_TABLE)
+ cur_fmt = RESFMT_TABLE_CALCLEN;
+ else
+ cur_fmt = env.out_fmt;
+
+one_more_time:
+ output_comp_headers(cur_fmt);
+
+ for (i = 0; i < env.join_stat_cnt; i++) {
+ const struct verif_stats_join *join = &env.join_stats[i];
+
+ if (!should_output_join_stats(join))
+ continue;
+
+ if (cur_fmt == RESFMT_TABLE_CALCLEN)
+ last_idx = i;
+
+ output_comp_stats(join, cur_fmt, i == last_idx);
}
if (cur_fmt == RESFMT_TABLE_CALCLEN) {
@@ -1307,6 +1743,128 @@ one_more_time:
return 0;
}
+static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
+{
+ long value = stats->stats[f->stat_id];
+
+ switch (f->op) {
+ case OP_EQ: return value == f->value;
+ case OP_NEQ: return value != f->value;
+ case OP_LT: return value < f->value;
+ case OP_LE: return value <= f->value;
+ case OP_GT: return value > f->value;
+ case OP_GE: return value >= f->value;
+ }
+
+ fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
+ return false;
+}
+
+static bool should_output_stats(const struct verif_stats *stats)
+{
+ struct filter *f;
+ int i, allow_cnt = 0;
+
+ for (i = 0; i < env.deny_filter_cnt; i++) {
+ f = &env.deny_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+
+ if (is_stat_filter_matched(f, stats))
+ return false;
+ }
+
+ for (i = 0; i < env.allow_filter_cnt; i++) {
+ f = &env.allow_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+ allow_cnt++;
+
+ if (is_stat_filter_matched(f, stats))
+ return true;
+ }
+
+	/* if there are no stat allow filters, pass everything through */
+ return allow_cnt == 0;
+}
+
+static void output_prog_stats(void)
+{
+ const struct verif_stats *stats;
+ int i, last_stat_idx = 0;
+
+ if (env.out_fmt == RESFMT_TABLE) {
+ /* calculate column widths */
+ output_headers(RESFMT_TABLE_CALCLEN);
+ for (i = 0; i < env.prog_stat_cnt; i++) {
+ stats = &env.prog_stats[i];
+ if (!should_output_stats(stats))
+ continue;
+ output_stats(stats, RESFMT_TABLE_CALCLEN, false);
+ last_stat_idx = i;
+ }
+ }
+
+ /* actually output the table */
+ output_headers(env.out_fmt);
+ for (i = 0; i < env.prog_stat_cnt; i++) {
+ stats = &env.prog_stats[i];
+ if (!should_output_stats(stats))
+ continue;
+ output_stats(stats, env.out_fmt, i == last_stat_idx);
+ }
+}
+
+static int handle_verif_mode(void)
+{
+ int i, err;
+
+ if (env.filename_cnt == 0) {
+ fprintf(stderr, "Please provide path to BPF object file!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < env.filename_cnt; i++) {
+ err = process_obj(env.filenames[i]);
+ if (err) {
+ fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
+ return err;
+ }
+ }
+
+ qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
+
+ output_prog_stats();
+
+ return 0;
+}
+
+static int handle_replay_mode(void)
+{
+ struct stat_specs specs = {};
+ int err;
+
+ if (env.filename_cnt != 1) {
+ fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return -EINVAL;
+ }
+
+ err = parse_stats_csv(env.filenames[0], &specs,
+ &env.prog_stats, &env.prog_stat_cnt);
+ if (err) {
+ fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
+ return err;
+ }
+
+ qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
+
+ output_prog_stats();
+
+ return 0;
+}
+
int main(int argc, char **argv)
{
int err = 0, i;
@@ -1315,34 +1873,49 @@ int main(int argc, char **argv)
return 1;
if (env.verbose && env.quiet) {
- fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n");
+ fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
return 1;
}
if (env.verbose && env.log_level == 0)
env.log_level = 1;
- if (env.output_spec.spec_cnt == 0)
- env.output_spec = default_output_spec;
+ if (env.output_spec.spec_cnt == 0) {
+ if (env.out_fmt == RESFMT_CSV)
+ env.output_spec = default_csv_output_spec;
+ else
+ env.output_spec = default_output_spec;
+ }
if (env.sort_spec.spec_cnt == 0)
env.sort_spec = default_sort_spec;
+ if (env.comparison_mode && env.replay_mode) {
+ fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return 1;
+ }
+
if (env.comparison_mode)
err = handle_comparison_mode();
+ else if (env.replay_mode)
+ err = handle_replay_mode();
else
err = handle_verif_mode();
free_verif_stats(env.prog_stats, env.prog_stat_cnt);
free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
+ free(env.join_stats);
for (i = 0; i < env.filename_cnt; i++)
free(env.filenames[i]);
free(env.filenames);
for (i = 0; i < env.allow_filter_cnt; i++) {
+ free(env.allow_filters[i].any_glob);
free(env.allow_filters[i].file_glob);
free(env.allow_filters[i].prog_glob);
}
free(env.allow_filters);
for (i = 0; i < env.deny_filter_cnt; i++) {
+ free(env.deny_filters[i].any_glob);
free(env.deny_filters[i].file_glob);
free(env.deny_filters[i].prog_glob);
}
diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c
index ff35320d2be9..410a1385a01d 100644
--- a/tools/testing/selftests/bpf/xdp_synproxy.c
+++ b/tools/testing/selftests/bpf/xdp_synproxy.c
@@ -104,7 +104,8 @@ static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *
{ "tc", no_argument, NULL, 'c' },
{ NULL, 0, NULL, 0 },
};
- unsigned long mss4, mss6, wscale, ttl;
+ unsigned long mss4, wscale, ttl;
+ unsigned long long mss6;
unsigned int tcpipopts_mask = 0;
if (argc < 2)
@@ -286,7 +287,7 @@ static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports
prog_info = (struct bpf_prog_info) {
.nr_map_ids = 8,
- .map_ids = (__u64)map_ids,
+ .map_ids = (__u64)(unsigned long)map_ids,
};
info_len = sizeof(prog_info);
diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index 0b3ff49c740d..39d349509ba4 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -33,6 +33,7 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "xsk.h"
+#include "bpf_util.h"
#ifndef SOL_XDP
#define SOL_XDP 283
@@ -521,25 +522,6 @@ static int xsk_create_bpf_link(struct xsk_socket *xsk)
return 0;
}
-/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst
- * is zero-terminated string no matter what (unless sz == 0, in which case
- * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs
- * in what is returned. Given this is internal helper, it's trivial to extend
- * this, when necessary. Use this instead of strncpy inside libbpf source code.
- */
-static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz)
-{
- size_t i;
-
- if (sz == 0)
- return;
-
- sz--;
- for (i = 0; i < sz && src[i]; i++)
- dst[i] = src[i];
- dst[i] = '\0';
-}
-
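For reference, a standalone sketch of the strlcpy-style semantics described in the removed comment (bpf_strlcpy() itself now comes from bpf_util.h; demo_strlcpy() is a hypothetical local copy for illustration):

#include <stdio.h>

static void demo_strlcpy(char *dst, const char *src, size_t sz)
{
	size_t i;

	if (sz == 0)
		return;

	sz--;
	for (i = 0; i < sz && src[i]; i++)
		dst[i] = src[i];
	dst[i] = '\0';
}

int main(void)
{
	char buf[8];

	demo_strlcpy(buf, "interface0", sizeof(buf));
	printf("%s\n", buf); /* "interfa": truncated but NUL-terminated */
	return 0;
}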
static int xsk_get_max_queues(struct xsk_socket *xsk)
{
struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
@@ -552,7 +534,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
return -errno;
ifr.ifr_data = (void *)&channels;
- libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ);
+ bpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ);
err = ioctl(fd, SIOCETHTOOL, &ifr);
if (err && errno != EOPNOTSUPP) {
ret = -errno;
@@ -771,7 +753,7 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
}
ctx->ifindex = ifindex;
- libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
+ bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
xsk->ctx = ctx;
xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
@@ -958,7 +940,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
ctx->refcount = 1;
ctx->umem = umem;
ctx->queue_id = queue_id;
- libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
+ bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
ctx->fill = fill;
ctx->comp = comp;
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 681a5db80dae..162d3a516f2c 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -1006,7 +1006,8 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd
{
struct xsk_socket_info *xsk = ifobject->xsk;
bool use_poll = ifobject->use_poll;
- u32 i, idx = 0, ret, valid_pkts = 0;
+ u32 i, idx = 0, valid_pkts = 0;
+ int ret;
while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) {
if (use_poll) {