summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/bpf_design_QA.rst11
-rw-r--r--Documentation/bpf/bpf_devel_QA.rst27
-rw-r--r--Documentation/bpf/btf.rst7
-rw-r--r--Documentation/bpf/index.rst1
-rw-r--r--Documentation/bpf/kfuncs.rst54
-rw-r--r--Documentation/bpf/libbpf/index.rst3
-rw-r--r--Documentation/bpf/libbpf/program_types.rst203
-rw-r--r--Documentation/bpf/map_array.rst22
-rw-r--r--Documentation/bpf/map_bloom_filter.rst174
-rw-r--r--Documentation/bpf/map_cpumap.rst59
-rw-r--r--Documentation/bpf/map_devmap.rst238
-rw-r--r--Documentation/bpf/map_hash.rst33
-rw-r--r--Documentation/bpf/map_lpm_trie.rst24
-rw-r--r--Documentation/bpf/map_of_maps.rst6
-rw-r--r--Documentation/bpf/map_queue_stack.rst36
-rw-r--r--Documentation/bpf/map_xskmap.rst192
-rw-r--r--Documentation/bpf/programs.rst3
-rw-r--r--Documentation/bpf/redirect.rst81
-rw-r--r--include/linux/bpf.h154
-rw-r--r--include/linux/bpf_verifier.h37
-rw-r--r--include/linux/btf.h137
-rw-r--r--include/linux/btf_ids.h2
-rw-r--r--include/linux/compiler_types.h3
-rw-r--r--include/linux/filter.h20
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/uapi/linux/bpf.h33
-rw-r--r--kernel/bpf/arraymap.c1
-rw-r--r--kernel/bpf/bpf_lsm.c6
-rw-r--r--kernel/bpf/btf.c882
-rw-r--r--kernel/bpf/cgroup_iter.c14
-rw-r--r--kernel/bpf/core.c16
-rw-r--r--kernel/bpf/cpumap.c4
-rw-r--r--kernel/bpf/devmap.c4
-rw-r--r--kernel/bpf/hashtab.c1
-rw-r--r--kernel/bpf/helpers.c363
-rw-r--r--kernel/bpf/map_in_map.c48
-rw-r--r--kernel/bpf/ringbuf.c6
-rw-r--r--kernel/bpf/syscall.c96
-rw-r--r--kernel/bpf/verifier.c1531
-rw-r--r--kernel/trace/bpf_trace.c6
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c14
-rw-r--r--net/bpf/test_run.c3
-rw-r--r--net/core/filter.c57
-rw-r--r--net/ipv4/bpf_tcp_ca.c17
-rw-r--r--net/netfilter/nf_conntrack_bpf.c17
-rw-r--r--net/xdp/xskmap.c4
-rwxr-xr-xsamples/bpf/test_cgrp2_tc.sh2
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c2
-rw-r--r--tools/bpf/bpftool/Documentation/common_options.rst9
-rw-r--r--tools/bpf/bpftool/Documentation/substitutions.rst2
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool2
-rw-r--r--tools/bpf/bpftool/btf.c19
-rw-r--r--tools/bpf/bpftool/btf_dumper.c2
-rw-r--r--tools/bpf/bpftool/gen.c10
-rw-r--r--tools/bpf/bpftool/iter.c10
-rw-r--r--tools/bpf/bpftool/main.c28
-rw-r--r--tools/bpf/bpftool/main.h3
-rw-r--r--tools/bpf/bpftool/map.c20
-rw-r--r--tools/bpf/bpftool/prog.c15
-rw-r--r--tools/bpf/bpftool/struct_ops.c22
-rw-r--r--tools/include/uapi/linux/bpf.h33
-rw-r--r--tools/lib/bpf/btf.c5
-rw-r--r--tools/lib/bpf/btf_dump.c4
-rw-r--r--tools/lib/bpf/libbpf.c48
-rw-r--r--tools/lib/bpf/ringbuf.c4
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch642
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x5
-rw-r--r--tools/testing/selftests/bpf/Makefile14
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h68
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c19
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/config1
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_iter.c76
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c175
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/empty_skb.c146
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c740
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c158
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c142
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spinlock.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_kfunc.c163
-rw-r--r--tools/testing/selftests/bpf/prog_tests/type_cast.c114
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h72
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c260
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c170
-rw-r--r--tools/testing/selftests/bpf/progs/empty_skb.c37
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list.c370
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list.h56
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_fail.c581
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_cgroup.c8
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c290
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_common.h72
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_failure.c273
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_success.c222
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock_fail.c204
-rw-r--r--tools/testing/selftests/bpf/progs/type_cast.c83
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py6
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c174
-rw-r--r--tools/testing/selftests/bpf/verifier/ref_tracking.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/ringbuf.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/spill_fill.c2
109 files changed, 8697 insertions, 999 deletions
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index 17e774d96c5e..cec2371173d7 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -332,13 +332,14 @@ avoid defining types with 'bpf\_' prefix to not be broken in future releases.
In other words, no backwards compatibility is guaranteed if one using a type
in BTF with 'bpf\_' prefix.
-Q: What is the compatibility story for special BPF types in local kptrs?
-------------------------------------------------------------------------
-Q: Same as above, but for local kptrs (i.e. pointers to objects allocated using
-bpf_obj_new for user defined structures). Will the kernel preserve backwards
+Q: What is the compatibility story for special BPF types in allocated objects?
+------------------------------------------------------------------------------
+Q: Same as above, but for allocated objects (i.e. objects allocated using
+bpf_obj_new for user defined types). Will the kernel preserve backwards
compatibility for these features?
A: NO.
Unlike map value types, there are no stability guarantees for this case. The
-whole local kptr API itself is unstable (since it is exposed through kfuncs).
+whole API to work with allocated objects and any support for special fields
+inside them is unstable (since it is exposed through kfuncs).
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 761474bd7fe6..03d4993eda6f 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -44,6 +44,33 @@ is a guarantee that the reported issue will be overlooked.**
Submitting patches
==================
+Q: How do I run BPF CI on my changes before sending them out for review?
+------------------------------------------------------------------------
+A: BPF CI is GitHub based and hosted at https://github.com/kernel-patches/bpf.
+While GitHub also provides a CLI that can be used to accomplish the same
+results, here we focus on the UI based workflow.
+
+The following steps lay out how to start a CI run for your patches:
+
+- Create a fork of the aforementioned repository in your own account (one time
+ action)
+
+- Clone the fork locally, check out a new branch tracking either the bpf-next
+ or bpf branch, and apply your to-be-tested patches on top of it
+
+- Push the local branch to your fork and create a pull request against
+ kernel-patches/bpf's bpf-next_base or bpf_base branch, respectively
+
+Shortly after the pull request has been created, the CI workflow will run. Note
+that capacity is shared with patches submitted upstream being checked and so
+depending on utilization the run can take a while to finish.
+
+Note furthermore that both base branches (bpf-next_base and bpf_base) will be
+updated as patches are pushed to the respective upstream branches they track. As
+such, your patch set will automatically (be attempted to) be rebased as well.
+This behavior can result in a CI run being aborted and restarted with the new
+base line.
+
Q: To which mailing list do I need to submit my BPF patches?
------------------------------------------------------------
A: Please submit your BPF patches to the bpf kernel mailing list:
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index cf8722f96090..7cd7c5415a99 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -1062,4 +1062,9 @@ format.::
7. Testing
==========
-Kernel bpf selftest `test_btf.c` provides extensive set of BTF-related tests.
+The kernel BPF selftest `tools/testing/selftests/bpf/prog_tests/btf.c`_
+provides an extensive set of BTF-related tests.
+
+.. Links
+.. _tools/testing/selftests/bpf/prog_tests/btf.c:
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/prog_tests/btf.c
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 1b50de1983ee..1088d44634d6 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -29,6 +29,7 @@ that goes into great technical depth about the BPF Architecture.
clang-notes
linux-notes
other
+ redirect
.. only:: subproject and html
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 0f858156371d..90774479ab7a 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -72,6 +72,30 @@ argument as its size. By default, without __sz annotation, the size of the type
of the pointer is used. Without __sz annotation, a kfunc cannot accept a void
pointer.
+2.2.2 __k Annotation
+--------------------
+
+This annotation is only understood for scalar arguments, where it indicates that
+the verifier must check the scalar argument to be a known constant, which does
+not indicate a size parameter, and the value of the constant is relevant to the
+safety of the program.
+
+An example is given below::
+
+ void *bpf_obj_new(u32 local_type_id__k, ...)
+ {
+ ...
+ }
+
+Here, bpf_obj_new uses local_type_id argument to find out the size of that type
+ID in program's BTF and return a sized pointer to it. Each type ID will have a
+distinct size, hence it is crucial to treat each such call as distinct when
+values don't match during verifier state pruning checks.
+
+Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
+size parameter, and the value of the constant matters for program safety, __k
+suffix should be used.
+
.. _BPF_kfunc_nodef:
2.3 Using an existing kernel function
@@ -137,22 +161,20 @@ KF_ACQUIRE and KF_RET_NULL flags.
--------------------------
The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It
-indicates that the all pointer arguments will always have a guaranteed lifetime,
-and pointers to kernel objects are always passed to helpers in their unmodified
-form (as obtained from acquire kfuncs).
-
-It can be used to enforce that a pointer to a refcounted object acquired from a
-kfunc or BPF helper is passed as an argument to this kfunc without any
-modifications (e.g. pointer arithmetic) such that it is trusted and points to
-the original object.
-
-Meanwhile, it is also allowed pass pointers to normal memory to such kfuncs,
-but those can have a non-zero offset.
-
-This flag is often used for kfuncs that operate (change some property, perform
-some operation) on an object that was obtained using an acquire kfunc. Such
-kfuncs need an unchanged pointer to ensure the integrity of the operation being
-performed on the expected object.
+indicates that the all pointer arguments are valid, and that all pointers to
+BTF objects have been passed in their unmodified form (that is, at a zero
+offset, and without having been obtained from walking another pointer).
+
+There are two types of pointers to kernel objects which are considered "valid":
+
+1. Pointers which are passed as tracepoint or struct_ops callback arguments.
+2. Pointers which were returned from a KF_ACQUIRE or KF_KPTR_GET kfunc.
+
+Pointers to non-BTF objects (e.g. scalar pointers) may also be passed to
+KF_TRUSTED_ARGS kfuncs, and may have a non-zero offset.
+
+The definition of "valid" pointers is subject to change at any time, and has
+absolutely no ABI stability guarantees.
2.4.6 KF_SLEEPABLE flag
-----------------------
diff --git a/Documentation/bpf/libbpf/index.rst b/Documentation/bpf/libbpf/index.rst
index 3722537d1384..f9b3b252e28f 100644
--- a/Documentation/bpf/libbpf/index.rst
+++ b/Documentation/bpf/libbpf/index.rst
@@ -1,5 +1,7 @@
.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+.. _libbpf:
+
libbpf
======
@@ -7,6 +9,7 @@ libbpf
:maxdepth: 1
API Documentation <https://libbpf.readthedocs.io/en/latest/api.html>
+ program_types
libbpf_naming_convention
libbpf_build
diff --git a/Documentation/bpf/libbpf/program_types.rst b/Documentation/bpf/libbpf/program_types.rst
new file mode 100644
index 000000000000..ad4d4d5eecb0
--- /dev/null
+++ b/Documentation/bpf/libbpf/program_types.rst
@@ -0,0 +1,203 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+.. _program_types_and_elf:
+
+Program Types and ELF Sections
+==============================
+
+The table below lists the program types, their attach types where relevant and the ELF section
+names supported by libbpf for them. The ELF section names follow these rules:
+
+- ``type`` is an exact match, e.g. ``SEC("socket")``
+- ``type+`` means it can be either exact ``SEC("type")`` or well-formed ``SEC("type/extras")``
+ with a '``/``' separator between ``type`` and ``extras``.
+
+When ``extras`` are specified, they provide details of how to auto-attach the BPF program. The
+format of ``extras`` depends on the program type, e.g. ``SEC("tracepoint/<category>/<name>")``
+for tracepoints or ``SEC("usdt/<path>:<provider>:<name>")`` for USDT probes. The extras are
+described in more detail in the footnotes.
+
+
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| Program Type | Attach Type | ELF Section Name | Sleepable |
++===========================================+========================================+==================================+===========+
+| ``BPF_PROG_TYPE_CGROUP_DEVICE`` | ``BPF_CGROUP_DEVICE`` | ``cgroup/dev`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SKB`` | | ``cgroup/skb`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET_EGRESS`` | ``cgroup_skb/egress`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET_INGRESS`` | ``cgroup_skb/ingress`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCKOPT`` | ``BPF_CGROUP_GETSOCKOPT`` | ``cgroup/getsockopt`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_SETSOCKOPT`` | ``cgroup/setsockopt`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCK_ADDR`` | ``BPF_CGROUP_INET4_BIND`` | ``cgroup/bind4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET4_CONNECT`` | ``cgroup/connect4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET4_GETPEERNAME`` | ``cgroup/getpeername4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET4_GETSOCKNAME`` | ``cgroup/getsockname4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET6_BIND`` | ``cgroup/bind6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET6_CONNECT`` | ``cgroup/connect6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET6_GETPEERNAME`` | ``cgroup/getpeername6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET6_GETSOCKNAME`` | ``cgroup/getsockname6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UDP4_RECVMSG`` | ``cgroup/recvmsg4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UDP4_SENDMSG`` | ``cgroup/sendmsg4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UDP6_RECVMSG`` | ``cgroup/recvmsg6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UDP6_SENDMSG`` | ``cgroup/sendmsg6`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCK`` | ``BPF_CGROUP_INET4_POST_BIND`` | ``cgroup/post_bind4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET6_POST_BIND`` | ``cgroup/post_bind6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET_SOCK_CREATE`` | ``cgroup/sock_create`` | |
++ + +----------------------------------+-----------+
+| | | ``cgroup/sock`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET_SOCK_RELEASE`` | ``cgroup/sock_release`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SYSCTL`` | ``BPF_CGROUP_SYSCTL`` | ``cgroup/sysctl`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_EXT`` | | ``freplace+`` [#fentry]_ | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_FLOW_DISSECTOR`` | ``BPF_FLOW_DISSECTOR`` | ``flow_dissector`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_KPROBE`` | | ``kprobe+`` [#kprobe]_ | |
++ + +----------------------------------+-----------+
+| | | ``kretprobe+`` [#kprobe]_ | |
++ + +----------------------------------+-----------+
+| | | ``ksyscall+`` [#ksyscall]_ | |
++ + +----------------------------------+-----------+
+| | | ``kretsyscall+`` [#ksyscall]_ | |
++ + +----------------------------------+-----------+
+| | | ``uprobe+`` [#uprobe]_ | |
++ + +----------------------------------+-----------+
+| | | ``uprobe.s+`` [#uprobe]_ | Yes |
++ + +----------------------------------+-----------+
+| | | ``uretprobe+`` [#uprobe]_ | |
++ + +----------------------------------+-----------+
+| | | ``uretprobe.s+`` [#uprobe]_ | Yes |
++ + +----------------------------------+-----------+
+| | | ``usdt+`` [#usdt]_ | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_KPROBE_MULTI`` | ``kprobe.multi+`` [#kpmulti]_ | |
++ + +----------------------------------+-----------+
+| | | ``kretprobe.multi+`` [#kpmulti]_ | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LIRC_MODE2`` | ``BPF_LIRC_MODE2`` | ``lirc_mode2`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LSM`` | ``BPF_LSM_CGROUP`` | ``lsm_cgroup+`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_LSM_MAC`` | ``lsm+`` [#lsm]_ | |
++ + +----------------------------------+-----------+
+| | | ``lsm.s+`` [#lsm]_ | Yes |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_IN`` | | ``lwt_in`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_OUT`` | | ``lwt_out`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_SEG6LOCAL`` | | ``lwt_seg6local`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_XMIT`` | | ``lwt_xmit`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_PERF_EVENT`` | | ``perf_event`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` | | ``raw_tp.w+`` [#rawtp]_ | |
++ + +----------------------------------+-----------+
+| | | ``raw_tracepoint.w+`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_RAW_TRACEPOINT`` | | ``raw_tp+`` [#rawtp]_ | |
++ + +----------------------------------+-----------+
+| | | ``raw_tracepoint+`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` | |
++ + +----------------------------------+-----------+
+| | | ``tc`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_LOOKUP`` | ``BPF_SK_LOOKUP`` | ``sk_lookup`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_MSG`` | ``BPF_SK_MSG_VERDICT`` | ``sk_msg`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_REUSEPORT`` | ``BPF_SK_REUSEPORT_SELECT_OR_MIGRATE`` | ``sk_reuseport/migrate`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_SK_REUSEPORT_SELECT`` | ``sk_reuseport`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_SKB`` | | ``sk_skb`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_SK_SKB_STREAM_PARSER`` | ``sk_skb/stream_parser`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_SK_SKB_STREAM_VERDICT`` | ``sk_skb/stream_verdict`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SOCKET_FILTER`` | | ``socket`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SOCK_OPS`` | ``BPF_CGROUP_SOCK_OPS`` | ``sockops`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SYSCALL`` | | ``syscall`` | Yes |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_TRACEPOINT`` | | ``tp+`` [#tp]_ | |
++ + +----------------------------------+-----------+
+| | | ``tracepoint+`` [#tp]_ | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_TRACING`` | ``BPF_MODIFY_RETURN`` | ``fmod_ret+`` [#fentry]_ | |
++ + +----------------------------------+-----------+
+| | | ``fmod_ret.s+`` [#fentry]_ | Yes |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_FENTRY`` | ``fentry+`` [#fentry]_ | |
++ + +----------------------------------+-----------+
+| | | ``fentry.s+`` [#fentry]_ | Yes |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_FEXIT`` | ``fexit+`` [#fentry]_ | |
++ + +----------------------------------+-----------+
+| | | ``fexit.s+`` [#fentry]_ | Yes |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_ITER`` | ``iter+`` [#iter]_ | |
++ + +----------------------------------+-----------+
+| | | ``iter.s+`` [#iter]_ | Yes |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_RAW_TP`` | ``tp_btf+`` [#fentry]_ | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_XDP`` | ``BPF_XDP_CPUMAP`` | ``xdp.frags/cpumap`` | |
++ + +----------------------------------+-----------+
+| | | ``xdp/cpumap`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_XDP_DEVMAP`` | ``xdp.frags/devmap`` | |
++ + +----------------------------------+-----------+
+| | | ``xdp/devmap`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_XDP`` | ``xdp.frags`` | |
++ + +----------------------------------+-----------+
+| | | ``xdp`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+
+
+.. rubric:: Footnotes
+
+.. [#fentry] The ``fentry`` attach format is ``fentry[.s]/<function>``.
+.. [#kprobe] The ``kprobe`` attach format is ``kprobe/<function>[+<offset>]``. Valid
+ characters for ``function`` are ``a-zA-Z0-9_.`` and ``offset`` must be a valid
+ non-negative integer.
+.. [#ksyscall] The ``ksyscall`` attach format is ``ksyscall/<syscall>``.
+.. [#uprobe] The ``uprobe`` attach format is ``uprobe[.s]/<path>:<function>[+<offset>]``.
+.. [#usdt] The ``usdt`` attach format is ``usdt/<path>:<provider>:<name>``.
+.. [#kpmulti] The ``kprobe.multi`` attach format is ``kprobe.multi/<pattern>`` where ``pattern``
+ supports ``*`` and ``?`` wildcards. Valid characters for pattern are
+ ``a-zA-Z0-9_.*?``.
+.. [#lsm] The ``lsm`` attachment format is ``lsm[.s]/<hook>``.
+.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``.
+.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``.
+.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``.
diff --git a/Documentation/bpf/map_array.rst b/Documentation/bpf/map_array.rst
index b2cceb6c696b..f2f51a53e8ae 100644
--- a/Documentation/bpf/map_array.rst
+++ b/Documentation/bpf/map_array.rst
@@ -32,7 +32,11 @@ Usage
Kernel BPF
----------
-.. c:function::
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
Array elements can be retrieved using the ``bpf_map_lookup_elem()`` helper.
@@ -40,7 +44,11 @@ This helper returns a pointer into the array element, so to avoid data races
with userspace reading the value, the user must use primitives like
``__sync_fetch_and_add()`` when updating the value in-place.
-.. c:function::
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
Array elements can be updated using the ``bpf_map_update_elem()`` helper.
@@ -53,7 +61,7 @@ To clear an array element, you may use ``bpf_map_update_elem()`` to insert a
zero value to that index.
Per CPU Array
-~~~~~~~~~~~~~
+-------------
Values stored in ``BPF_MAP_TYPE_ARRAY`` can be accessed by multiple programs
across different CPUs. To restrict storage to a single CPU, you may use a
@@ -63,7 +71,11 @@ When using a ``BPF_MAP_TYPE_PERCPU_ARRAY`` the ``bpf_map_update_elem()`` and
``bpf_map_lookup_elem()`` helpers automatically access the slot for the current
CPU.
-.. c:function::
+bpf_map_lookup_percpu_elem()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
The ``bpf_map_lookup_percpu_elem()`` helper can be used to lookup the array
@@ -119,7 +131,7 @@ This example BPF program shows how to access an array element.
index = ip.protocol;
value = bpf_map_lookup_elem(&my_map, &index);
if (value)
- __sync_fetch_and_add(&value, skb->len);
+ __sync_fetch_and_add(value, skb->len);
return 0;
}
diff --git a/Documentation/bpf/map_bloom_filter.rst b/Documentation/bpf/map_bloom_filter.rst
new file mode 100644
index 000000000000..c82487f2fe0d
--- /dev/null
+++ b/Documentation/bpf/map_bloom_filter.rst
@@ -0,0 +1,174 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=========================
+BPF_MAP_TYPE_BLOOM_FILTER
+=========================
+
+.. note::
+ - ``BPF_MAP_TYPE_BLOOM_FILTER`` was introduced in kernel version 5.16
+
+``BPF_MAP_TYPE_BLOOM_FILTER`` provides a BPF bloom filter map. Bloom
+filters are a space-efficient probabilistic data structure used to
+quickly test whether an element exists in a set. In a bloom filter,
+false positives are possible whereas false negatives are not.
+
+The bloom filter map does not have keys, only values. When the bloom
+filter map is created, it must be created with a ``key_size`` of 0. The
+bloom filter map supports two operations:
+
+- push: adding an element to the map
+- peek: determining whether an element is present in the map
+
+BPF programs must use ``bpf_map_push_elem`` to add an element to the
+bloom filter map and ``bpf_map_peek_elem`` to query the map. These
+operations are exposed to userspace applications using the existing
+``bpf`` syscall in the following way:
+
+- ``BPF_MAP_UPDATE_ELEM`` -> push
+- ``BPF_MAP_LOOKUP_ELEM`` -> peek
+
+The ``max_entries`` size that is specified at map creation time is used
+to approximate a reasonable bitmap size for the bloom filter, and is not
+otherwise strictly enforced. If the user wishes to insert more entries
+into the bloom filter than ``max_entries``, this may lead to a higher
+false positive rate.
+
+The number of hashes to use for the bloom filter is configurable using
+the lower 4 bits of ``map_extra`` in ``union bpf_attr`` at map creation
+time. If no number is specified, the default used will be 5 hash
+functions. In general, using more hashes decreases both the false
+positive rate and the speed of a lookup.
+
+It is not possible to delete elements from a bloom filter map. A bloom
+filter map may be used as an inner map. The user is responsible for
+synchronising concurrent updates and lookups to ensure no false negative
+lookups occur.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_map_push_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+
+A ``value`` can be added to a bloom filter using the
+``bpf_map_push_elem()`` helper. The ``flags`` parameter must be set to
+``BPF_ANY`` when adding an entry to the bloom filter. This helper
+returns ``0`` on success, or negative error in case of failure.
+
+bpf_map_peek_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_peek_elem(struct bpf_map *map, void *value)
+
+The ``bpf_map_peek_elem()`` helper is used to determine whether
+``value`` is present in the bloom filter map. This helper returns ``0``
+if ``value`` is probably present in the map, or ``-ENOENT`` if ``value``
+is definitely not present in the map.
+
+Userspace
+---------
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_update_elem (int fd, const void *key, const void *value, __u64 flags)
+
+A userspace program can add a ``value`` to a bloom filter using libbpf's
+``bpf_map_update_elem`` function. The ``key`` parameter must be set to
+``NULL`` and ``flags`` must be set to ``BPF_ANY``. Returns ``0`` on
+success, or negative error in case of failure.
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_lookup_elem (int fd, const void *key, void *value)
+
+A userspace program can determine the presence of ``value`` in a bloom
+filter using libbpf's ``bpf_map_lookup_elem`` function. The ``key``
+parameter must be set to ``NULL``. Returns ``0`` if ``value`` is
+probably present in the map, or ``-ENOENT`` if ``value`` is definitely
+not present in the map.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+This snippet shows how to declare a bloom filter in a BPF program:
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_BLOOM_FILTER);
+ __type(value, __u32);
+ __uint(max_entries, 1000);
+ __uint(map_extra, 3);
+ } bloom_filter SEC(".maps");
+
+This snippet shows how to determine presence of a value in a bloom
+filter in a BPF program:
+
+.. code-block:: c
+
+ void *lookup(__u32 key)
+ {
+ if (bpf_map_peek_elem(&bloom_filter, &key) == 0) {
+ /* Verify not a false positive and fetch an associated
+ * value using a secondary lookup, e.g. in a hash table
+ */
+ return bpf_map_lookup_elem(&hash_table, &key);
+ }
+ return 0;
+ }
+
+Userspace
+---------
+
+This snippet shows how to use libbpf to create a bloom filter map from
+userspace:
+
+.. code-block:: c
+
+ int create_bloom()
+ {
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .map_extra = 3); /* number of hashes */
+
+ return bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER,
+ "ipv6_bloom", /* name */
+ 0, /* key size, must be zero */
+ sizeof(ipv6_addr), /* value size */
+ 10000, /* max entries */
+ &opts); /* create options */
+ }
+
+This snippet shows how to add an element to a bloom filter from
+userspace:
+
+.. code-block:: c
+
+ int add_element(struct bpf_map *bloom_map, __u32 value)
+ {
+ int bloom_fd = bpf_map__fd(bloom_map);
+ return bpf_map_update_elem(bloom_fd, NULL, &value, BPF_ANY);
+ }
+
+References
+==========
+
+https://lwn.net/ml/bpf/20210831225005.2762202-1-joannekoong@fb.com/
diff --git a/Documentation/bpf/map_cpumap.rst b/Documentation/bpf/map_cpumap.rst
index eaf57b38cafd..923cfc8ab51f 100644
--- a/Documentation/bpf/map_cpumap.rst
+++ b/Documentation/bpf/map_cpumap.rst
@@ -30,30 +30,35 @@ Usage
Kernel BPF
----------
-.. c:function::
+bpf_redirect_map()
+^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
- Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
- For ``BPF_MAP_TYPE_CPUMAP`` this map contains references to CPUs.
+Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+For ``BPF_MAP_TYPE_CPUMAP`` this map contains references to CPUs.
- The lower two bits of ``flags`` are used as the return code if the map lookup
- fails. This is so that the return value can be one of the XDP program return
- codes up to ``XDP_TX``, as chosen by the caller.
+The lower two bits of ``flags`` are used as the return code if the map lookup
+fails. This is so that the return value can be one of the XDP program return
+codes up to ``XDP_TX``, as chosen by the caller.
-Userspace
----------
+User space
+----------
.. note::
CPUMAP entries can only be updated/looked up/deleted from user space and not
from an eBPF program. Trying to call these functions from a kernel eBPF
program will result in the program failing to load and a verifier warning.
-.. c:function::
- int bpf_map_update_elem(int fd, const void *key, const void *value,
- __u64 flags);
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags);
- CPU entries can be added or updated using the ``bpf_map_update_elem()``
- helper. This helper replaces existing elements atomically. The ``value`` parameter
- can be ``struct bpf_cpumap_val``.
+CPU entries can be added or updated using the ``bpf_map_update_elem()``
+helper. This helper replaces existing elements atomically. The ``value`` parameter
+can be ``struct bpf_cpumap_val``.
.. code-block:: c
@@ -65,23 +70,29 @@ Userspace
} bpf_prog;
};
- The flags argument can be one of the following:
+The flags argument can be one of the following:
- BPF_ANY: Create a new element or update an existing element.
- BPF_NOEXIST: Create a new element only if it did not exist.
- BPF_EXIST: Update an existing element.
-.. c:function::
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
int bpf_map_lookup_elem(int fd, const void *key, void *value);
- CPU entries can be retrieved using the ``bpf_map_lookup_elem()``
- helper.
+CPU entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
-.. c:function::
int bpf_map_delete_elem(int fd, const void *key);
- CPU entries can be deleted using the ``bpf_map_delete_elem()``
- helper. This helper will return 0 on success, or negative error in case of
- failure.
+CPU entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case of
+failure.
Examples
========
@@ -142,8 +153,8 @@ The following code snippet shows how to declare a ``BPF_MAP_TYPE_CPUMAP`` called
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-Userspace
----------
+User space
+----------
The following code snippet shows how to dynamically set the max_entries for a
CPUMAP to the max number of cpus available on the system.
diff --git a/Documentation/bpf/map_devmap.rst b/Documentation/bpf/map_devmap.rst
new file mode 100644
index 000000000000..927312c7b8c8
--- /dev/null
+++ b/Documentation/bpf/map_devmap.rst
@@ -0,0 +1,238 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=================================================
+BPF_MAP_TYPE_DEVMAP and BPF_MAP_TYPE_DEVMAP_HASH
+=================================================
+
+.. note::
+ - ``BPF_MAP_TYPE_DEVMAP`` was introduced in kernel version 4.14
+ - ``BPF_MAP_TYPE_DEVMAP_HASH`` was introduced in kernel version 5.4
+
+``BPF_MAP_TYPE_DEVMAP`` and ``BPF_MAP_TYPE_DEVMAP_HASH`` are BPF maps primarily
+used as backend maps for the XDP BPF helper call ``bpf_redirect_map()``.
+``BPF_MAP_TYPE_DEVMAP`` is backed by an array that uses the key as
+the index to lookup a reference to a net device. While ``BPF_MAP_TYPE_DEVMAP_HASH``
+is backed by a hash table that uses a key to lookup a reference to a net device.
+The user provides either <``key``/ ``ifindex``> or <``key``/ ``struct bpf_devmap_val``>
+pairs to update the maps with new net devices.
+
+.. note::
+ - The key to a hash map doesn't have to be an ``ifindex``.
+ - While ``BPF_MAP_TYPE_DEVMAP_HASH`` allows for densely packing the net devices
+ it comes at the cost of a hash of the key when performing a look up.
+
+The setup and packet enqueue/send code is shared between the two types of
+devmap; only the lookup and insertion is different.
+
+Usage
+=====
+Kernel BPF
+----------
+bpf_redirect_map()
+^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+
+Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+For ``BPF_MAP_TYPE_DEVMAP`` and ``BPF_MAP_TYPE_DEVMAP_HASH`` this map contains
+references to net devices (for forwarding packets through other ports).
+
+The lower two bits of *flags* are used as the return code if the map lookup
+fails. This is so that the return value can be one of the XDP program return
+codes up to ``XDP_TX``, as chosen by the caller. The higher bits of ``flags``
+can be set to ``BPF_F_BROADCAST`` or ``BPF_F_EXCLUDE_INGRESS`` as defined
+below.
+
+With ``BPF_F_BROADCAST`` the packet will be broadcast to all the interfaces
+in the map, with ``BPF_F_EXCLUDE_INGRESS`` the ingress interface will be excluded
+from the broadcast.
+
+.. note::
+ - The key is ignored if BPF_F_BROADCAST is set.
+ - The broadcast feature can also be used to implement multicast forwarding:
+ simply create multiple DEVMAPs, each one corresponding to a single multicast group.
+
+This helper will return ``XDP_REDIRECT`` on success, or the value of the two
+lower bits of the ``flags`` argument if the map lookup fails.
+
+More information about redirection can be found :doc:`redirect`
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Net device entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper.
+
+User space
+----------
+.. note::
+ DEVMAP entries can only be updated/deleted from user space and not
+ from an eBPF program. Trying to call these functions from a kernel eBPF
+ program will result in the program failing to load and a verifier warning.
+
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags);
+
+Net device entries can be added or updated using the ``bpf_map_update_elem()``
+helper. This helper replaces existing elements atomically. The ``value`` parameter
+can be ``struct bpf_devmap_val`` or a simple ``int ifindex`` for backwards
+compatibility.
+
+ .. code-block:: c
+
+ struct bpf_devmap_val {
+ __u32 ifindex; /* device index */
+ union {
+ int fd; /* prog fd on map write */
+ __u32 id; /* prog id on map read */
+ } bpf_prog;
+ };
+
+The ``flags`` argument can be one of the following:
+ - ``BPF_ANY``: Create a new element or update an existing element.
+ - ``BPF_NOEXIST``: Create a new element only if it did not exist.
+ - ``BPF_EXIST``: Update an existing element.
+
+DEVMAPs can associate a program with a device entry by adding a ``bpf_prog.fd``
+to ``struct bpf_devmap_val``. Programs are run after ``XDP_REDIRECT`` and have
+access to both Rx device and Tx device. The program associated with the ``fd``
+must have type XDP with expected attach type ``xdp_devmap``.
+When a program is associated with a device index, the program is run on an
+``XDP_REDIRECT`` and before the buffer is added to the per-cpu queue. Examples
+of how to attach/use xdp_devmap progs can be found in the kernel selftests:
+
+- ``tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c``
+- ``tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c``
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+.. c:function::
+ int bpf_map_lookup_elem(int fd, const void *key, void *value);
+
+Net device entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+.. c:function::
+ int bpf_map_delete_elem(int fd, const void *key);
+
+Net device entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case of
+failure.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_DEVMAP``
+called tx_port.
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 256);
+ } tx_port SEC(".maps");
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_DEVMAP_HASH``
+called forward_map.
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __type(key, __u32);
+ __type(value, struct bpf_devmap_val);
+ __uint(max_entries, 32);
+ } forward_map SEC(".maps");
+
+.. note::
+
+ The value type in the DEVMAP above is a ``struct bpf_devmap_val``
+
+The following code snippet shows a simple xdp_redirect_map program. This program
+would work with a user space program that populates the devmap ``forward_map`` based
+on ingress ifindexes. The BPF program (below) is redirecting packets using the
+ingress ``ifindex`` as the ``key``.
+
+.. code-block:: c
+
+ SEC("xdp")
+ int xdp_redirect_map_func(struct xdp_md *ctx)
+ {
+ int index = ctx->ingress_ifindex;
+
+ return bpf_redirect_map(&forward_map, index, 0);
+ }
+
+The following code snippet shows a BPF program that is broadcasting packets to
+all the interfaces in the ``tx_port`` devmap.
+
+.. code-block:: c
+
+ SEC("xdp")
+ int xdp_redirect_map_func(struct xdp_md *ctx)
+ {
+ return bpf_redirect_map(&tx_port, 0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+ }
+
+User space
+----------
+
+The following code snippet shows how to update a devmap called ``tx_port``.
+
+.. code-block:: c
+
+ int update_devmap(int ifindex, int redirect_ifindex)
+ {
+ int ret;
+
+ ret = bpf_map_update_elem(bpf_map__fd(tx_port), &ifindex, &redirect_ifindex, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update devmap_ value: %s\n",
+ strerror(errno));
+ }
+
+ return ret;
+ }
+
+The following code snippet shows how to update a hash_devmap called ``forward_map``.
+
+.. code-block:: c
+
+ int update_devmap(int ifindex, int redirect_ifindex)
+ {
+ struct bpf_devmap_val devmap_val = { .ifindex = redirect_ifindex };
+ int ret;
+
+ ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update devmap_ value: %s\n",
+ strerror(errno));
+ }
+ return ret;
+ }
+
+References
+===========
+
+- https://lwn.net/Articles/728146/
+- https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=6f9d451ab1a33728adb72d7ff66a7b374d665176
+- https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4106
diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst
index e85120878b27..8669426264c6 100644
--- a/Documentation/bpf/map_hash.rst
+++ b/Documentation/bpf/map_hash.rst
@@ -34,7 +34,14 @@ the ``BPF_F_NO_COMMON_LRU`` flag when calling ``bpf_map_create``.
Usage
=====
-.. c:function::
+Kernel BPF
+----------
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
Hash entries can be added or updated using the ``bpf_map_update_elem()``
@@ -49,14 +56,22 @@ parameter can be used to control the update behaviour:
``bpf_map_update_elem()`` returns 0 on success, or negative error in
case of failure.
-.. c:function::
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
Hash entries can be retrieved using the ``bpf_map_lookup_elem()``
helper. This helper returns a pointer to the value associated with
``key``, or ``NULL`` if no entry was found.
-.. c:function::
+bpf_map_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
long bpf_map_delete_elem(struct bpf_map *map, const void *key)
Hash entries can be deleted using the ``bpf_map_delete_elem()``
@@ -70,7 +85,11 @@ For ``BPF_MAP_TYPE_PERCPU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH``
the ``bpf_map_update_elem()`` and ``bpf_map_lookup_elem()`` helpers
automatically access the hash slot for the current CPU.
-.. c:function::
+bpf_map_lookup_percpu_elem()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
The ``bpf_map_lookup_percpu_elem()`` helper can be used to lookup the
@@ -89,7 +108,11 @@ See ``tools/testing/selftests/bpf/progs/test_spin_lock.c``.
Userspace
---------
-.. c:function::
+bpf_map_get_next_key()
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
int bpf_map_get_next_key(int fd, const void *cur_key, void *next_key)
In userspace, it is possible to iterate through the keys of a hash using
diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst
index 31be1aa7ba2c..74d64a30f500 100644
--- a/Documentation/bpf/map_lpm_trie.rst
+++ b/Documentation/bpf/map_lpm_trie.rst
@@ -35,7 +35,11 @@ Usage
Kernel BPF
----------
-.. c:function::
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
The longest prefix entry for a given data value can be found using the
@@ -48,7 +52,11 @@ performing longest prefix lookups. For example, when searching for the
longest prefix match for an IPv4 address, ``prefixlen`` should be set to
``32``.
-.. c:function::
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
Prefix entries can be added or updated using the ``bpf_map_update_elem()``
@@ -61,7 +69,11 @@ case of failure.
The flags parameter must be one of BPF_ANY, BPF_NOEXIST or BPF_EXIST,
but the value is ignored, giving BPF_ANY semantics.
-.. c:function::
+bpf_map_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
long bpf_map_delete_elem(struct bpf_map *map, const void *key)
Prefix entries can be deleted using the ``bpf_map_delete_elem()``
@@ -74,7 +86,11 @@ Userspace
Access from userspace uses libbpf APIs with the same names as above, with
the map identified by ``fd``.
-.. c:function::
+bpf_map_get_next_key()
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
int bpf_map_get_next_key (int fd, const void *cur_key, void *next_key)
A userspace program can iterate through the entries in an LPM trie using
diff --git a/Documentation/bpf/map_of_maps.rst b/Documentation/bpf/map_of_maps.rst
index 07212b9227a9..7b5617c2d017 100644
--- a/Documentation/bpf/map_of_maps.rst
+++ b/Documentation/bpf/map_of_maps.rst
@@ -45,7 +45,11 @@ Usage
Kernel BPF Helper
-----------------
-.. c:function::
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
Inner maps can be retrieved using the ``bpf_map_lookup_elem()`` helper. This
diff --git a/Documentation/bpf/map_queue_stack.rst b/Documentation/bpf/map_queue_stack.rst
index f20e31a647b9..8d14ed49d6e1 100644
--- a/Documentation/bpf/map_queue_stack.rst
+++ b/Documentation/bpf/map_queue_stack.rst
@@ -28,7 +28,11 @@ Usage
Kernel BPF
----------
-.. c:function::
+bpf_map_push_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
An element ``value`` can be added to a queue or stack using the
@@ -38,14 +42,22 @@ when the queue or stack is full, the oldest element will be removed to
make room for ``value`` to be added. Returns ``0`` on success, or
negative error in case of failure.
-.. c:function::
+bpf_map_peek_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
long bpf_map_peek_elem(struct bpf_map *map, void *value)
This helper fetches an element ``value`` from a queue or stack without
removing it. Returns ``0`` on success, or negative error in case of
failure.
-.. c:function::
+bpf_map_pop_elem()
+~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
long bpf_map_pop_elem(struct bpf_map *map, void *value)
This helper removes an element into ``value`` from a queue or
@@ -55,7 +67,11 @@ stack. Returns ``0`` on success, or negative error in case of failure.
Userspace
---------
-.. c:function::
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
int bpf_map_update_elem (int fd, const void *key, const void *value, __u64 flags)
A userspace program can push ``value`` onto a queue or stack using libbpf's
@@ -64,7 +80,11 @@ A userspace program can push ``value`` onto a queue or stack using libbpf's
same semantics as the ``bpf_map_push_elem`` kernel helper. Returns ``0`` on
success, or negative error in case of failure.
-.. c:function::
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
int bpf_map_lookup_elem (int fd, const void *key, void *value)
A userspace program can peek at the ``value`` at the head of a queue or stack
@@ -72,7 +92,11 @@ using the libbpf ``bpf_map_lookup_elem`` function. The ``key`` parameter must be
set to ``NULL``. Returns ``0`` on success, or negative error in case of
failure.
-.. c:function::
+bpf_map_lookup_and_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
int bpf_map_lookup_and_delete_elem (int fd, const void *key, void *value)
A userspace program can pop a ``value`` from the head of a queue or stack using
diff --git a/Documentation/bpf/map_xskmap.rst b/Documentation/bpf/map_xskmap.rst
new file mode 100644
index 000000000000..7093b8208451
--- /dev/null
+++ b/Documentation/bpf/map_xskmap.rst
@@ -0,0 +1,192 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+===================
+BPF_MAP_TYPE_XSKMAP
+===================
+
+.. note::
+ - ``BPF_MAP_TYPE_XSKMAP`` was introduced in kernel version 4.18
+
+The ``BPF_MAP_TYPE_XSKMAP`` is used as a backend map for XDP BPF helper
+call ``bpf_redirect_map()`` and ``XDP_REDIRECT`` action, like 'devmap' and 'cpumap'.
+This map type redirects raw XDP frames to `AF_XDP`_ sockets (XSKs), a new type of
+address family in the kernel that allows redirection of frames from a driver to
+user space without having to traverse the full network stack. An AF_XDP socket
+binds to a single netdev queue. A mapping of XSKs to queues is shown below:
+
+.. code-block:: none
+
+ +---------------------------------------------------+
+ | xsk A | xsk B | xsk C |<---+ User space
+ =========================================================|==========
+ | Queue 0 | Queue 1 | Queue 2 | | Kernel
+ +---------------------------------------------------+ |
+ | Netdev eth0 | |
+ +---------------------------------------------------+ |
+ | +=============+ | |
+ | | key | xsk | | |
+ | +---------+ +=============+ | |
+ | | | | 0 | xsk A | | |
+ | | | +-------------+ | |
+ | | | | 1 | xsk B | | |
+ | | BPF |-- redirect -->+-------------+-------------+
+ | | prog | | 2 | xsk C | |
+ | | | +-------------+ |
+ | | | |
+ | | | |
+ | +---------+ |
+ | |
+ +---------------------------------------------------+
+
+.. note::
+ An AF_XDP socket that is bound to a certain <netdev/queue_id> will *only*
+ accept XDP frames from that <netdev/queue_id>. If an XDP program tries to redirect
+ from a <netdev/queue_id> other than what the socket is bound to, the frame will
+ not be received on the socket.
+
+Typically an XSKMAP is created per netdev. This map contains an array of XSK File
+Descriptors (FDs). The number of array elements is typically set or adjusted using
+the ``max_entries`` map parameter. For AF_XDP ``max_entries`` is equal to the number
+of queues supported by the netdev.
+
+.. note::
+ Both the map key and map value size must be 4 bytes.
+
+Usage
+=====
+
+Kernel BPF
+----------
+bpf_redirect_map()
+^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+
+Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+For ``BPF_MAP_TYPE_XSKMAP`` this map contains references to XSK FDs
+for sockets attached to a netdev's queues.
+
+.. note::
+ If the map is empty at an index, the packet is dropped. This means that it is
+ necessary to have an XDP program loaded with at least one XSK in the
+ XSKMAP to be able to get any traffic to user space through the socket.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+XSK entry references of type ``struct xdp_sock *`` can be retrieved using the
+``bpf_map_lookup_elem()`` helper.
+
+User space
+----------
+.. note::
+ XSK entries can only be updated/deleted from user space and not from
+ a BPF program. Trying to call these functions from a kernel BPF program will
+ result in the program failing to load and a verifier warning.
+
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags)
+
+XSK entries can be added or updated using the ``bpf_map_update_elem()``
+helper. The ``key`` parameter is equal to the queue_id of the queue the XSK
+is attaching to. And the ``value`` parameter is the FD value of that socket.
+
+Under the hood, the XSKMAP update function uses the XSK FD value to retrieve the
+associated ``struct xdp_sock`` instance.
+
+The flags argument can be one of the following:
+
+- BPF_ANY: Create a new element or update an existing element.
+- BPF_NOEXIST: Create a new element only if it did not exist.
+- BPF_EXIST: Update an existing element.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_lookup_elem(int fd, const void *key, void *value)
+
+Returns ``struct xdp_sock *`` or negative error in case of failure.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_delete_elem(int fd, const void *key)
+
+XSK entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case of
+failure.
+
+.. note::
+ When `libxdp`_ deletes an XSK it also removes the associated socket
+ entry from the XSKMAP.
+
+Examples
+========
+Kernel
+------
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_XSKMAP`` called
+``xsks_map`` and how to redirect packets to an XSK.
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 64);
+ } xsks_map SEC(".maps");
+
+
+ SEC("xdp")
+ int xsk_redir_prog(struct xdp_md *ctx)
+ {
+ __u32 index = ctx->rx_queue_index;
+
+ if (bpf_map_lookup_elem(&xsks_map, &index))
+ return bpf_redirect_map(&xsks_map, index, 0);
+ return XDP_PASS;
+ }
+
+User space
+----------
+
+The following code snippet shows how to update an XSKMAP with an XSK entry.
+
+.. code-block:: c
+
+ int update_xsks_map(struct bpf_map *xsks_map, int queue_id, int xsk_fd)
+ {
+ int ret;
+
+ ret = bpf_map_update_elem(bpf_map__fd(xsks_map), &queue_id, &xsk_fd, 0);
+ if (ret < 0)
+ fprintf(stderr, "Failed to update xsks_map: %s\n", strerror(errno));
+
+ return ret;
+ }
+
+For an example on how create AF_XDP sockets, please see the AF_XDP-example and
+AF_XDP-forwarding programs in the `bpf-examples`_ directory in the `libxdp`_ repository.
+For a detailed explaination of the AF_XDP interface please see:
+
+- `libxdp-readme`_.
+- `AF_XDP`_ kernel documentation.
+
+.. note::
+ The most comprehensive resource for using XSKMAPs and AF_XDP is `libxdp`_.
+
+.. _libxdp: https://github.com/xdp-project/xdp-tools/tree/master/lib/libxdp
+.. _AF_XDP: https://www.kernel.org/doc/html/latest/networking/af_xdp.html
+.. _bpf-examples: https://github.com/xdp-project/bpf-examples
+.. _libxdp-readme: https://github.com/xdp-project/xdp-tools/tree/master/lib/libxdp#using-af_xdp-sockets
diff --git a/Documentation/bpf/programs.rst b/Documentation/bpf/programs.rst
index 620eb667ac7a..c99000ab6d9b 100644
--- a/Documentation/bpf/programs.rst
+++ b/Documentation/bpf/programs.rst
@@ -7,3 +7,6 @@ Program Types
:glob:
prog_*
+
+For a list of all program types, see :ref:`program_types_and_elf` in
+the :ref:`libbpf` documentation.
diff --git a/Documentation/bpf/redirect.rst b/Documentation/bpf/redirect.rst
new file mode 100644
index 000000000000..2fa2b0b05004
--- /dev/null
+++ b/Documentation/bpf/redirect.rst
@@ -0,0 +1,81 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+========
+Redirect
+========
+XDP_REDIRECT
+############
+Supported maps
+--------------
+
+XDP_REDIRECT works with the following map types:
+
+- ``BPF_MAP_TYPE_DEVMAP``
+- ``BPF_MAP_TYPE_DEVMAP_HASH``
+- ``BPF_MAP_TYPE_CPUMAP``
+- ``BPF_MAP_TYPE_XSKMAP``
+
+For more information on these maps, please see the specific map documentation.
+
+Process
+-------
+
+.. kernel-doc:: net/core/filter.c
+ :doc: xdp redirect
+
+.. note::
+ Not all drivers support transmitting frames after a redirect, and for
+ those that do, not all of them support non-linear frames. Non-linear xdp
+ bufs/frames are bufs/frames that contain more than one fragment.
+
+Debugging packet drops
+----------------------
+Silent packet drops for XDP_REDIRECT can be debugged using:
+
+- bpf_trace
+- perf_record
+
+bpf_trace
+^^^^^^^^^
+The following bpftrace command can be used to capture and count all XDP tracepoints:
+
+.. code-block:: none
+
+ sudo bpftrace -e 'tracepoint:xdp:* { @cnt[probe] = count(); }'
+ Attaching 12 probes...
+ ^C
+
+ @cnt[tracepoint:xdp:mem_connect]: 18
+ @cnt[tracepoint:xdp:mem_disconnect]: 18
+ @cnt[tracepoint:xdp:xdp_exception]: 19605
+ @cnt[tracepoint:xdp:xdp_devmap_xmit]: 1393604
+ @cnt[tracepoint:xdp:xdp_redirect]: 22292200
+
+.. note::
+ The various xdp tracepoints can be found in ``source/include/trace/events/xdp.h``
+
+The following bpftrace command can be used to extract the ``ERRNO`` being returned as
+part of the err parameter:
+
+.. code-block:: none
+
+ sudo bpftrace -e \
+ 'tracepoint:xdp:xdp_redirect*_err {@redir_errno[-args->err] = count();}
+ tracepoint:xdp:xdp_devmap_xmit {@devmap_errno[-args->err] = count();}'
+
+perf record
+^^^^^^^^^^^
+The perf tool also supports recording tracepoints:
+
+.. code-block:: none
+
+ perf record -a -e xdp:xdp_redirect_err \
+ -e xdp:xdp_redirect_map_err \
+ -e xdp:xdp_exception \
+ -e xdp:xdp_devmap_xmit
+
+References
+===========
+
+- https://github.com/xdp-project/xdp-tutorial/tree/master/tracing02-xdp-monitor
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c577e4c7c84b..67452103bb86 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -54,6 +54,8 @@ struct cgroup;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
+extern struct bpf_mem_alloc bpf_global_ma;
+extern bool bpf_global_ma_set;
typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
@@ -85,7 +87,8 @@ struct bpf_map_ops {
int (*map_lookup_and_delete_batch)(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
- int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr,
+ int (*map_update_batch)(struct bpf_map *map, struct file *map_file,
+ const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
@@ -135,7 +138,7 @@ struct bpf_map_ops {
struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
/* Misc helpers.*/
- int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags);
+ int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags);
/* map_meta_equal must be implemented for maps that can be
* used as an inner map. It is a runtime check to ensure
@@ -165,9 +168,8 @@ struct bpf_map_ops {
};
enum {
- /* Support at most 8 pointers in a BTF type */
- BTF_FIELDS_MAX = 10,
- BPF_MAP_OFF_ARR_MAX = BTF_FIELDS_MAX,
+ /* Support at most 10 fields in a BTF type */
+ BTF_FIELDS_MAX = 10,
};
enum btf_field_type {
@@ -176,6 +178,8 @@ enum btf_field_type {
BPF_KPTR_UNREF = (1 << 2),
BPF_KPTR_REF = (1 << 3),
BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
+ BPF_LIST_HEAD = (1 << 4),
+ BPF_LIST_NODE = (1 << 5),
};
struct btf_field_kptr {
@@ -185,11 +189,19 @@ struct btf_field_kptr {
u32 btf_id;
};
+struct btf_field_list_head {
+ struct btf *btf;
+ u32 value_btf_id;
+ u32 node_offset;
+ struct btf_record *value_rec;
+};
+
struct btf_field {
u32 offset;
enum btf_field_type type;
union {
struct btf_field_kptr kptr;
+ struct btf_field_list_head list_head;
};
};
@@ -203,8 +215,8 @@ struct btf_record {
struct btf_field_offs {
u32 cnt;
- u32 field_off[BPF_MAP_OFF_ARR_MAX];
- u8 field_sz[BPF_MAP_OFF_ARR_MAX];
+ u32 field_off[BTF_FIELDS_MAX];
+ u8 field_sz[BTF_FIELDS_MAX];
};
struct bpf_map {
@@ -267,6 +279,10 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
return "kptr";
+ case BPF_LIST_HEAD:
+ return "bpf_list_head";
+ case BPF_LIST_NODE:
+ return "bpf_list_node";
default:
WARN_ON_ONCE(1);
return "unknown";
@@ -283,6 +299,10 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
return sizeof(u64);
+ case BPF_LIST_HEAD:
+ return sizeof(struct bpf_list_head);
+ case BPF_LIST_NODE:
+ return sizeof(struct bpf_list_node);
default:
WARN_ON_ONCE(1);
return 0;
@@ -299,6 +319,10 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
return __alignof__(u64);
+ case BPF_LIST_HEAD:
+ return __alignof__(struct bpf_list_head);
+ case BPF_LIST_NODE:
+ return __alignof__(struct bpf_list_node);
default:
WARN_ON_ONCE(1);
return 0;
@@ -312,16 +336,19 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f
return rec->field_mask & type;
}
-static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
+static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj)
{
- if (!IS_ERR_OR_NULL(map->record)) {
- struct btf_field *fields = map->record->fields;
- u32 cnt = map->record->cnt;
- int i;
+ int i;
- for (i = 0; i < cnt; i++)
- memset(dst + fields[i].offset, 0, btf_field_type_size(fields[i].type));
- }
+ if (!foffs)
+ return;
+ for (i = 0; i < foffs->cnt; i++)
+ memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]);
+}
+
+static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
+{
+ bpf_obj_init(map->field_offs, dst);
}
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
@@ -361,7 +388,7 @@ static inline void bpf_obj_memcpy(struct btf_field_offs *foffs,
u32 sz = next_off - curr_off;
memcpy(dst + curr_off, src + curr_off, sz);
- curr_off = next_off + foffs->field_sz[i];
+ curr_off += foffs->field_sz[i] + sz;
}
memcpy(dst + curr_off, src + curr_off, size - curr_off);
}
@@ -391,7 +418,7 @@ static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32
u32 sz = next_off - curr_off;
memset(dst + curr_off, 0, sz);
- curr_off = next_off + foffs->field_sz[i];
+ curr_off += foffs->field_sz[i] + sz;
}
memset(dst + curr_off, 0, size - curr_off);
}
@@ -404,6 +431,9 @@ static inline void zero_map_value(struct bpf_map *map, void *dst)
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);
void bpf_timer_cancel_and_free(void *timer);
+void bpf_list_head_free(const struct btf_field *field, void *list_head,
+ struct bpf_spin_lock *spin_lock);
+
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
struct bpf_offload_dev;
@@ -472,10 +502,8 @@ enum bpf_type_flag {
*/
MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS),
- /* MEM was "allocated" from a different helper, and cannot be mixed
- * with regular non-MEM_ALLOC'ed MEM types.
- */
- MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS),
+ /* MEM points to BPF ring buffer reservation. */
+ MEM_RINGBUF = BIT(2 + BPF_BASE_TYPE_BITS),
/* MEM is in user address space. */
MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS),
@@ -510,6 +538,43 @@ enum bpf_type_flag {
/* Size is known at compile time. */
MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),
+ /* MEM is of an allocated object of type in program BTF. This is used to
+ * tag PTR_TO_BTF_ID allocated using bpf_obj_new.
+ */
+ MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS),
+
+ /* PTR was passed from the kernel in a trusted context, and may be
+ * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions.
+ * Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above.
+ * PTR_UNTRUSTED refers to a kptr that was read directly from a map
+ * without invoking bpf_kptr_xchg(). What we really need to know is
+ * whether a pointer is safe to pass to a kfunc or BPF helper function.
+ * While PTR_UNTRUSTED pointers are unsafe to pass to kfuncs and BPF
+ * helpers, they do not cover all possible instances of unsafe
+ * pointers. For example, a pointer that was obtained from walking a
+ * struct will _not_ get the PTR_UNTRUSTED type modifier, despite the
+ * fact that it may be NULL, invalid, etc. This is due to backwards
+ * compatibility requirements, as this was the behavior that was first
+ * introduced when kptrs were added. The behavior is now considered
+ * deprecated, and PTR_UNTRUSTED will eventually be removed.
+ *
+ * PTR_TRUSTED, on the other hand, is a pointer that the kernel
+ * guarantees to be valid and safe to pass to kfuncs and BPF helpers.
+ * For example, pointers passed to tracepoint arguments are considered
+ * PTR_TRUSTED, as are pointers that are passed to struct_ops
+ * callbacks. As alluded to above, pointers that are obtained from
+ * walking PTR_TRUSTED pointers are _not_ trusted. For example, if a
+ * struct task_struct *task is PTR_TRUSTED, then accessing
+ * task->last_wakee will lose the PTR_TRUSTED modifier when it's stored
+ * in a BPF register. Similarly, pointers passed to certain programs
+ * types such as kretprobes are not guaranteed to be valid, as they may
+ * for example contain an object that was recently freed.
+ */
+ PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS),
+
+ /* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
+ MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
@@ -549,7 +614,7 @@ enum bpf_arg_type {
ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
- ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
+ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
@@ -566,7 +631,6 @@ enum bpf_arg_type {
ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM,
ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX,
ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
- ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM,
ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
/* pointer to memory does not need to be initialized, helper function must fill
@@ -591,7 +655,7 @@ enum bpf_return_type {
RET_PTR_TO_SOCKET, /* returns a pointer to a socket */
RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */
RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */
- RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */
+ RET_PTR_TO_MEM, /* returns a pointer to memory */
RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */
RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */
__BPF_RET_TYPE_MAX,
@@ -601,9 +665,10 @@ enum bpf_return_type {
RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
- RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM,
- RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM,
+ RET_PTR_TO_RINGBUF_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_RINGBUF | RET_PTR_TO_MEM,
+ RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MEM,
RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
+ RET_PTR_TO_BTF_ID_TRUSTED = PTR_TRUSTED | RET_PTR_TO_BTF_ID,
/* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag.
@@ -620,6 +685,7 @@ struct bpf_func_proto {
u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
bool gpl_only;
bool pkt_access;
+ bool might_sleep;
enum bpf_return_type ret_type;
union {
struct {
@@ -758,6 +824,7 @@ struct bpf_prog_ops {
union bpf_attr __user *uattr);
};
+struct bpf_reg_state;
struct bpf_verifier_ops {
/* return eBPF function prototype for verification */
const struct bpf_func_proto *
@@ -779,9 +846,8 @@ struct bpf_verifier_ops {
struct bpf_insn *dst,
struct bpf_prog *prog, u32 *target_size);
int (*btf_struct_access)(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off, int size,
- enum bpf_access_type atype,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
u32 *next_btf_id, enum bpf_type_flag *flag);
};
@@ -1794,7 +1860,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
int generic_map_lookup_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
-int generic_map_update_batch(struct bpf_map *map,
+int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
int generic_map_delete_batch(struct bpf_map *map,
@@ -2085,9 +2151,9 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size,
return btf_ctx_access(off, size, type, prog, info);
}
-int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
- const struct btf_type *t, int off, int size,
- enum bpf_access_type atype,
+int btf_struct_access(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
u32 *next_btf_id, enum bpf_type_flag *flag);
bool btf_struct_ids_match(struct bpf_verifier_log *log,
const struct btf *btf, u32 id, int off,
@@ -2100,22 +2166,11 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
const char *func_name,
struct btf_func_model *m);
-struct bpf_kfunc_arg_meta {
- u64 r0_size;
- bool r0_rdonly;
- int ref_obj_id;
- u32 flags;
-};
-
struct bpf_reg_state;
int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *regs);
int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *regs);
-int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
- const struct btf *btf, u32 func_id,
- struct bpf_reg_state *regs,
- struct bpf_kfunc_arg_meta *meta);
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *reg);
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
@@ -2338,9 +2393,8 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id)
}
static inline int btf_struct_access(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off, int size,
- enum bpf_access_type atype,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
u32 *next_btf_id, enum bpf_type_flag *flag)
{
return -EACCES;
@@ -2797,4 +2851,10 @@ struct bpf_key {
bool has_ref;
};
#endif /* CONFIG_KEYS */
+
+static inline bool type_is_alloc(u32 type)
+{
+ return type & MEM_ALLOC;
+}
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 1a32baa78ce2..c05aa6e1f6f5 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -19,7 +19,7 @@
*/
#define BPF_MAX_VAR_SIZ (1 << 29)
/* size of type_str_buf in bpf_verifier. */
-#define TYPE_STR_BUF_LEN 64
+#define TYPE_STR_BUF_LEN 128
/* Liveness marks, used for registers and spilled-regs (in stack slots).
* Read marks propagate upwards until they find a write mark; they record that
@@ -223,6 +223,11 @@ struct bpf_reference_state {
* exiting a callback function.
*/
int callback_ref;
+ /* Mark the reference state to release the registers sharing the same id
+ * on bpf_spin_unlock (for nodes that we will lose ownership to but are
+ * safe to access inside the critical section).
+ */
+ bool release_on_unlock;
};
/* state of the program:
@@ -323,8 +328,23 @@ struct bpf_verifier_state {
u32 branches;
u32 insn_idx;
u32 curframe;
- u32 active_spin_lock;
+ /* For every reg representing a map value or allocated object pointer,
+ * we consider the tuple of (ptr, id) for them to be unique in verifier
+ * context and conside them to not alias each other for the purposes of
+ * tracking lock state.
+ */
+ struct {
+ /* This can either be reg->map_ptr or reg->btf. If ptr is NULL,
+ * there's no active lock held, and other fields have no
+ * meaning. If non-NULL, it indicates that a lock is held and
+ * id member has the reg->id of the register which can be >= 0.
+ */
+ void *ptr;
+ /* This will be reg->id */
+ u32 id;
+ } active_lock;
bool speculative;
+ bool active_rcu_lock;
/* first and last insn idx of this verifier state */
u32 first_insn_idx;
@@ -419,11 +439,14 @@ struct bpf_insn_aux_data {
*/
struct bpf_loop_inline_state loop_inline_state;
};
+ u64 obj_new_size; /* remember the size of type passed to bpf_obj_new to rewrite R1 */
+ struct btf_struct_meta *kptr_struct_meta;
u64 map_key_state; /* constant (32 bit) key tracking for maps */
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
bool zext_dst; /* this insn zero extends dst reg */
+ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
u8 alu_state; /* used in combination with alu_limit */
/* below fields are initialized once */
@@ -513,6 +536,7 @@ struct bpf_verifier_env {
bool bypass_spec_v1;
bool bypass_spec_v4;
bool seen_direct_write;
+ bool rcu_tag_supported;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
@@ -589,8 +613,6 @@ int check_ptr_off_reg(struct bpf_verifier_env *env,
int check_func_arg_reg_off(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno,
enum bpf_arg_type arg_type);
-int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
- u32 regno);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env,
@@ -661,4 +683,11 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
}
}
+#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED)
+
+static inline bool bpf_type_has_unsafe_modifiers(u32 type)
+{
+ return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS;
+}
+
#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/btf.h b/include/linux/btf.h
index d80345fa566b..9ed00077db6e 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -6,6 +6,8 @@
#include <linux/types.h>
#include <linux/bpfptr.h>
+#include <linux/bsearch.h>
+#include <linux/btf_ids.h>
#include <uapi/linux/btf.h>
#include <uapi/linux/bpf.h>
@@ -17,36 +19,53 @@
#define KF_RELEASE (1 << 1) /* kfunc is a release function */
#define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */
#define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */
-/* Trusted arguments are those which are meant to be referenced arguments with
- * unchanged offset. It is used to enforce that pointers obtained from acquire
- * kfuncs remain unmodified when being passed to helpers taking trusted args.
+/* Trusted arguments are those which are guaranteed to be valid when passed to
+ * the kfunc. It is used to enforce that pointers obtained from either acquire
+ * kfuncs, or from the main kernel on a tracepoint or struct_ops callback
+ * invocation, remain unmodified when being passed to helpers taking trusted
+ * args.
*
- * Consider
- * struct foo {
- * int data;
- * struct foo *next;
- * };
+ * Consider, for example, the following new task tracepoint:
*
- * struct bar {
- * int data;
- * struct foo f;
- * };
+ * SEC("tp_btf/task_newtask")
+ * int BPF_PROG(new_task_tp, struct task_struct *task, u64 clone_flags)
+ * {
+ * ...
+ * }
*
- * struct foo *f = alloc_foo(); // Acquire kfunc
- * struct bar *b = alloc_bar(); // Acquire kfunc
+ * And the following kfunc:
*
- * If a kfunc set_foo_data() wants to operate only on the allocated object, it
- * will set the KF_TRUSTED_ARGS flag, which will prevent unsafe usage like:
+ * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
*
- * set_foo_data(f, 42); // Allowed
- * set_foo_data(f->next, 42); // Rejected, non-referenced pointer
- * set_foo_data(&f->next, 42);// Rejected, referenced, but wrong type
- * set_foo_data(&b->f, 42); // Rejected, referenced, but bad offset
+ * All invocations to the kfunc must pass the unmodified, unwalked task:
*
- * In the final case, usually for the purposes of type matching, it is deduced
- * by looking at the type of the member at the offset, but due to the
- * requirement of trusted argument, this deduction will be strict and not done
- * for this case.
+ * bpf_task_acquire(task); // Allowed
+ * bpf_task_acquire(task->last_wakee); // Rejected, walked task
+ *
+ * Programs may also pass referenced tasks directly to the kfunc:
+ *
+ * struct task_struct *acquired;
+ *
+ * acquired = bpf_task_acquire(task); // Allowed, same as above
+ * bpf_task_acquire(acquired); // Allowed
+ * bpf_task_acquire(task); // Allowed
+ * bpf_task_acquire(acquired->last_wakee); // Rejected, walked task
+ *
+ * Programs may _not_, however, pass a task from an arbitrary fentry/fexit, or
+ * kprobe/kretprobe to the kfunc, as BPF cannot guarantee that all of these
+ * pointers are guaranteed to be safe. For example, the following BPF program
+ * would be rejected:
+ *
+ * SEC("kretprobe/free_task")
+ * int BPF_PROG(free_task_probe, struct task_struct *tsk)
+ * {
+ * struct task_struct *acquired;
+ *
+ * acquired = bpf_task_acquire(acquired); // Rejected, not a trusted pointer
+ * bpf_task_release(acquired);
+ *
+ * return 0;
+ * }
*/
#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
@@ -78,6 +97,17 @@ struct btf_id_dtor_kfunc {
u32 kfunc_btf_id;
};
+struct btf_struct_meta {
+ u32 btf_id;
+ struct btf_record *record;
+ struct btf_field_offs *field_offs;
+};
+
+struct btf_struct_metas {
+ u32 cnt;
+ struct btf_struct_meta types[];
+};
+
typedef void (*btf_dtor_kfunc_t)(void *);
extern const struct file_operations btf_fops;
@@ -165,6 +195,7 @@ int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
u32 field_mask, u32 value_size);
+int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec);
struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
@@ -324,6 +355,16 @@ static inline bool btf_type_is_struct(const struct btf_type *t)
return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
}
+static inline bool __btf_type_is_struct(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
+}
+
+static inline bool btf_type_is_array(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
+}
+
static inline u16 btf_type_vlen(const struct btf_type *t)
{
return BTF_INFO_VLEN(t->info);
@@ -408,9 +449,27 @@ static inline struct btf_param *btf_params(const struct btf_type *t)
return (struct btf_param *)(t + 1);
}
-#ifdef CONFIG_BPF_SYSCALL
+static inline int btf_id_cmp_func(const void *a, const void *b)
+{
+ const int *pa = a, *pb = b;
+
+ return *pa - *pb;
+}
+
+static inline bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
+{
+ return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
+}
+
+static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
+{
+ return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
+}
+
struct bpf_prog;
+struct bpf_verifier_log;
+#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
@@ -423,6 +482,14 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
struct module *owner);
+struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id);
+const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg);
+int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type);
+bool btf_types_are_same(const struct btf *btf1, u32 id1,
+ const struct btf *btf2, u32 id2);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
@@ -454,6 +521,26 @@ static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dt
{
return 0;
}
+static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id)
+{
+ return NULL;
+}
+static inline const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg)
+{
+ return NULL;
+}
+static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log,
+ enum bpf_prog_type prog_type) {
+ return -EINVAL;
+}
+static inline bool btf_types_are_same(const struct btf *btf1, u32 id1,
+ const struct btf *btf2, u32 id2)
+{
+ return false;
+}
#endif
static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t)
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index c9744efd202f..93397711a68c 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -204,7 +204,7 @@ extern struct btf_id_set8 name;
#else
-#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[16];
#define BTF_ID(prefix, name)
#define BTF_ID_FLAGS(prefix, name, ...)
#define BTF_ID_UNUSED
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index eb0466236661..7c1afe0f4129 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -49,7 +49,8 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
# endif
# define __iomem
# define __percpu BTF_TYPE_TAG(percpu)
-# define __rcu
+# define __rcu BTF_TYPE_TAG(rcu)
+
# define __chk_user_ptr(x) (void)0
# define __chk_io_ptr(x) (void)0
/* context/locking */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index efc42a6e3aed..bf701976056e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -568,10 +568,10 @@ struct sk_filter {
DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
extern struct mutex nf_conn_btf_access_lock;
-extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf,
- const struct btf_type *t, int off, int size,
- enum bpf_access_type atype, u32 *next_btf_id,
- enum bpf_type_flag *flag);
+extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag);
typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
const struct bpf_insn *insnsi,
@@ -643,13 +643,13 @@ struct bpf_nh_params {
};
struct bpf_redirect_info {
- u32 flags;
- u32 tgt_index;
+ u64 tgt_index;
void *tgt_value;
struct bpf_map *map;
+ u32 flags;
+ u32 kern_flags;
u32 map_id;
enum bpf_map_type map_type;
- u32 kern_flags;
struct bpf_nh_params nh;
};
@@ -1504,7 +1504,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
}
#endif /* IS_ENABLED(CONFIG_IPV6) */
-static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex,
+static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index,
u64 flags, const u64 flag_mask,
void *lookup_elem(struct bpf_map *map, u32 key))
{
@@ -1515,7 +1515,7 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
if (unlikely(flags & ~(action_mask | flag_mask)))
return XDP_ABORTED;
- ri->tgt_value = lookup_elem(map, ifindex);
+ ri->tgt_value = lookup_elem(map, index);
if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) {
/* If the lookup fails we want to clear out the state in the
* redirect_info struct completely, so that if an eBPF program
@@ -1527,7 +1527,7 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
return flags & action_mask;
}
- ri->tgt_index = ifindex;
+ ri->tgt_index = index;
ri->map_id = map->id;
ri->map_type = map->map_type;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 23b3903b0678..5aa35c58c342 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3135,7 +3135,6 @@ struct softnet_data {
/* stats */
unsigned int processed;
unsigned int time_squeeze;
- unsigned int received_rps;
#ifdef CONFIG_RPS
struct softnet_data *rps_ipi_list;
#endif
@@ -3168,6 +3167,7 @@ struct softnet_data {
unsigned int cpu;
unsigned int input_queue_tail;
#endif
+ unsigned int received_rps;
unsigned int dropped;
struct sk_buff_head input_pkt_queue;
struct napi_struct backlog;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index fb4c911d2a03..f89de51a45db 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2584,14 +2584,19 @@ union bpf_attr {
* * **SOL_SOCKET**, which supports the following *optname*\ s:
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
* **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
- * **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
+ * **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**,
+ * **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**.
* * **IPPROTO_TCP**, which supports the following *optname*\ s:
* **TCP_CONGESTION**, **TCP_BPF_IW**,
* **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
+ * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
+ * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
+ * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
+ * **TCP_BPF_RTO_MIN**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
- * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * * **IPPROTO_IPV6**, which supports the following *optname*\ s:
+ * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
* Return
* 0 on success, or a negative error in case of failure.
*
@@ -2647,7 +2652,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags)
* Description
* Redirect the packet to the endpoint referenced by *map* at
* index *key*. Depending on its type, this *map* can contain
@@ -2808,12 +2813,10 @@ union bpf_attr {
* and **BPF_CGROUP_INET6_CONNECT**.
*
* This helper actually implements a subset of **getsockopt()**.
- * It supports the following *level*\ s:
- *
- * * **IPPROTO_TCP**, which supports *optname*
- * **TCP_CONGESTION**.
- * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
- * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * It supports the same set of *optname*\ s that is supported by
+ * the **bpf_setsockopt**\ () helper. The exceptions are
+ * **TCP_BPF_*** is **bpf_setsockopt**\ () only and
+ * **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only.
* Return
* 0 on success, or a negative error in case of failure.
*
@@ -6888,6 +6891,16 @@ struct bpf_dynptr {
__u64 :64;
} __attribute__((aligned(8)));
+struct bpf_list_head {
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
+struct bpf_list_node {
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 672eb17ac421..484706959556 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -430,7 +430,6 @@ static void array_map_free(struct bpf_map *map)
for (i = 0; i < array->map.max_entries; i++)
bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i));
}
- bpf_map_free_record(map);
}
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index d6c9b3705f24..ae0267f150b5 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -151,6 +151,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode)
static const struct bpf_func_proto bpf_ima_inode_hash_proto = {
.func = bpf_ima_inode_hash,
.gpl_only = false,
+ .might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &bpf_ima_inode_hash_btf_ids[0],
@@ -169,6 +170,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_file_hash_btf_ids, struct, file)
static const struct bpf_func_proto bpf_ima_file_hash_proto = {
.func = bpf_ima_file_hash,
.gpl_only = false,
+ .might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &bpf_ima_file_hash_btf_ids[0],
@@ -221,9 +223,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_bprm_opts_set:
return &bpf_bprm_opts_set_proto;
case BPF_FUNC_ima_inode_hash:
- return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
+ return &bpf_ima_inode_hash_proto;
case BPF_FUNC_ima_file_hash:
- return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
+ return &bpf_ima_file_hash_proto;
case BPF_FUNC_get_attach_cookie:
return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
#ifdef CONFIG_NET
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 5579ff3a5b54..d11cbf8cece7 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -199,6 +199,7 @@ DEFINE_IDR(btf_idr);
DEFINE_SPINLOCK(btf_idr_lock);
enum btf_kfunc_hook {
+ BTF_KFUNC_HOOK_COMMON,
BTF_KFUNC_HOOK_XDP,
BTF_KFUNC_HOOK_TC,
BTF_KFUNC_HOOK_STRUCT_OPS,
@@ -237,6 +238,7 @@ struct btf {
struct rcu_head rcu;
struct btf_kfunc_set_tab *kfunc_set_tab;
struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab;
+ struct btf_struct_metas *struct_meta_tab;
/* split BTF support */
struct btf *base_btf;
@@ -477,16 +479,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t)
return !t || btf_type_nosize(t);
}
-static bool __btf_type_is_struct(const struct btf_type *t)
-{
- return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
-}
-
-static bool btf_type_is_array(const struct btf_type *t)
-{
- return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
-}
-
static bool btf_type_is_datasec(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
@@ -1642,8 +1634,30 @@ static void btf_free_dtor_kfunc_tab(struct btf *btf)
btf->dtor_kfunc_tab = NULL;
}
+static void btf_struct_metas_free(struct btf_struct_metas *tab)
+{
+ int i;
+
+ if (!tab)
+ return;
+ for (i = 0; i < tab->cnt; i++) {
+ btf_record_free(tab->types[i].record);
+ kfree(tab->types[i].field_offs);
+ }
+ kfree(tab);
+}
+
+static void btf_free_struct_meta_tab(struct btf *btf)
+{
+ struct btf_struct_metas *tab = btf->struct_meta_tab;
+
+ btf_struct_metas_free(tab);
+ btf->struct_meta_tab = NULL;
+}
+
static void btf_free(struct btf *btf)
{
+ btf_free_struct_meta_tab(btf);
btf_free_dtor_kfunc_tab(btf);
btf_free_kfunc_set_tab(btf);
kvfree(btf->types);
@@ -3205,9 +3219,15 @@ enum {
struct btf_field_info {
enum btf_field_type type;
u32 off;
- struct {
- u32 type_id;
- } kptr;
+ union {
+ struct {
+ u32 type_id;
+ } kptr;
+ struct {
+ const char *node_name;
+ u32 value_btf_id;
+ } list_head;
+ };
};
static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
@@ -3261,6 +3281,63 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
return BTF_FIELD_FOUND;
}
+static const char *btf_find_decl_tag_value(const struct btf *btf,
+ const struct btf_type *pt,
+ int comp_idx, const char *tag_key)
+{
+ int i;
+
+ for (i = 1; i < btf_nr_types(btf); i++) {
+ const struct btf_type *t = btf_type_by_id(btf, i);
+ int len = strlen(tag_key);
+
+ if (!btf_type_is_decl_tag(t))
+ continue;
+ if (pt != btf_type_by_id(btf, t->type) ||
+ btf_type_decl_tag(t)->component_idx != comp_idx)
+ continue;
+ if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len))
+ continue;
+ return __btf_name_by_offset(btf, t->name_off) + len;
+ }
+ return NULL;
+}
+
+static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
+ const struct btf_type *t, int comp_idx,
+ u32 off, int sz, struct btf_field_info *info)
+{
+ const char *value_type;
+ const char *list_node;
+ s32 id;
+
+ if (!__btf_type_is_struct(t))
+ return BTF_FIELD_IGNORE;
+ if (t->size != sz)
+ return BTF_FIELD_IGNORE;
+ value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:");
+ if (!value_type)
+ return -EINVAL;
+ list_node = strstr(value_type, ":");
+ if (!list_node)
+ return -EINVAL;
+ value_type = kstrndup(value_type, list_node - value_type, GFP_KERNEL | __GFP_NOWARN);
+ if (!value_type)
+ return -ENOMEM;
+ id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT);
+ kfree(value_type);
+ if (id < 0)
+ return id;
+ list_node++;
+ if (str_is_empty(list_node))
+ return -EINVAL;
+ info->type = BPF_LIST_HEAD;
+ info->off = off;
+ info->list_head.value_btf_id = id;
+ info->list_head.node_name = list_node;
+ return BTF_FIELD_FOUND;
+}
+
static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
int *align, int *sz)
{
@@ -3284,6 +3361,18 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
goto end;
}
}
+ if (field_mask & BPF_LIST_HEAD) {
+ if (!strcmp(name, "bpf_list_head")) {
+ type = BPF_LIST_HEAD;
+ goto end;
+ }
+ }
+ if (field_mask & BPF_LIST_NODE) {
+ if (!strcmp(name, "bpf_list_node")) {
+ type = BPF_LIST_NODE;
+ goto end;
+ }
+ }
/* Only return BPF_KPTR when all other types with matchable names fail */
if (field_mask & BPF_KPTR) {
type = BPF_KPTR_REF;
@@ -3327,6 +3416,7 @@ static int btf_find_struct_field(const struct btf *btf,
switch (field_type) {
case BPF_SPIN_LOCK:
case BPF_TIMER:
+ case BPF_LIST_NODE:
ret = btf_find_struct(btf, member_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3339,6 +3429,12 @@ static int btf_find_struct_field(const struct btf *btf,
if (ret < 0)
return ret;
break;
+ case BPF_LIST_HEAD:
+ ret = btf_find_list_head(btf, t, member_type, i, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
default:
return -EFAULT;
}
@@ -3381,6 +3477,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
switch (field_type) {
case BPF_SPIN_LOCK:
case BPF_TIMER:
+ case BPF_LIST_NODE:
ret = btf_find_struct(btf, var_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3393,6 +3490,12 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
if (ret < 0)
return ret;
break;
+ case BPF_LIST_HEAD:
+ ret = btf_find_list_head(btf, var, var_type, -1, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
default:
return -EFAULT;
}
@@ -3491,11 +3594,52 @@ end_btf:
return ret;
}
+static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
+ struct btf_field_info *info)
+{
+ const struct btf_type *t, *n = NULL;
+ const struct btf_member *member;
+ u32 offset;
+ int i;
+
+ t = btf_type_by_id(btf, info->list_head.value_btf_id);
+ /* We've already checked that value_btf_id is a struct type. We
+ * just need to figure out the offset of the list_node, and
+ * verify its type.
+ */
+ for_each_member(i, t, member) {
+ if (strcmp(info->list_head.node_name, __btf_name_by_offset(btf, member->name_off)))
+ continue;
+ /* Invalid BTF, two members with same name */
+ if (n)
+ return -EINVAL;
+ n = btf_type_by_id(btf, member->type);
+ if (!__btf_type_is_struct(n))
+ return -EINVAL;
+ if (strcmp("bpf_list_node", __btf_name_by_offset(btf, n->name_off)))
+ return -EINVAL;
+ offset = __btf_member_bit_offset(n, member);
+ if (offset % 8)
+ return -EINVAL;
+ offset /= 8;
+ if (offset % __alignof__(struct bpf_list_node))
+ return -EINVAL;
+
+ field->list_head.btf = (struct btf *)btf;
+ field->list_head.value_btf_id = info->list_head.value_btf_id;
+ field->list_head.node_offset = offset;
+ }
+ if (!n)
+ return -ENOENT;
+ return 0;
+}
+
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
u32 field_mask, u32 value_size)
{
struct btf_field_info info_arr[BTF_FIELDS_MAX];
struct btf_record *rec;
+ u32 next_off = 0;
int ret, i, cnt;
ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr));
@@ -3505,6 +3649,9 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
return NULL;
cnt = ret;
+ /* This needs to be kzalloc to zero out padding and unused fields, see
+ * comment in btf_record_equal.
+ */
rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN);
if (!rec)
return ERR_PTR(-ENOMEM);
@@ -3517,6 +3664,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
ret = -EFAULT;
goto end;
}
+ if (info_arr[i].off < next_off) {
+ ret = -EEXIST;
+ goto end;
+ }
+ next_off = info_arr[i].off + btf_field_type_size(info_arr[i].type);
rec->field_mask |= info_arr[i].type;
rec->fields[i].offset = info_arr[i].off;
@@ -3539,18 +3691,93 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
if (ret < 0)
goto end;
break;
+ case BPF_LIST_HEAD:
+ ret = btf_parse_list_head(btf, &rec->fields[i], &info_arr[i]);
+ if (ret < 0)
+ goto end;
+ break;
+ case BPF_LIST_NODE:
+ break;
default:
ret = -EFAULT;
goto end;
}
rec->cnt++;
}
+
+ /* bpf_list_head requires bpf_spin_lock */
+ if (btf_record_has_field(rec, BPF_LIST_HEAD) && rec->spin_lock_off < 0) {
+ ret = -EINVAL;
+ goto end;
+ }
+
return rec;
end:
btf_record_free(rec);
return ERR_PTR(ret);
}
+int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
+{
+ int i;
+
+ /* There are two owning types, kptr_ref and bpf_list_head. The former
+ * only supports storing kernel types, which can never store references
+ * to program allocated local types, atleast not yet. Hence we only need
+ * to ensure that bpf_list_head ownership does not form cycles.
+ */
+ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_LIST_HEAD))
+ return 0;
+ for (i = 0; i < rec->cnt; i++) {
+ struct btf_struct_meta *meta;
+ u32 btf_id;
+
+ if (!(rec->fields[i].type & BPF_LIST_HEAD))
+ continue;
+ btf_id = rec->fields[i].list_head.value_btf_id;
+ meta = btf_find_struct_meta(btf, btf_id);
+ if (!meta)
+ return -EFAULT;
+ rec->fields[i].list_head.value_rec = meta->record;
+
+ if (!(rec->field_mask & BPF_LIST_NODE))
+ continue;
+
+ /* We need to ensure ownership acyclicity among all types. The
+ * proper way to do it would be to topologically sort all BTF
+ * IDs based on the ownership edges, since there can be multiple
+ * bpf_list_head in a type. Instead, we use the following
+ * reasoning:
+ *
+ * - A type can only be owned by another type in user BTF if it
+ * has a bpf_list_node.
+ * - A type can only _own_ another type in user BTF if it has a
+ * bpf_list_head.
+ *
+ * We ensure that if a type has both bpf_list_head and
+ * bpf_list_node, its element types cannot be owning types.
+ *
+ * To ensure acyclicity:
+ *
+ * When A only has bpf_list_head, ownership chain can be:
+ * A -> B -> C
+ * Where:
+ * - B has both bpf_list_head and bpf_list_node.
+ * - C only has bpf_list_node.
+ *
+ * When A has both bpf_list_head and bpf_list_node, some other
+ * type already owns it in the BTF domain, hence it can not own
+ * another owning type through any of the bpf_list_head edges.
+ * A -> B
+ * Where:
+ * - B only has bpf_list_node.
+ */
+ if (meta->record->field_mask & BPF_LIST_HEAD)
+ return -ELOOP;
+ }
+ return 0;
+}
+
static int btf_field_offs_cmp(const void *_a, const void *_b, const void *priv)
{
const u32 a = *(const u32 *)_a;
@@ -3584,7 +3811,7 @@ struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec)
u8 *sz;
BUILD_BUG_ON(ARRAY_SIZE(foffs->field_off) != ARRAY_SIZE(foffs->field_sz));
- if (IS_ERR_OR_NULL(rec) || WARN_ON_ONCE(rec->cnt > sizeof(foffs->field_off)))
+ if (IS_ERR_OR_NULL(rec))
return NULL;
foffs = kzalloc(sizeof(*foffs), GFP_KERNEL | __GFP_NOWARN);
@@ -4552,7 +4779,6 @@ static int btf_func_proto_check(struct btf_verifier_env *env,
nr_args--;
}
- err = 0;
for (i = 0; i < nr_args; i++) {
const struct btf_type *arg_type;
u32 arg_type_id;
@@ -4561,8 +4787,12 @@ static int btf_func_proto_check(struct btf_verifier_env *env,
arg_type = btf_type_by_id(btf, arg_type_id);
if (!arg_type) {
btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
- err = -EINVAL;
- break;
+ return -EINVAL;
+ }
+
+ if (btf_type_is_resolve_source_only(arg_type)) {
+ btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
+ return -EINVAL;
}
if (args[i].name_off &&
@@ -4570,25 +4800,23 @@ static int btf_func_proto_check(struct btf_verifier_env *env,
!btf_name_valid_identifier(btf, args[i].name_off))) {
btf_verifier_log_type(env, t,
"Invalid arg#%u", i + 1);
- err = -EINVAL;
- break;
+ return -EINVAL;
}
if (btf_type_needs_resolve(arg_type) &&
!env_type_is_resolved(env, arg_type_id)) {
err = btf_resolve(env, arg_type, arg_type_id);
if (err)
- break;
+ return err;
}
if (!btf_type_id_size(btf, &arg_type_id, NULL)) {
btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
- err = -EINVAL;
- break;
+ return -EINVAL;
}
}
- return err;
+ return 0;
}
static int btf_func_check(struct btf_verifier_env *env,
@@ -5002,6 +5230,119 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
return btf_check_sec_info(env, btf_data_size);
}
+static const char *alloc_obj_fields[] = {
+ "bpf_spin_lock",
+ "bpf_list_head",
+ "bpf_list_node",
+};
+
+static struct btf_struct_metas *
+btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
+{
+ union {
+ struct btf_id_set set;
+ struct {
+ u32 _cnt;
+ u32 _ids[ARRAY_SIZE(alloc_obj_fields)];
+ } _arr;
+ } aof;
+ struct btf_struct_metas *tab = NULL;
+ int i, n, id, ret;
+
+ BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0);
+ BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32));
+
+ memset(&aof, 0, sizeof(aof));
+ for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) {
+ /* Try to find whether this special type exists in user BTF, and
+ * if so remember its ID so we can easily find it among members
+ * of structs that we iterate in the next loop.
+ */
+ id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT);
+ if (id < 0)
+ continue;
+ aof.set.ids[aof.set.cnt++] = id;
+ }
+
+ if (!aof.set.cnt)
+ return NULL;
+ sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL);
+
+ n = btf_nr_types(btf);
+ for (i = 1; i < n; i++) {
+ struct btf_struct_metas *new_tab;
+ const struct btf_member *member;
+ struct btf_field_offs *foffs;
+ struct btf_struct_meta *type;
+ struct btf_record *record;
+ const struct btf_type *t;
+ int j, tab_cnt;
+
+ t = btf_type_by_id(btf, i);
+ if (!t) {
+ ret = -EINVAL;
+ goto free;
+ }
+ if (!__btf_type_is_struct(t))
+ continue;
+
+ cond_resched();
+
+ for_each_member(j, t, member) {
+ if (btf_id_set_contains(&aof.set, member->type))
+ goto parse;
+ }
+ continue;
+ parse:
+ tab_cnt = tab ? tab->cnt : 0;
+ new_tab = krealloc(tab, offsetof(struct btf_struct_metas, types[tab_cnt + 1]),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!new_tab) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ if (!tab)
+ new_tab->cnt = 0;
+ tab = new_tab;
+
+ type = &tab->types[tab->cnt];
+ type->btf_id = i;
+ record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE, t->size);
+ /* The record cannot be unset, treat it as an error if so */
+ if (IS_ERR_OR_NULL(record)) {
+ ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
+ goto free;
+ }
+ foffs = btf_parse_field_offs(record);
+ /* We need the field_offs to be valid for a valid record,
+ * either both should be set or both should be unset.
+ */
+ if (IS_ERR_OR_NULL(foffs)) {
+ btf_record_free(record);
+ ret = -EFAULT;
+ goto free;
+ }
+ type->record = record;
+ type->field_offs = foffs;
+ tab->cnt++;
+ }
+ return tab;
+free:
+ btf_struct_metas_free(tab);
+ return ERR_PTR(ret);
+}
+
+struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id)
+{
+ struct btf_struct_metas *tab;
+
+ BUILD_BUG_ON(offsetof(struct btf_struct_meta, btf_id) != 0);
+ tab = btf->struct_meta_tab;
+ if (!tab)
+ return NULL;
+ return bsearch(&btf_id, tab->types, tab->cnt, sizeof(tab->types[0]), btf_id_cmp_func);
+}
+
static int btf_check_type_tags(struct btf_verifier_env *env,
struct btf *btf, int start_id)
{
@@ -5052,6 +5393,7 @@ static int btf_check_type_tags(struct btf_verifier_env *env,
static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
u32 log_level, char __user *log_ubuf, u32 log_size)
{
+ struct btf_struct_metas *struct_meta_tab;
struct btf_verifier_env *env = NULL;
struct bpf_verifier_log *log;
struct btf *btf = NULL;
@@ -5120,15 +5462,34 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
if (err)
goto errout;
+ struct_meta_tab = btf_parse_struct_metas(log, btf);
+ if (IS_ERR(struct_meta_tab)) {
+ err = PTR_ERR(struct_meta_tab);
+ goto errout;
+ }
+ btf->struct_meta_tab = struct_meta_tab;
+
+ if (struct_meta_tab) {
+ int i;
+
+ for (i = 0; i < struct_meta_tab->cnt; i++) {
+ err = btf_check_and_fixup_fields(btf, struct_meta_tab->types[i].record);
+ if (err < 0)
+ goto errout_meta;
+ }
+ }
+
if (log->level && bpf_verifier_log_full(log)) {
err = -ENOSPC;
- goto errout;
+ goto errout_meta;
}
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
+errout_meta:
+ btf_free_struct_meta_tab(btf);
errout:
btf_verifier_env_free(env);
if (btf)
@@ -5170,7 +5531,7 @@ static u8 bpf_ctx_convert_map[] = {
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
-static const struct btf_member *
+const struct btf_member *
btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, enum bpf_prog_type prog_type,
int arg)
@@ -5243,6 +5604,26 @@ static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
return kern_ctx_type->type;
}
+int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type)
+{
+ const struct btf_member *kctx_member;
+ const struct btf_type *conv_struct;
+ const struct btf_type *kctx_type;
+ u32 kctx_type_id;
+
+ conv_struct = bpf_ctx_convert.t;
+ /* get member for kernel ctx type */
+ kctx_member = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1;
+ kctx_type_id = kctx_member->type;
+ kctx_type = btf_type_by_id(btf_vmlinux, kctx_type_id);
+ if (!btf_type_is_struct(kctx_type)) {
+ bpf_log(log, "kern ctx type id %u is not a struct\n", kctx_type_id);
+ return -EINVAL;
+ }
+
+ return kctx_type_id;
+}
+
BTF_ID_LIST(bpf_ctx_convert_btf_id)
BTF_ID(struct, bpf_ctx_convert)
@@ -5440,6 +5821,21 @@ static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto,
return nr_args + 1;
}
+static bool prog_args_trusted(const struct bpf_prog *prog)
+{
+ enum bpf_attach_type atype = prog->expected_attach_type;
+
+ switch (prog->type) {
+ case BPF_PROG_TYPE_TRACING:
+ return atype == BPF_TRACE_RAW_TP || atype == BPF_TRACE_ITER;
+ case BPF_PROG_TYPE_LSM:
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
@@ -5583,6 +5979,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
info->reg_type = PTR_TO_BTF_ID;
+ if (prog_args_trusted(prog))
+ info->reg_type |= PTR_TRUSTED;
+
if (tgt_prog) {
enum bpf_prog_type tgt_type;
@@ -5849,6 +6248,9 @@ error:
/* check __percpu tag */
if (strcmp(tag_value, "percpu") == 0)
tmp_flag = MEM_PERCPU;
+ /* check __rcu tag */
+ if (strcmp(tag_value, "rcu") == 0)
+ tmp_flag = MEM_RCU;
}
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
@@ -5878,20 +6280,50 @@ error:
return -EINVAL;
}
-int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
- const struct btf_type *t, int off, int size,
- enum bpf_access_type atype __maybe_unused,
+int btf_struct_access(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype __maybe_unused,
u32 *next_btf_id, enum bpf_type_flag *flag)
{
+ const struct btf *btf = reg->btf;
enum bpf_type_flag tmp_flag = 0;
+ const struct btf_type *t;
+ u32 id = reg->btf_id;
int err;
- u32 id;
+ while (type_is_alloc(reg->type)) {
+ struct btf_struct_meta *meta;
+ struct btf_record *rec;
+ int i;
+
+ meta = btf_find_struct_meta(btf, id);
+ if (!meta)
+ break;
+ rec = meta->record;
+ for (i = 0; i < rec->cnt; i++) {
+ struct btf_field *field = &rec->fields[i];
+ u32 offset = field->offset;
+ if (off < offset + btf_field_type_size(field->type) && offset < off + size) {
+ bpf_log(log,
+ "direct access to %s is disallowed\n",
+ btf_field_type_name(field->type));
+ return -EACCES;
+ }
+ }
+ break;
+ }
+
+ t = btf_type_by_id(btf, id);
do {
err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag);
switch (err) {
case WALK_PTR:
+ /* For local types, the destination register cannot
+ * become a pointer again.
+ */
+ if (type_is_alloc(reg->type))
+ return SCALAR_VALUE;
/* If we found the pointer or scalar on t+off,
* we're done.
*/
@@ -5926,8 +6358,8 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
* end up with two different module BTFs, but IDs point to the common type in
* vmlinux BTF.
*/
-static bool btf_types_are_same(const struct btf *btf1, u32 id1,
- const struct btf *btf2, u32 id2)
+bool btf_types_are_same(const struct btf *btf1, u32 id1,
+ const struct btf *btf2, u32 id2)
{
if (id1 != id2)
return false;
@@ -6209,122 +6641,19 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr
return btf_check_func_type_match(log, btf1, t1, btf2, t2);
}
-static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
-#ifdef CONFIG_NET
- [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
- [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
- [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
-#endif
-};
-
-/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
-static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int rec)
-{
- const struct btf_type *member_type;
- const struct btf_member *member;
- u32 i;
-
- if (!btf_type_is_struct(t))
- return false;
-
- for_each_member(i, t, member) {
- const struct btf_array *array;
-
- member_type = btf_type_skip_modifiers(btf, member->type, NULL);
- if (btf_type_is_struct(member_type)) {
- if (rec >= 3) {
- bpf_log(log, "max struct nesting depth exceeded\n");
- return false;
- }
- if (!__btf_type_is_scalar_struct(log, btf, member_type, rec + 1))
- return false;
- continue;
- }
- if (btf_type_is_array(member_type)) {
- array = btf_type_array(member_type);
- if (!array->nelems)
- return false;
- member_type = btf_type_skip_modifiers(btf, array->type, NULL);
- if (!btf_type_is_scalar(member_type))
- return false;
- continue;
- }
- if (!btf_type_is_scalar(member_type))
- return false;
- }
- return true;
-}
-
-static bool is_kfunc_arg_mem_size(const struct btf *btf,
- const struct btf_param *arg,
- const struct bpf_reg_state *reg)
-{
- int len, sfx_len = sizeof("__sz") - 1;
- const struct btf_type *t;
- const char *param_name;
-
- t = btf_type_skip_modifiers(btf, arg->type, NULL);
- if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
- return false;
-
- /* In the future, this can be ported to use BTF tagging */
- param_name = btf_name_by_offset(btf, arg->name_off);
- if (str_is_empty(param_name))
- return false;
- len = strlen(param_name);
- if (len < sfx_len)
- return false;
- param_name += len - sfx_len;
- if (strncmp(param_name, "__sz", sfx_len))
- return false;
-
- return true;
-}
-
-static bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
- const struct btf_param *arg,
- const struct bpf_reg_state *reg,
- const char *name)
-{
- int len, target_len = strlen(name);
- const struct btf_type *t;
- const char *param_name;
-
- t = btf_type_skip_modifiers(btf, arg->type, NULL);
- if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
- return false;
-
- param_name = btf_name_by_offset(btf, arg->name_off);
- if (str_is_empty(param_name))
- return false;
- len = strlen(param_name);
- if (len != target_len)
- return false;
- if (strcmp(param_name, name))
- return false;
-
- return true;
-}
-
static int btf_check_func_arg_match(struct bpf_verifier_env *env,
const struct btf *btf, u32 func_id,
struct bpf_reg_state *regs,
bool ptr_to_mem_ok,
- struct bpf_kfunc_arg_meta *kfunc_meta,
bool processing_call)
{
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
- bool rel = false, kptr_get = false, trusted_args = false;
- bool sleepable = false;
struct bpf_verifier_log *log = &env->log;
- u32 i, nargs, ref_id, ref_obj_id = 0;
- bool is_kfunc = btf_is_kernel(btf);
const char *func_name, *ref_tname;
const struct btf_type *t, *ref_t;
const struct btf_param *args;
- int ref_regno = 0, ret;
+ u32 i, nargs, ref_id;
+ int ret;
t = btf_type_by_id(btf, func_id);
if (!t || !btf_type_is_func(t)) {
@@ -6350,14 +6679,6 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
- if (is_kfunc && kfunc_meta) {
- /* Only kfunc can be release func */
- rel = kfunc_meta->flags & KF_RELEASE;
- kptr_get = kfunc_meta->flags & KF_KPTR_GET;
- trusted_args = kfunc_meta->flags & KF_TRUSTED_ARGS;
- sleepable = kfunc_meta->flags & KF_SLEEPABLE;
- }
-
/* check that BTF function arguments match actual types that the
* verifier sees.
*/
@@ -6365,42 +6686,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
enum bpf_arg_type arg_type = ARG_DONTCARE;
u32 regno = i + 1;
struct bpf_reg_state *reg = &regs[regno];
- bool obj_ptr = false;
t = btf_type_skip_modifiers(btf, args[i].type, NULL);
if (btf_type_is_scalar(t)) {
- if (is_kfunc && kfunc_meta) {
- bool is_buf_size = false;
-
- /* check for any const scalar parameter of name "rdonly_buf_size"
- * or "rdwr_buf_size"
- */
- if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg,
- "rdonly_buf_size")) {
- kfunc_meta->r0_rdonly = true;
- is_buf_size = true;
- } else if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg,
- "rdwr_buf_size"))
- is_buf_size = true;
-
- if (is_buf_size) {
- if (kfunc_meta->r0_size) {
- bpf_log(log, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
- return -EINVAL;
- }
-
- if (!tnum_is_const(reg->var_off)) {
- bpf_log(log, "R%d is not a const\n", regno);
- return -EINVAL;
- }
-
- kfunc_meta->r0_size = reg->var_off.value;
- ret = mark_chain_precision(env, regno);
- if (ret)
- return ret;
- }
- }
-
if (reg->type == SCALAR_VALUE)
continue;
bpf_log(log, "R%d is not a scalar\n", regno);
@@ -6413,88 +6701,14 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
- /* These register types have special constraints wrt ref_obj_id
- * and offset checks. The rest of trusted args don't.
- */
- obj_ptr = reg->type == PTR_TO_CTX || reg->type == PTR_TO_BTF_ID ||
- reg2btf_ids[base_type(reg->type)];
-
- /* Check if argument must be a referenced pointer, args + i has
- * been verified to be a pointer (after skipping modifiers).
- * PTR_TO_CTX is ok without having non-zero ref_obj_id.
- */
- if (is_kfunc && trusted_args && (obj_ptr && reg->type != PTR_TO_CTX) && !reg->ref_obj_id) {
- bpf_log(log, "R%d must be referenced\n", regno);
- return -EINVAL;
- }
-
ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
ref_tname = btf_name_by_offset(btf, ref_t->name_off);
- /* Trusted args have the same offset checks as release arguments */
- if ((trusted_args && obj_ptr) || (rel && reg->ref_obj_id))
- arg_type |= OBJ_RELEASE;
ret = check_func_arg_reg_off(env, reg, regno, arg_type);
if (ret < 0)
return ret;
- if (is_kfunc && reg->ref_obj_id) {
- /* Ensure only one argument is referenced PTR_TO_BTF_ID */
- if (ref_obj_id) {
- bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
- regno, reg->ref_obj_id, ref_obj_id);
- return -EFAULT;
- }
- ref_regno = regno;
- ref_obj_id = reg->ref_obj_id;
- }
-
- /* kptr_get is only true for kfunc */
- if (i == 0 && kptr_get) {
- struct btf_field *kptr_field;
-
- if (reg->type != PTR_TO_MAP_VALUE) {
- bpf_log(log, "arg#0 expected pointer to map value\n");
- return -EINVAL;
- }
-
- /* check_func_arg_reg_off allows var_off for
- * PTR_TO_MAP_VALUE, but we need fixed offset to find
- * off_desc.
- */
- if (!tnum_is_const(reg->var_off)) {
- bpf_log(log, "arg#0 must have constant offset\n");
- return -EINVAL;
- }
-
- kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR);
- if (!kptr_field || kptr_field->type != BPF_KPTR_REF) {
- bpf_log(log, "arg#0 no referenced kptr at map value offset=%llu\n",
- reg->off + reg->var_off.value);
- return -EINVAL;
- }
-
- if (!btf_type_is_ptr(ref_t)) {
- bpf_log(log, "arg#0 BTF type must be a double pointer\n");
- return -EINVAL;
- }
-
- ref_t = btf_type_skip_modifiers(btf, ref_t->type, &ref_id);
- ref_tname = btf_name_by_offset(btf, ref_t->name_off);
-
- if (!btf_type_is_struct(ref_t)) {
- bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n",
- func_name, i, btf_type_str(ref_t), ref_tname);
- return -EINVAL;
- }
- if (!btf_struct_ids_match(log, btf, ref_id, 0, kptr_field->kptr.btf,
- kptr_field->kptr.btf_id, true)) {
- bpf_log(log, "kernel function %s args#%d expected pointer to %s %s\n",
- func_name, i, btf_type_str(ref_t), ref_tname);
- return -EINVAL;
- }
- /* rest of the arguments can be anything, like normal kfunc */
- } else if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) {
+ if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) {
/* If function expects ctx type in BTF check that caller
* is passing PTR_TO_CTX.
*/
@@ -6504,109 +6718,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
i, btf_type_str(t));
return -EINVAL;
}
- } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID ||
- (reg2btf_ids[base_type(reg->type)] && !type_flag(reg->type)))) {
- const struct btf_type *reg_ref_t;
- const struct btf *reg_btf;
- const char *reg_ref_tname;
- u32 reg_ref_id;
-
- if (!btf_type_is_struct(ref_t)) {
- bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n",
- func_name, i, btf_type_str(ref_t),
- ref_tname);
- return -EINVAL;
- }
-
- if (reg->type == PTR_TO_BTF_ID) {
- reg_btf = reg->btf;
- reg_ref_id = reg->btf_id;
- } else {
- reg_btf = btf_vmlinux;
- reg_ref_id = *reg2btf_ids[base_type(reg->type)];
- }
-
- reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id,
- &reg_ref_id);
- reg_ref_tname = btf_name_by_offset(reg_btf,
- reg_ref_t->name_off);
- if (!btf_struct_ids_match(log, reg_btf, reg_ref_id,
- reg->off, btf, ref_id,
- trusted_args || (rel && reg->ref_obj_id))) {
- bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
- func_name, i,
- btf_type_str(ref_t), ref_tname,
- regno, btf_type_str(reg_ref_t),
- reg_ref_tname);
- return -EINVAL;
- }
} else if (ptr_to_mem_ok && processing_call) {
const struct btf_type *resolve_ret;
u32 type_size;
- if (is_kfunc) {
- bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
- bool arg_dynptr = btf_type_is_struct(ref_t) &&
- !strcmp(ref_tname,
- stringify_struct(bpf_dynptr_kern));
-
- /* Permit pointer to mem, but only when argument
- * type is pointer to scalar, or struct composed
- * (recursively) of scalars.
- * When arg_mem_size is true, the pointer can be
- * void *.
- * Also permit initialized local dynamic pointers.
- */
- if (!btf_type_is_scalar(ref_t) &&
- !__btf_type_is_scalar_struct(log, btf, ref_t, 0) &&
- !arg_dynptr &&
- (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
- bpf_log(log,
- "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
- i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
- return -EINVAL;
- }
-
- if (arg_dynptr) {
- if (reg->type != PTR_TO_STACK) {
- bpf_log(log, "arg#%d pointer type %s %s not to stack\n",
- i, btf_type_str(ref_t),
- ref_tname);
- return -EINVAL;
- }
-
- if (!is_dynptr_reg_valid_init(env, reg)) {
- bpf_log(log,
- "arg#%d pointer type %s %s must be valid and initialized\n",
- i, btf_type_str(ref_t),
- ref_tname);
- return -EINVAL;
- }
-
- if (!is_dynptr_type_expected(env, reg,
- ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) {
- bpf_log(log,
- "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n",
- i, btf_type_str(ref_t),
- ref_tname);
- return -EINVAL;
- }
-
- continue;
- }
-
- /* Check for mem, len pair */
- if (arg_mem_size) {
- if (check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1)) {
- bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n",
- i, i + 1);
- return -EINVAL;
- }
- i++;
- continue;
- }
- }
-
resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
if (IS_ERR(resolve_ret)) {
bpf_log(log,
@@ -6619,36 +6734,13 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
if (check_mem_reg(env, reg, regno, type_size))
return -EINVAL;
} else {
- bpf_log(log, "reg type unsupported for arg#%d %sfunction %s#%d\n", i,
- is_kfunc ? "kernel " : "", func_name, func_id);
+ bpf_log(log, "reg type unsupported for arg#%d function %s#%d\n", i,
+ func_name, func_id);
return -EINVAL;
}
}
- /* Either both are set, or neither */
- WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno));
- /* We already made sure ref_obj_id is set only for one argument. We do
- * allow (!rel && ref_obj_id), so that passing such referenced
- * PTR_TO_BTF_ID to other kfuncs works. Note that rel is only true when
- * is_kfunc is true.
- */
- if (rel && !ref_obj_id) {
- bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
- func_name);
- return -EINVAL;
- }
-
- if (sleepable && !env->prog->aux->sleepable) {
- bpf_log(log, "kernel function %s is sleepable but the program is not\n",
- func_name);
- return -EINVAL;
- }
-
- if (kfunc_meta && ref_obj_id)
- kfunc_meta->ref_obj_id = ref_obj_id;
-
- /* returns argument register number > 0 in case of reference release kfunc */
- return rel ? ref_regno : 0;
+ return 0;
}
/* Compare BTF of a function declaration with given bpf_reg_state.
@@ -6678,7 +6770,7 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
return -EINVAL;
is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
- err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, NULL, false);
+ err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, false);
/* Compiler optimizations can remove arguments from static functions
* or mismatched type can be passed into a global function.
@@ -6721,7 +6813,7 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
return -EINVAL;
is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
- err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, NULL, true);
+ err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, true);
/* Compiler optimizations can remove arguments from static functions
* or mismatched type can be passed into a global function.
@@ -6732,14 +6824,6 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
return err;
}
-int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
- const struct btf *btf, u32 func_id,
- struct bpf_reg_state *regs,
- struct bpf_kfunc_arg_meta *meta)
-{
- return btf_check_func_arg_match(env, btf, func_id, regs, true, meta, true);
-}
-
/* Convert BTF of a function into bpf_reg_state if possible
* Returns:
* EFAULT - there is a verifier bug. Abort verification.
@@ -7122,23 +7206,6 @@ bool btf_is_module(const struct btf *btf)
return btf->kernel_btf && strcmp(btf->name, "vmlinux") != 0;
}
-static int btf_id_cmp_func(const void *a, const void *b)
-{
- const int *pa = a, *pb = b;
-
- return *pa - *pb;
-}
-
-bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
-{
- return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
-}
-
-static void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
-{
- return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
-}
-
enum {
BTF_MODULE_F_LIVE = (1 << 0),
};
@@ -7499,6 +7566,8 @@ static u32 *__btf_kfunc_id_set_contains(const struct btf *btf,
static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
{
switch (prog_type) {
+ case BPF_PROG_TYPE_UNSPEC:
+ return BTF_KFUNC_HOOK_COMMON;
case BPF_PROG_TYPE_XDP:
return BTF_KFUNC_HOOK_XDP;
case BPF_PROG_TYPE_SCHED_CLS:
@@ -7527,6 +7596,11 @@ u32 *btf_kfunc_id_set_contains(const struct btf *btf,
u32 kfunc_btf_id)
{
enum btf_kfunc_hook hook;
+ u32 *kfunc_flags;
+
+ kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id);
+ if (kfunc_flags)
+ return kfunc_flags;
hook = bpf_prog_type_to_kfunc_hook(prog_type);
return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c
index fbc6167c3599..06989d278846 100644
--- a/kernel/bpf/cgroup_iter.c
+++ b/kernel/bpf/cgroup_iter.c
@@ -164,16 +164,30 @@ static int cgroup_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux)
struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv;
struct cgroup *cgrp = aux->cgroup.start;
+ /* bpf_iter_attach_cgroup() has already acquired an extra reference
+ * for the start cgroup, but the reference may be released after
+ * cgroup_iter_seq_init(), so acquire another reference for the
+ * start cgroup.
+ */
p->start_css = &cgrp->self;
+ css_get(p->start_css);
p->terminate = false;
p->visited_all = false;
p->order = aux->cgroup.order;
return 0;
}
+static void cgroup_iter_seq_fini(void *priv)
+{
+ struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv;
+
+ css_put(p->start_css);
+}
+
static const struct bpf_iter_seq_info cgroup_iter_seq_info = {
.seq_ops = &cgroup_iter_seq_ops,
.init_seq_private = cgroup_iter_seq_init,
+ .fini_seq_private = cgroup_iter_seq_fini,
.seq_priv_size = sizeof(struct cgroup_iter_priv),
};
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9c16338bcbe8..2e57fc839a5c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -34,6 +34,7 @@
#include <linux/log2.h>
#include <linux/bpf_verifier.h>
#include <linux/nodemask.h>
+#include <linux/bpf_mem_alloc.h>
#include <asm/barrier.h>
#include <asm/unaligned.h>
@@ -60,6 +61,9 @@
#define CTX regs[BPF_REG_CTX]
#define IMM insn->imm
+struct bpf_mem_alloc bpf_global_ma;
+bool bpf_global_ma_set;
+
/* No hurry in this branch
*
* Exported for the bpf jit load helper.
@@ -2746,6 +2750,18 @@ int __weak bpf_arch_text_invalidate(void *dst, size_t len)
return -ENOTSUPP;
}
+#ifdef CONFIG_BPF_SYSCALL
+static int __init bpf_global_ma_init(void)
+{
+ int ret;
+
+ ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
+ bpf_global_ma_set = !ret;
+ return ret;
+}
+late_initcall(bpf_global_ma_init);
+#endif
+
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 6b6a78c04b90..e0b2d016f0bf 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -667,9 +667,9 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0;
}
-static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+static int cpu_map_redirect(struct bpf_map *map, u64 index, u64 flags)
{
- return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
+ return __bpf_xdp_redirect_map(map, index, flags, 0,
__cpu_map_lookup_elem);
}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index f9a87dcc5535..d01e4c55b376 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -992,14 +992,14 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
map, key, value, map_flags);
}
-static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+static int dev_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags)
{
return __bpf_xdp_redirect_map(map, ifindex, flags,
BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
__dev_map_lookup_elem);
}
-static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+static int dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags)
{
return __bpf_xdp_redirect_map(map, ifindex, flags,
BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 50d254cd0709..5aa2b5525f79 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1511,7 +1511,6 @@ static void htab_map_free(struct bpf_map *map)
prealloc_destroy(htab);
}
- bpf_map_free_record(map);
free_percpu(htab->extra_elems);
bpf_map_area_free(htab->buckets);
bpf_mem_alloc_destroy(&htab->pcpu_ma);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 283f55bbeb70..a5a511430f2a 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf-cgroup.h>
+#include <linux/cgroup.h>
#include <linux/rcupdate.h>
#include <linux/random.h>
#include <linux/smp.h>
@@ -19,6 +20,7 @@
#include <linux/proc_ns.h>
#include <linux/security.h>
#include <linux/btf_ids.h>
+#include <linux/bpf_mem_alloc.h>
#include "../../lib/kstrtox.h"
@@ -336,6 +338,7 @@ const struct bpf_func_proto bpf_spin_lock_proto = {
.gpl_only = false,
.ret_type = RET_VOID,
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
+ .arg1_btf_id = BPF_PTR_POISON,
};
static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
@@ -358,6 +361,7 @@ const struct bpf_func_proto bpf_spin_unlock_proto = {
.gpl_only = false,
.ret_type = RET_VOID,
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
+ .arg1_btf_id = BPF_PTR_POISON,
};
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
@@ -657,6 +661,7 @@ BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
const struct bpf_func_proto bpf_copy_from_user_proto = {
.func = bpf_copy_from_user,
.gpl_only = false,
+ .might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
@@ -687,6 +692,7 @@ BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
const struct bpf_func_proto bpf_copy_from_user_task_proto = {
.func = bpf_copy_from_user_task,
.gpl_only = true,
+ .might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
@@ -1706,20 +1712,367 @@ bpf_base_func_proto(enum bpf_func_id func_id)
}
}
-BTF_SET8_START(tracing_btf_ids)
+void bpf_list_head_free(const struct btf_field *field, void *list_head,
+ struct bpf_spin_lock *spin_lock)
+{
+ struct list_head *head = list_head, *orig_head = list_head;
+
+ BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head));
+ BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head));
+
+ /* Do the actual list draining outside the lock to not hold the lock for
+ * too long, and also prevent deadlocks if tracing programs end up
+ * executing on entry/exit of functions called inside the critical
+ * section, and end up doing map ops that call bpf_list_head_free for
+ * the same map value again.
+ */
+ __bpf_spin_lock_irqsave(spin_lock);
+ if (!head->next || list_empty(head))
+ goto unlock;
+ head = head->next;
+unlock:
+ INIT_LIST_HEAD(orig_head);
+ __bpf_spin_unlock_irqrestore(spin_lock);
+
+ while (head != orig_head) {
+ void *obj = head;
+
+ obj -= field->list_head.node_offset;
+ head = head->next;
+ /* The contained type can also have resources, including a
+ * bpf_list_head which needs to be freed.
+ */
+ bpf_obj_free_fields(field->list_head.value_rec, obj);
+ /* bpf_mem_free requires migrate_disable(), since we can be
+ * called from map free path as well apart from BPF program (as
+ * part of map ops doing bpf_obj_free_fields).
+ */
+ migrate_disable();
+ bpf_mem_free(&bpf_global_ma, obj);
+ migrate_enable();
+ }
+}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+
+void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+{
+ struct btf_struct_meta *meta = meta__ign;
+ u64 size = local_type_id__k;
+ void *p;
+
+ p = bpf_mem_alloc(&bpf_global_ma, size);
+ if (!p)
+ return NULL;
+ if (meta)
+ bpf_obj_init(meta->field_offs, p);
+ return p;
+}
+
+void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
+{
+ struct btf_struct_meta *meta = meta__ign;
+ void *p = p__alloc;
+
+ if (meta)
+ bpf_obj_free_fields(meta->record, p);
+ bpf_mem_free(&bpf_global_ma, p);
+}
+
+static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail)
+{
+ struct list_head *n = (void *)node, *h = (void *)head;
+
+ if (unlikely(!h->next))
+ INIT_LIST_HEAD(h);
+ if (unlikely(!n->next))
+ INIT_LIST_HEAD(n);
+ tail ? list_add_tail(n, h) : list_add(n, h);
+}
+
+void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node)
+{
+ return __bpf_list_add(node, head, false);
+}
+
+void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node)
+{
+ return __bpf_list_add(node, head, true);
+}
+
+static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
+{
+ struct list_head *n, *h = (void *)head;
+
+ if (unlikely(!h->next))
+ INIT_LIST_HEAD(h);
+ if (list_empty(h))
+ return NULL;
+ n = tail ? h->prev : h->next;
+ list_del_init(n);
+ return (struct bpf_list_node *)n;
+}
+
+struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
+{
+ return __bpf_list_del(head, false);
+}
+
+struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
+{
+ return __bpf_list_del(head, true);
+}
+
+/**
+ * bpf_task_acquire - Acquire a reference to a task. A task acquired by this
+ * kfunc which is not stored in a map as a kptr, must be released by calling
+ * bpf_task_release().
+ * @p: The task on which a reference is being acquired.
+ */
+struct task_struct *bpf_task_acquire(struct task_struct *p)
+{
+ refcount_inc(&p->rcu_users);
+ return p;
+}
+
+/**
+ * bpf_task_kptr_get - Acquire a reference on a struct task_struct kptr. A task
+ * kptr acquired by this kfunc which is not subsequently stored in a map, must
+ * be released by calling bpf_task_release().
+ * @pp: A pointer to a task kptr on which a reference is being acquired.
+ */
+struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
+{
+ struct task_struct *p;
+
+ rcu_read_lock();
+ p = READ_ONCE(*pp);
+
+ /* Another context could remove the task from the map and release it at
+ * any time, including after we've done the lookup above. This is safe
+ * because we're in an RCU read region, so the task is guaranteed to
+ * remain valid until at least the rcu_read_unlock() below.
+ */
+ if (p && !refcount_inc_not_zero(&p->rcu_users))
+ /* If the task had been removed from the map and freed as
+ * described above, refcount_inc_not_zero() will return false.
+ * The task will be freed at some point after the current RCU
+ * gp has ended, so just return NULL to the user.
+ */
+ p = NULL;
+ rcu_read_unlock();
+
+ return p;
+}
+
+/**
+ * bpf_task_release - Release the reference acquired on a struct task_struct *.
+ * If this kfunc is invoked in an RCU read region, the task_struct is
+ * guaranteed to not be freed until the current grace period has ended, even if
+ * its refcount drops to 0.
+ * @p: The task on which a reference is being released.
+ */
+void bpf_task_release(struct task_struct *p)
+{
+ if (!p)
+ return;
+
+ put_task_struct_rcu_user(p);
+}
+
+#ifdef CONFIG_CGROUPS
+/**
+ * bpf_cgroup_acquire - Acquire a reference to a cgroup. A cgroup acquired by
+ * this kfunc which is not stored in a map as a kptr, must be released by
+ * calling bpf_cgroup_release().
+ * @cgrp: The cgroup on which a reference is being acquired.
+ */
+struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
+{
+ cgroup_get(cgrp);
+ return cgrp;
+}
+
+/**
+ * bpf_cgroup_kptr_get - Acquire a reference on a struct cgroup kptr. A cgroup
+ * kptr acquired by this kfunc which is not subsequently stored in a map, must
+ * be released by calling bpf_cgroup_release().
+ * @cgrpp: A pointer to a cgroup kptr on which a reference is being acquired.
+ */
+struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
+{
+ struct cgroup *cgrp;
+
+ rcu_read_lock();
+ /* Another context could remove the cgroup from the map and release it
+ * at any time, including after we've done the lookup above. This is
+ * safe because we're in an RCU read region, so the cgroup is
+ * guaranteed to remain valid until at least the rcu_read_unlock()
+ * below.
+ */
+ cgrp = READ_ONCE(*cgrpp);
+
+ if (cgrp && !cgroup_tryget(cgrp))
+ /* If the cgroup had been removed from the map and freed as
+ * described above, cgroup_tryget() will return false. The
+ * cgroup will be freed at some point after the current RCU gp
+ * has ended, so just return NULL to the user.
+ */
+ cgrp = NULL;
+ rcu_read_unlock();
+
+ return cgrp;
+}
+
+/**
+ * bpf_cgroup_release - Release the reference acquired on a struct cgroup *.
+ * If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to
+ * not be freed until the current grace period has ended, even if its refcount
+ * drops to 0.
+ * @cgrp: The cgroup on which a reference is being released.
+ */
+void bpf_cgroup_release(struct cgroup *cgrp)
+{
+ if (!cgrp)
+ return;
+
+ cgroup_put(cgrp);
+}
+
+/**
+ * bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor
+ * array. A cgroup returned by this kfunc which is not subsequently stored in a
+ * map, must be released by calling bpf_cgroup_release().
+ * @cgrp: The cgroup for which we're performing a lookup.
+ * @level: The level of ancestor to look up.
+ */
+struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
+{
+ struct cgroup *ancestor;
+
+ if (level > cgrp->level || level < 0)
+ return NULL;
+
+ ancestor = cgrp->ancestors[level];
+ cgroup_get(ancestor);
+ return ancestor;
+}
+#endif /* CONFIG_CGROUPS */
+
+/**
+ * bpf_task_from_pid - Find a struct task_struct from its pid by looking it up
+ * in the root pid namespace idr. If a task is returned, it must either be
+ * stored in a map, or released with bpf_task_release().
+ * @pid: The pid of the task being looked up.
+ */
+struct task_struct *bpf_task_from_pid(s32 pid)
+{
+ struct task_struct *p;
+
+ rcu_read_lock();
+ p = find_task_by_pid_ns(pid, &init_pid_ns);
+ if (p)
+ bpf_task_acquire(p);
+ rcu_read_unlock();
+
+ return p;
+}
+
+void *bpf_cast_to_kern_ctx(void *obj)
+{
+ return obj;
+}
+
+void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
+{
+ return obj__ign;
+}
+
+void bpf_rcu_read_lock(void)
+{
+ rcu_read_lock();
+}
+
+void bpf_rcu_read_unlock(void)
+{
+ rcu_read_unlock();
+}
+
+__diag_pop();
+
+BTF_SET8_START(generic_btf_ids)
#ifdef CONFIG_KEXEC_CORE
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
-BTF_SET8_END(tracing_btf_ids)
+BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_list_push_front)
+BTF_ID_FLAGS(func, bpf_list_push_back)
+BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
+#ifdef CONFIG_CGROUPS
+BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)
+#endif
+BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
+BTF_SET8_END(generic_btf_ids)
+
+static const struct btf_kfunc_id_set generic_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &generic_btf_ids,
+};
+
+
+BTF_ID_LIST(generic_dtor_ids)
+BTF_ID(struct, task_struct)
+BTF_ID(func, bpf_task_release)
+#ifdef CONFIG_CGROUPS
+BTF_ID(struct, cgroup)
+BTF_ID(func, bpf_cgroup_release)
+#endif
-static const struct btf_kfunc_id_set tracing_kfunc_set = {
+BTF_SET8_START(common_btf_ids)
+BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
+BTF_ID_FLAGS(func, bpf_rdonly_cast)
+BTF_ID_FLAGS(func, bpf_rcu_read_lock)
+BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
+BTF_SET8_END(common_btf_ids)
+
+static const struct btf_kfunc_id_set common_kfunc_set = {
.owner = THIS_MODULE,
- .set = &tracing_btf_ids,
+ .set = &common_btf_ids,
};
static int __init kfunc_init(void)
{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &tracing_kfunc_set);
+ int ret;
+ const struct btf_id_dtor_kfunc generic_dtors[] = {
+ {
+ .btf_id = generic_dtor_ids[0],
+ .kfunc_btf_id = generic_dtor_ids[1]
+ },
+#ifdef CONFIG_CGROUPS
+ {
+ .btf_id = generic_dtor_ids[2],
+ .kfunc_btf_id = generic_dtor_ids[3]
+ },
+#endif
+ };
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
+ ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
+ ARRAY_SIZE(generic_dtors),
+ THIS_MODULE);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
late_initcall(kfunc_init);
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 8ca0cca39d49..38136ec4e095 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -12,6 +12,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
struct bpf_map *inner_map, *inner_map_meta;
u32 inner_map_meta_size;
struct fd f;
+ int ret;
f = fdget(inner_map_ufd);
inner_map = __bpf_map_get(f);
@@ -20,18 +21,13 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
/* Does not support >1 level map-in-map */
if (inner_map->inner_map_meta) {
- fdput(f);
- return ERR_PTR(-EINVAL);
+ ret = -EINVAL;
+ goto put;
}
if (!inner_map->ops->map_meta_equal) {
- fdput(f);
- return ERR_PTR(-ENOTSUPP);
- }
-
- if (btf_record_has_field(inner_map->record, BPF_SPIN_LOCK)) {
- fdput(f);
- return ERR_PTR(-ENOTSUPP);
+ ret = -ENOTSUPP;
+ goto put;
}
inner_map_meta_size = sizeof(*inner_map_meta);
@@ -41,8 +37,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
if (!inner_map_meta) {
- fdput(f);
- return ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
+ goto put;
}
inner_map_meta->map_type = inner_map->map_type;
@@ -50,15 +46,33 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
inner_map_meta->value_size = inner_map->value_size;
inner_map_meta->map_flags = inner_map->map_flags;
inner_map_meta->max_entries = inner_map->max_entries;
+
inner_map_meta->record = btf_record_dup(inner_map->record);
if (IS_ERR(inner_map_meta->record)) {
/* btf_record_dup returns NULL or valid pointer in case of
* invalid/empty/valid, but ERR_PTR in case of errors. During
* equality NULL or IS_ERR is equivalent.
*/
- fdput(f);
- return ERR_CAST(inner_map_meta->record);
+ ret = PTR_ERR(inner_map_meta->record);
+ goto free;
}
+ if (inner_map_meta->record) {
+ struct btf_field_offs *field_offs;
+ /* If btf_record is !IS_ERR_OR_NULL, then field_offs is always
+ * valid.
+ */
+ field_offs = kmemdup(inner_map->field_offs, sizeof(*inner_map->field_offs), GFP_KERNEL | __GFP_NOWARN);
+ if (!field_offs) {
+ ret = -ENOMEM;
+ goto free_rec;
+ }
+ inner_map_meta->field_offs = field_offs;
+ }
+ /* Note: We must use the same BTF, as we also used btf_record_dup above
+ * which relies on BTF being same for both maps, as some members like
+ * record->fields.list_head have pointers like value_rec pointing into
+ * inner_map->btf.
+ */
if (inner_map->btf) {
btf_get(inner_map->btf);
inner_map_meta->btf = inner_map->btf;
@@ -74,10 +88,18 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
fdput(f);
return inner_map_meta;
+free_rec:
+ btf_record_free(inner_map_meta->record);
+free:
+ kfree(inner_map_meta);
+put:
+ fdput(f);
+ return ERR_PTR(ret);
}
void bpf_map_meta_free(struct bpf_map *map_meta)
{
+ kfree(map_meta->field_offs);
bpf_map_free_record(map_meta);
btf_put(map_meta->btf);
kfree(map_meta);
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 9e832acf4692..80f4b4d88aaf 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -447,7 +447,7 @@ BPF_CALL_3(bpf_ringbuf_reserve, struct bpf_map *, map, u64, size, u64, flags)
const struct bpf_func_proto bpf_ringbuf_reserve_proto = {
.func = bpf_ringbuf_reserve,
- .ret_type = RET_PTR_TO_ALLOC_MEM_OR_NULL,
+ .ret_type = RET_PTR_TO_RINGBUF_MEM_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_CONST_ALLOC_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
@@ -490,7 +490,7 @@ BPF_CALL_2(bpf_ringbuf_submit, void *, sample, u64, flags)
const struct bpf_func_proto bpf_ringbuf_submit_proto = {
.func = bpf_ringbuf_submit,
.ret_type = RET_VOID,
- .arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE,
+ .arg1_type = ARG_PTR_TO_RINGBUF_MEM | OBJ_RELEASE,
.arg2_type = ARG_ANYTHING,
};
@@ -503,7 +503,7 @@ BPF_CALL_2(bpf_ringbuf_discard, void *, sample, u64, flags)
const struct bpf_func_proto bpf_ringbuf_discard_proto = {
.func = bpf_ringbuf_discard,
.ret_type = RET_VOID,
- .arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE,
+ .arg1_type = ARG_PTR_TO_RINGBUF_MEM | OBJ_RELEASE,
.arg2_type = ARG_ANYTHING,
};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 85532d301124..35972afb6850 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -175,8 +175,8 @@ static void maybe_wait_bpf_programs(struct bpf_map *map)
synchronize_rcu();
}
-static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
- void *value, __u64 flags)
+static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
+ void *key, void *value, __u64 flags)
{
int err;
@@ -190,7 +190,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
map->map_type == BPF_MAP_TYPE_SOCKMAP) {
return sock_map_update_elem_sys(map, key, value, flags);
} else if (IS_FD_PROG_ARRAY(map)) {
- return bpf_fd_array_map_update_elem(map, f.file, key, value,
+ return bpf_fd_array_map_update_elem(map, map_file, key, value,
flags);
}
@@ -205,12 +205,12 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
flags);
} else if (IS_FD_ARRAY(map)) {
rcu_read_lock();
- err = bpf_fd_array_map_update_elem(map, f.file, key, value,
+ err = bpf_fd_array_map_update_elem(map, map_file, key, value,
flags);
rcu_read_unlock();
} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
rcu_read_lock();
- err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
+ err = bpf_fd_htab_map_update_elem(map, map_file, key, value,
flags);
rcu_read_unlock();
} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
@@ -536,6 +536,10 @@ void btf_record_free(struct btf_record *rec)
module_put(rec->fields[i].kptr.module);
btf_put(rec->fields[i].kptr.btf);
break;
+ case BPF_LIST_HEAD:
+ case BPF_LIST_NODE:
+ /* Nothing to release for bpf_list_head */
+ break;
default:
WARN_ON_ONCE(1);
continue;
@@ -578,6 +582,10 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
goto free;
}
break;
+ case BPF_LIST_HEAD:
+ case BPF_LIST_NODE:
+ /* Nothing to acquire for bpf_list_head */
+ break;
default:
ret = -EFAULT;
WARN_ON_ONCE(1);
@@ -603,6 +611,20 @@ bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *r
if (rec_a->cnt != rec_b->cnt)
return false;
size = offsetof(struct btf_record, fields[rec_a->cnt]);
+ /* btf_parse_fields uses kzalloc to allocate a btf_record, so unused
+ * members are zeroed out. So memcmp is safe to do without worrying
+ * about padding/unused fields.
+ *
+ * While spin_lock, timer, and kptr have no relation to map BTF,
+ * list_head metadata is specific to map BTF, the btf and value_rec
+ * members in particular. btf is the map BTF, while value_rec points to
+ * btf_record in that map BTF.
+ *
+ * So while by default, we don't rely on the map BTF (which the records
+ * were parsed from) matching for both records, which is not backwards
+ * compatible, in case list_head is part of it, we implicitly rely on
+ * that by way of depending on memcmp succeeding for it.
+ */
return !memcmp(rec_a, rec_b, size);
}
@@ -637,6 +659,13 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
case BPF_KPTR_REF:
field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0));
break;
+ case BPF_LIST_HEAD:
+ if (WARN_ON_ONCE(rec->spin_lock_off < 0))
+ continue;
+ bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off);
+ break;
+ case BPF_LIST_NODE:
+ break;
default:
WARN_ON_ONCE(1);
continue;
@@ -648,14 +677,24 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
+ struct btf_field_offs *foffs = map->field_offs;
+ struct btf_record *rec = map->record;
security_bpf_map_free(map);
- kfree(map->field_offs);
bpf_map_release_memcg(map);
- /* implementation dependent freeing, map_free callback also does
- * bpf_map_free_record, if needed.
- */
+ /* implementation dependent freeing */
map->ops->map_free(map);
+ /* Delay freeing of field_offs and btf_record for maps, as map_free
+ * callback usually needs access to them. It is better to do it here
+ * than require each callback to do the free itself manually.
+ *
+ * Note that the btf_record stashed in map->inner_map_meta->record was
+ * already freed using the map_free callback for map in map case which
+ * eventually calls bpf_map_free_meta, since inner_map_meta is only a
+ * template bpf_map struct used during verification.
+ */
+ kfree(foffs);
+ btf_record_free(rec);
}
static void bpf_map_put_uref(struct bpf_map *map)
@@ -965,7 +1004,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
if (!value_type || value_size != map->value_size)
return -EINVAL;
- map->record = btf_parse_fields(btf, value_type, BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR,
+ map->record = btf_parse_fields(btf, value_type,
+ BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD,
map->value_size);
if (!IS_ERR_OR_NULL(map->record)) {
int i;
@@ -998,7 +1038,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY) {
- return -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
goto free_map_tab;
}
break;
@@ -1012,6 +1052,14 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
goto free_map_tab;
}
break;
+ case BPF_LIST_HEAD:
+ if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+ map->map_type != BPF_MAP_TYPE_ARRAY) {
+ ret = -EOPNOTSUPP;
+ goto free_map_tab;
+ }
+ break;
default:
/* Fail if map_type checks are missing for a field type */
ret = -EOPNOTSUPP;
@@ -1020,6 +1068,10 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
}
}
+ ret = btf_check_and_fixup_fields(btf, map->record);
+ if (ret < 0)
+ goto free_map_tab;
+
if (map->ops->map_check_btf) {
ret = map->ops->map_check_btf(map, btf, key_type, value_type);
if (ret < 0)
@@ -1390,7 +1442,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
goto free_key;
}
- err = bpf_map_update_value(map, f, key, value, attr->flags);
+ err = bpf_map_update_value(map, f.file, key, value, attr->flags);
kvfree(value);
free_key:
@@ -1576,16 +1628,14 @@ int generic_map_delete_batch(struct bpf_map *map,
return err;
}
-int generic_map_update_batch(struct bpf_map *map,
+int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
void __user *values = u64_to_user_ptr(attr->batch.values);
void __user *keys = u64_to_user_ptr(attr->batch.keys);
u32 value_size, cp, max_count;
- int ufd = attr->batch.map_fd;
void *key, *value;
- struct fd f;
int err = 0;
if (attr->batch.elem_flags & ~BPF_F_LOCK)
@@ -1612,7 +1662,6 @@ int generic_map_update_batch(struct bpf_map *map,
return -ENOMEM;
}
- f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */
for (cp = 0; cp < max_count; cp++) {
err = -EFAULT;
if (copy_from_user(key, keys + cp * map->key_size,
@@ -1620,7 +1669,7 @@ int generic_map_update_batch(struct bpf_map *map,
copy_from_user(value, values + cp * value_size, value_size))
break;
- err = bpf_map_update_value(map, f, key, value,
+ err = bpf_map_update_value(map, map_file, key, value,
attr->batch.elem_flags);
if (err)
@@ -1633,7 +1682,6 @@ int generic_map_update_batch(struct bpf_map *map,
kvfree(value);
kvfree(key);
- fdput(f);
return err;
}
@@ -4426,13 +4474,13 @@ put_file:
#define BPF_MAP_BATCH_LAST_FIELD batch.flags
-#define BPF_DO_BATCH(fn) \
+#define BPF_DO_BATCH(fn, ...) \
do { \
if (!fn) { \
err = -ENOTSUPP; \
goto err_put; \
} \
- err = fn(map, attr, uattr); \
+ err = fn(__VA_ARGS__); \
} while (0)
static int bpf_map_do_batch(const union bpf_attr *attr,
@@ -4466,13 +4514,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
}
if (cmd == BPF_MAP_LOOKUP_BATCH)
- BPF_DO_BATCH(map->ops->map_lookup_batch);
+ BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr);
else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
- BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
+ BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr);
else if (cmd == BPF_MAP_UPDATE_BATCH)
- BPF_DO_BATCH(map->ops->map_update_batch);
+ BPF_DO_BATCH(map->ops->map_update_batch, map, f.file, attr, uattr);
else
- BPF_DO_BATCH(map->ops->map_delete_batch);
+ BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr);
err_put:
if (has_write)
bpf_map_write_active_dec(map);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 648a549c41ae..4e7f1d085e53 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -451,10 +451,24 @@ static bool reg_type_not_null(enum bpf_reg_type type)
type == PTR_TO_SOCK_COMMON;
}
+static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
+{
+ struct btf_record *rec = NULL;
+ struct btf_struct_meta *meta;
+
+ if (reg->type == PTR_TO_MAP_VALUE) {
+ rec = reg->map_ptr->record;
+ } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ meta = btf_find_struct_meta(reg->btf, reg->btf_id);
+ if (meta)
+ rec = meta->record;
+ }
+ return rec;
+}
+
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
- return reg->type == PTR_TO_MAP_VALUE &&
- btf_record_has_field(reg->map_ptr->record, BPF_SPIN_LOCK);
+ return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
}
static bool type_is_rdonly_mem(u32 type)
@@ -513,6 +527,14 @@ static bool is_callback_calling_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_user_ringbuf_drain;
}
+static bool is_storage_get_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_sk_storage_get ||
+ func_id == BPF_FUNC_inode_storage_get ||
+ func_id == BPF_FUNC_task_storage_get ||
+ func_id == BPF_FUNC_cgrp_storage_get;
+}
+
static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
const struct bpf_map *map)
{
@@ -543,7 +565,7 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn)
static const char *reg_type_str(struct bpf_verifier_env *env,
enum bpf_reg_type type)
{
- char postfix[16] = {0}, prefix[32] = {0};
+ char postfix[16] = {0}, prefix[64] = {0};
static const char * const str[] = {
[NOT_INIT] = "?",
[SCALAR_VALUE] = "scalar",
@@ -575,16 +597,15 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
strncpy(postfix, "_or_null", 16);
}
- if (type & MEM_RDONLY)
- strncpy(prefix, "rdonly_", 32);
- if (type & MEM_ALLOC)
- strncpy(prefix, "alloc_", 32);
- if (type & MEM_USER)
- strncpy(prefix, "user_", 32);
- if (type & MEM_PERCPU)
- strncpy(prefix, "percpu_", 32);
- if (type & PTR_UNTRUSTED)
- strncpy(prefix, "untrusted_", 32);
+ snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
+ type & MEM_RDONLY ? "rdonly_" : "",
+ type & MEM_RINGBUF ? "ringbuf_" : "",
+ type & MEM_USER ? "user_" : "",
+ type & MEM_PERCPU ? "percpu_" : "",
+ type & MEM_RCU ? "rcu_" : "",
+ type & PTR_UNTRUSTED ? "untrusted_" : "",
+ type & PTR_TRUSTED ? "trusted_" : ""
+ );
snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
prefix, str[base_type(type)], postfix);
@@ -1010,9 +1031,9 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t
if (unlikely(check_mul_overflow(n, size, &bytes)))
return NULL;
- if (ksize(dst) < bytes) {
+ if (ksize(dst) < ksize(src)) {
kfree(dst);
- dst = kmalloc_track_caller(bytes, flags);
+ dst = kmalloc_track_caller(kmalloc_size_roundup(bytes), flags);
if (!dst)
return NULL;
}
@@ -1029,12 +1050,14 @@ out:
*/
static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
{
+ size_t alloc_size;
void *new_arr;
if (!new_n || old_n == new_n)
goto out;
- new_arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
+ alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
+ new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
if (!new_arr) {
kfree(arr);
return NULL;
@@ -1206,8 +1229,10 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
dst_state->frame[i] = NULL;
}
dst_state->speculative = src->speculative;
+ dst_state->active_rcu_lock = src->active_rcu_lock;
dst_state->curframe = src->curframe;
- dst_state->active_spin_lock = src->active_spin_lock;
+ dst_state->active_lock.ptr = src->active_lock.ptr;
+ dst_state->active_lock.id = src->active_lock.id;
dst_state->branches = src->branches;
dst_state->parent = src->parent;
dst_state->first_insn_idx = src->first_insn_idx;
@@ -2506,9 +2531,11 @@ static int push_jmp_history(struct bpf_verifier_env *env,
{
u32 cnt = cur->jmp_history_cnt;
struct bpf_idx_pair *p;
+ size_t alloc_size;
cnt++;
- p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
+ alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
+ p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
if (!p)
return -ENOMEM;
p[cnt - 1].idx = env->insn_idx;
@@ -3844,7 +3871,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, u32 regno)
{
const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
- int perm_flags = PTR_MAYBE_NULL;
+ int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED;
const char *reg_name = "";
/* Only unreferenced case accepts untrusted pointers */
@@ -4241,6 +4268,25 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
return reg->type == PTR_TO_FLOW_KEYS;
}
+static bool is_trusted_reg(const struct bpf_reg_state *reg)
+{
+ /* A referenced register is always trusted. */
+ if (reg->ref_obj_id)
+ return true;
+
+ /* If a register is not referenced, it is trusted if it has the
+ * MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the
+ * other type modifiers may be safe, but we elect to take an opt-in
+ * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
+ * not.
+ *
+ * Eventually, we should make PTR_TRUSTED the single source of truth
+ * for whether a register is trusted.
+ */
+ return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
+ !bpf_type_has_unsafe_modifiers(reg->type);
+}
+
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int off, int size, bool strict)
@@ -4687,17 +4733,28 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EACCES;
}
- if (env->ops->btf_struct_access) {
- ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
- off, size, atype, &btf_id, &flag);
+ if (env->ops->btf_struct_access && !type_is_alloc(reg->type)) {
+ if (!btf_is_kernel(reg->btf)) {
+ verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
+ return -EFAULT;
+ }
+ ret = env->ops->btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
} else {
- if (atype != BPF_READ) {
+ /* Writes are permitted with default btf_struct_access for
+ * program allocated objects (which always have ref_obj_id > 0),
+ * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
+ */
+ if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
verbose(env, "only read is supported\n");
return -EACCES;
}
- ret = btf_struct_access(&env->log, reg->btf, t, off, size,
- atype, &btf_id, &flag);
+ if (type_is_alloc(reg->type) && !reg->ref_obj_id) {
+ verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
+ return -EFAULT;
+ }
+
+ ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
}
if (ret < 0)
@@ -4709,6 +4766,28 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (type_flag(reg->type) & PTR_UNTRUSTED)
flag |= PTR_UNTRUSTED;
+ /* By default any pointer obtained from walking a trusted pointer is
+ * no longer trusted except the rcu case below.
+ */
+ flag &= ~PTR_TRUSTED;
+
+ if (flag & MEM_RCU) {
+ /* Mark value register as MEM_RCU only if it is protected by
+ * bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU
+ * itself can already indicate trustedness inside the rcu
+ * read lock region. Also mark it as PTR_TRUSTED.
+ */
+ if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg))
+ flag &= ~MEM_RCU;
+ else
+ flag |= PTR_TRUSTED;
+ } else if (reg->type & MEM_RCU) {
+ /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
+ * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
+ */
+ flag |= PTR_UNTRUSTED;
+ }
+
if (atype == BPF_READ && value_regno >= 0)
mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
@@ -4723,6 +4802,7 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
{
struct bpf_reg_state *reg = regs + regno;
struct bpf_map *map = reg->map_ptr;
+ struct bpf_reg_state map_reg;
enum bpf_type_flag flag = 0;
const struct btf_type *t;
const char *tname;
@@ -4761,7 +4841,10 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
return -EACCES;
}
- ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag);
+ /* Simulate access to a PTR_TO_BTF_ID */
+ memset(&map_reg, 0, sizeof(map_reg));
+ mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
+ ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag);
if (ret < 0)
return ret;
@@ -5520,8 +5603,8 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
return err;
}
-int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
- u32 regno)
+static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ u32 regno)
{
struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
bool may_be_null = type_may_be_null(mem_reg->type);
@@ -5549,23 +5632,26 @@ int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state
}
/* Implementation details:
- * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
+ * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
+ * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
* Two bpf_map_lookups (even with the same key) will have different reg->id.
- * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
- * value_or_null->value transition, since the verifier only cares about
- * the range of access to valid map value pointer and doesn't care about actual
- * address of the map element.
+ * Two separate bpf_obj_new will also have different reg->id.
+ * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
+ * clears reg->id after value_or_null->value transition, since the verifier only
+ * cares about the range of access to valid map value pointer and doesn't care
+ * about actual address of the map element.
* For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
* reg->id > 0 after value_or_null->value transition. By doing so
* two bpf_map_lookups will be considered two different pointers that
- * point to different bpf_spin_locks.
+ * point to different bpf_spin_locks. Likewise for pointers to allocated objects
+ * returned from bpf_obj_new.
* The verifier allows taking only one bpf_spin_lock at a time to avoid
* dead-locks.
* Since only one bpf_spin_lock is allowed the checks are simpler than
* reg_is_refcounted() logic. The verifier needs to remember only
* one spin_lock instead of array of acquired_refs.
- * cur_state->active_spin_lock remembers which map value element got locked
- * and clears it after bpf_spin_unlock.
+ * cur_state->active_lock remembers which map value element or allocated
+ * object got locked and clears it after bpf_spin_unlock.
*/
static int process_spin_lock(struct bpf_verifier_env *env, int regno,
bool is_lock)
@@ -5573,8 +5659,10 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
struct bpf_verifier_state *cur = env->cur_state;
bool is_const = tnum_is_const(reg->var_off);
- struct bpf_map *map = reg->map_ptr;
u64 val = reg->var_off.value;
+ struct bpf_map *map = NULL;
+ struct btf *btf = NULL;
+ struct btf_record *rec;
if (!is_const) {
verbose(env,
@@ -5582,38 +5670,78 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
regno);
return -EINVAL;
}
- if (!map->btf) {
- verbose(env,
- "map '%s' has to have BTF in order to use bpf_spin_lock\n",
- map->name);
- return -EINVAL;
+ if (reg->type == PTR_TO_MAP_VALUE) {
+ map = reg->map_ptr;
+ if (!map->btf) {
+ verbose(env,
+ "map '%s' has to have BTF in order to use bpf_spin_lock\n",
+ map->name);
+ return -EINVAL;
+ }
+ } else {
+ btf = reg->btf;
}
- if (!btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
- verbose(env, "map '%s' has no valid bpf_spin_lock\n", map->name);
+
+ rec = reg_btf_record(reg);
+ if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) {
+ verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
+ map ? map->name : "kptr");
return -EINVAL;
}
- if (map->record->spin_lock_off != val + reg->off) {
+ if (rec->spin_lock_off != val + reg->off) {
verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
- val + reg->off, map->record->spin_lock_off);
+ val + reg->off, rec->spin_lock_off);
return -EINVAL;
}
if (is_lock) {
- if (cur->active_spin_lock) {
+ if (cur->active_lock.ptr) {
verbose(env,
"Locking two bpf_spin_locks are not allowed\n");
return -EINVAL;
}
- cur->active_spin_lock = reg->id;
+ if (map)
+ cur->active_lock.ptr = map;
+ else
+ cur->active_lock.ptr = btf;
+ cur->active_lock.id = reg->id;
} else {
- if (!cur->active_spin_lock) {
+ struct bpf_func_state *fstate = cur_func(env);
+ void *ptr;
+ int i;
+
+ if (map)
+ ptr = map;
+ else
+ ptr = btf;
+
+ if (!cur->active_lock.ptr) {
verbose(env, "bpf_spin_unlock without taking a lock\n");
return -EINVAL;
}
- if (cur->active_spin_lock != reg->id) {
+ if (cur->active_lock.ptr != ptr ||
+ cur->active_lock.id != reg->id) {
verbose(env, "bpf_spin_unlock of different lock\n");
return -EINVAL;
}
- cur->active_spin_lock = 0;
+ cur->active_lock.ptr = NULL;
+ cur->active_lock.id = 0;
+
+ for (i = 0; i < fstate->acquired_refs; i++) {
+ int err;
+
+ /* Complain on error because this reference state cannot
+ * be freed before this point, as bpf_spin_lock critical
+ * section does not allow functions that release the
+ * allocated object immediately.
+ */
+ if (!fstate->refs[i].release_on_unlock)
+ continue;
+ err = release_reference(env, fstate->refs[i].id);
+ if (err) {
+ verbose(env, "failed to release release_on_unlock reference");
+ return err;
+ }
+ }
}
return 0;
}
@@ -5772,6 +5900,7 @@ static const struct bpf_reg_types btf_id_sock_common_types = {
PTR_TO_TCP_SOCK,
PTR_TO_XDP_SOCK,
PTR_TO_BTF_ID,
+ PTR_TO_BTF_ID | PTR_TRUSTED,
},
.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
};
@@ -5785,7 +5914,7 @@ static const struct bpf_reg_types mem_types = {
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
PTR_TO_MEM,
- PTR_TO_MEM | MEM_ALLOC,
+ PTR_TO_MEM | MEM_RINGBUF,
PTR_TO_BUF,
},
};
@@ -5800,14 +5929,31 @@ static const struct bpf_reg_types int_ptr_types = {
},
};
+static const struct bpf_reg_types spin_lock_types = {
+ .types = {
+ PTR_TO_MAP_VALUE,
+ PTR_TO_BTF_ID | MEM_ALLOC,
+ }
+};
+
static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
-static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
+static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
-static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
-static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
-static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
+static const struct bpf_reg_types btf_ptr_types = {
+ .types = {
+ PTR_TO_BTF_ID,
+ PTR_TO_BTF_ID | PTR_TRUSTED,
+ PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED,
+ },
+};
+static const struct bpf_reg_types percpu_btf_ptr_types = {
+ .types = {
+ PTR_TO_BTF_ID | MEM_PERCPU,
+ PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
+ }
+};
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
@@ -5836,7 +5982,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
[ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
[ARG_PTR_TO_MEM] = &mem_types,
- [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
+ [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types,
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
@@ -5895,7 +6041,7 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
found:
- if (reg->type == PTR_TO_BTF_ID) {
+ if (reg->type == PTR_TO_BTF_ID || reg->type & PTR_TRUSTED) {
/* For bpf_sk_release, it needs to match against first member
* 'struct sock_common', hence make an exception for it. This
* allows bpf_sk_release to work for multiple socket types.
@@ -5931,6 +6077,11 @@ found:
return -EACCES;
}
}
+ } else if (type_is_alloc(reg->type)) {
+ if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) {
+ verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
+ return -EFAULT;
+ }
}
return 0;
@@ -5957,20 +6108,24 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_MAP_VALUE:
case PTR_TO_MEM:
case PTR_TO_MEM | MEM_RDONLY:
- case PTR_TO_MEM | MEM_ALLOC:
+ case PTR_TO_MEM | MEM_RINGBUF:
case PTR_TO_BUF:
case PTR_TO_BUF | MEM_RDONLY:
case SCALAR_VALUE:
/* Some of the argument types nevertheless require a
* zero register offset.
*/
- if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM)
+ if (base_type(arg_type) != ARG_PTR_TO_RINGBUF_MEM)
return 0;
break;
/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
* fixed offset.
*/
case PTR_TO_BTF_ID:
+ case PTR_TO_BTF_ID | MEM_ALLOC:
+ case PTR_TO_BTF_ID | PTR_TRUSTED:
+ case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED:
+ case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* it's fixed offset must be 0. In the other cases, fixed offset
* can be non-zero.
@@ -6046,7 +6201,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
goto skip_type_check;
/* arg_btf_id and arg_size are in a union. */
- if (base_type(arg_type) == ARG_PTR_TO_BTF_ID)
+ if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
+ base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
arg_btf_id = fn->arg_btf_id[arg];
err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
@@ -6664,9 +6820,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
int i;
for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
- if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
- return false;
-
+ if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
+ return !!fn->arg_btf_id[i];
+ if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
+ return fn->arg_btf_id[i] == BPF_PTR_POISON;
if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
/* arg_btf_id and arg_size are in a union. */
(base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
@@ -7413,6 +7570,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EINVAL;
}
+ if (!env->prog->aux->sleepable && fn->might_sleep) {
+ verbose(env, "helper call might sleep in a non-sleepable prog\n");
+ return -EINVAL;
+ }
+
/* With LD_ABS/IND some JITs save/restore skb from r1. */
changes_data = bpf_helper_changes_pkt_data(fn->func);
if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
@@ -7431,6 +7593,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return err;
}
+ if (env->cur_state->active_rcu_lock) {
+ if (fn->might_sleep) {
+ verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
+ func_id_name(func_id), func_id);
+ return -EINVAL;
+ }
+
+ if (env->prog->aux->sleepable && is_storage_get_function(func_id))
+ env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
+ }
+
meta.func_id = func_id;
/* check args */
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
@@ -7634,7 +7807,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
break;
- case RET_PTR_TO_ALLOC_MEM:
+ case RET_PTR_TO_MEM:
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
regs[BPF_REG_0].mem_size = meta.mem_size;
@@ -7797,19 +7970,921 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
}
}
+struct bpf_kfunc_call_arg_meta {
+ /* In parameters */
+ struct btf *btf;
+ u32 func_id;
+ u32 kfunc_flags;
+ const struct btf_type *func_proto;
+ const char *func_name;
+ /* Out parameters */
+ u32 ref_obj_id;
+ u8 release_regno;
+ bool r0_rdonly;
+ u32 ret_btf_id;
+ u64 r0_size;
+ struct {
+ u64 value;
+ bool found;
+ } arg_constant;
+ struct {
+ struct btf *btf;
+ u32 btf_id;
+ } arg_obj_drop;
+ struct {
+ struct btf_field *field;
+ } arg_list_head;
+};
+
+static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_ACQUIRE;
+}
+
+static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_RET_NULL;
+}
+
+static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_RELEASE;
+}
+
+static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_TRUSTED_ARGS;
+}
+
+static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_SLEEPABLE;
+}
+
+static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_DESTRUCTIVE;
+}
+
+static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
+{
+ return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
+}
+
+static bool __kfunc_param_match_suffix(const struct btf *btf,
+ const struct btf_param *arg,
+ const char *suffix)
+{
+ int suffix_len = strlen(suffix), len;
+ const char *param_name;
+
+ /* In the future, this can be ported to use BTF tagging */
+ param_name = btf_name_by_offset(btf, arg->name_off);
+ if (str_is_empty(param_name))
+ return false;
+ len = strlen(param_name);
+ if (len < suffix_len)
+ return false;
+ param_name += len - suffix_len;
+ return !strncmp(param_name, suffix, suffix_len);
+}
+
+static bool is_kfunc_arg_mem_size(const struct btf *btf,
+ const struct btf_param *arg,
+ const struct bpf_reg_state *reg)
+{
+ const struct btf_type *t;
+
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
+ return false;
+
+ return __kfunc_param_match_suffix(btf, arg, "__sz");
+}
+
+static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
+{
+ return __kfunc_param_match_suffix(btf, arg, "__k");
+}
+
+static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
+{
+ return __kfunc_param_match_suffix(btf, arg, "__ign");
+}
+
+static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
+{
+ return __kfunc_param_match_suffix(btf, arg, "__alloc");
+}
+
+static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
+ const struct btf_param *arg,
+ const char *name)
+{
+ int len, target_len = strlen(name);
+ const char *param_name;
+
+ param_name = btf_name_by_offset(btf, arg->name_off);
+ if (str_is_empty(param_name))
+ return false;
+ len = strlen(param_name);
+ if (len != target_len)
+ return false;
+ if (strcmp(param_name, name))
+ return false;
+
+ return true;
+}
+
+enum {
+ KF_ARG_DYNPTR_ID,
+ KF_ARG_LIST_HEAD_ID,
+ KF_ARG_LIST_NODE_ID,
+};
+
+BTF_ID_LIST(kf_arg_btf_ids)
+BTF_ID(struct, bpf_dynptr_kern)
+BTF_ID(struct, bpf_list_head)
+BTF_ID(struct, bpf_list_node)
+
+static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
+ const struct btf_param *arg, int type)
+{
+ const struct btf_type *t;
+ u32 res_id;
+
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!t)
+ return false;
+ if (!btf_type_is_ptr(t))
+ return false;
+ t = btf_type_skip_modifiers(btf, t->type, &res_id);
+ if (!t)
+ return false;
+ return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
+}
+
+static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
+}
+
+static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
+}
+
+static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
+}
+
+/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
+static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
+ const struct btf *btf,
+ const struct btf_type *t, int rec)
+{
+ const struct btf_type *member_type;
+ const struct btf_member *member;
+ u32 i;
+
+ if (!btf_type_is_struct(t))
+ return false;
+
+ for_each_member(i, t, member) {
+ const struct btf_array *array;
+
+ member_type = btf_type_skip_modifiers(btf, member->type, NULL);
+ if (btf_type_is_struct(member_type)) {
+ if (rec >= 3) {
+ verbose(env, "max struct nesting depth exceeded\n");
+ return false;
+ }
+ if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
+ return false;
+ continue;
+ }
+ if (btf_type_is_array(member_type)) {
+ array = btf_array(member_type);
+ if (!array->nelems)
+ return false;
+ member_type = btf_type_skip_modifiers(btf, array->type, NULL);
+ if (!btf_type_is_scalar(member_type))
+ return false;
+ continue;
+ }
+ if (!btf_type_is_scalar(member_type))
+ return false;
+ }
+ return true;
+}
+
+
+static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
+#ifdef CONFIG_NET
+ [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
+ [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+ [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
+#endif
+};
+
+enum kfunc_ptr_arg_type {
+ KF_ARG_PTR_TO_CTX,
+ KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */
+ KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */
+ KF_ARG_PTR_TO_DYNPTR,
+ KF_ARG_PTR_TO_LIST_HEAD,
+ KF_ARG_PTR_TO_LIST_NODE,
+ KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
+ KF_ARG_PTR_TO_MEM,
+ KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */
+};
+
+enum special_kfunc_type {
+ KF_bpf_obj_new_impl,
+ KF_bpf_obj_drop_impl,
+ KF_bpf_list_push_front,
+ KF_bpf_list_push_back,
+ KF_bpf_list_pop_front,
+ KF_bpf_list_pop_back,
+ KF_bpf_cast_to_kern_ctx,
+ KF_bpf_rdonly_cast,
+ KF_bpf_rcu_read_lock,
+ KF_bpf_rcu_read_unlock,
+};
+
+BTF_SET_START(special_kfunc_set)
+BTF_ID(func, bpf_obj_new_impl)
+BTF_ID(func, bpf_obj_drop_impl)
+BTF_ID(func, bpf_list_push_front)
+BTF_ID(func, bpf_list_push_back)
+BTF_ID(func, bpf_list_pop_front)
+BTF_ID(func, bpf_list_pop_back)
+BTF_ID(func, bpf_cast_to_kern_ctx)
+BTF_ID(func, bpf_rdonly_cast)
+BTF_SET_END(special_kfunc_set)
+
+BTF_ID_LIST(special_kfunc_list)
+BTF_ID(func, bpf_obj_new_impl)
+BTF_ID(func, bpf_obj_drop_impl)
+BTF_ID(func, bpf_list_push_front)
+BTF_ID(func, bpf_list_push_back)
+BTF_ID(func, bpf_list_pop_front)
+BTF_ID(func, bpf_list_pop_back)
+BTF_ID(func, bpf_cast_to_kern_ctx)
+BTF_ID(func, bpf_rdonly_cast)
+BTF_ID(func, bpf_rcu_read_lock)
+BTF_ID(func, bpf_rcu_read_unlock)
+
+static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
+}
+
+static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
+}
+
+static enum kfunc_ptr_arg_type
+get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
+ struct bpf_kfunc_call_arg_meta *meta,
+ const struct btf_type *t, const struct btf_type *ref_t,
+ const char *ref_tname, const struct btf_param *args,
+ int argno, int nargs)
+{
+ u32 regno = argno + 1;
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_reg_state *reg = &regs[regno];
+ bool arg_mem_size = false;
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
+ return KF_ARG_PTR_TO_CTX;
+
+ /* In this function, we verify the kfunc's BTF as per the argument type,
+ * leaving the rest of the verification with respect to the register
+ * type to our caller. When a set of conditions hold in the BTF type of
+ * arguments, we resolve it to a known kfunc_ptr_arg_type.
+ */
+ if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
+ return KF_ARG_PTR_TO_CTX;
+
+ if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_ALLOC_BTF_ID;
+
+ if (is_kfunc_arg_kptr_get(meta, argno)) {
+ if (!btf_type_is_ptr(ref_t)) {
+ verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n");
+ return -EINVAL;
+ }
+ ref_t = btf_type_by_id(meta->btf, ref_t->type);
+ ref_tname = btf_name_by_offset(meta->btf, ref_t->name_off);
+ if (!btf_type_is_struct(ref_t)) {
+ verbose(env, "kernel function %s args#0 pointer type %s %s is not supported\n",
+ meta->func_name, btf_type_str(ref_t), ref_tname);
+ return -EINVAL;
+ }
+ return KF_ARG_PTR_TO_KPTR;
+ }
+
+ if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_DYNPTR;
+
+ if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_LIST_HEAD;
+
+ if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_LIST_NODE;
+
+ if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
+ if (!btf_type_is_struct(ref_t)) {
+ verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
+ meta->func_name, argno, btf_type_str(ref_t), ref_tname);
+ return -EINVAL;
+ }
+ return KF_ARG_PTR_TO_BTF_ID;
+ }
+
+ if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]))
+ arg_mem_size = true;
+
+ /* This is the catch all argument type of register types supported by
+ * check_helper_mem_access. However, we only allow when argument type is
+ * pointer to scalar, or struct composed (recursively) of scalars. When
+ * arg_mem_size is true, the pointer can be void *.
+ */
+ if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
+ (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
+ verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
+ argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
+ return -EINVAL;
+ }
+ return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
+}
+
+static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ const struct btf_type *ref_t,
+ const char *ref_tname, u32 ref_id,
+ struct bpf_kfunc_call_arg_meta *meta,
+ int argno)
+{
+ const struct btf_type *reg_ref_t;
+ bool strict_type_match = false;
+ const struct btf *reg_btf;
+ const char *reg_ref_tname;
+ u32 reg_ref_id;
+
+ if (base_type(reg->type) == PTR_TO_BTF_ID) {
+ reg_btf = reg->btf;
+ reg_ref_id = reg->btf_id;
+ } else {
+ reg_btf = btf_vmlinux;
+ reg_ref_id = *reg2btf_ids[base_type(reg->type)];
+ }
+
+ if (is_kfunc_trusted_args(meta) || (is_kfunc_release(meta) && reg->ref_obj_id))
+ strict_type_match = true;
+
+ reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
+ reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
+ if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
+ verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
+ meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
+ btf_type_str(reg_ref_t), reg_ref_tname);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ const struct btf_type *ref_t,
+ const char *ref_tname,
+ struct bpf_kfunc_call_arg_meta *meta,
+ int argno)
+{
+ struct btf_field *kptr_field;
+
+ /* check_func_arg_reg_off allows var_off for
+ * PTR_TO_MAP_VALUE, but we need fixed offset to find
+ * off_desc.
+ */
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env, "arg#0 must have constant offset\n");
+ return -EINVAL;
+ }
+
+ kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR);
+ if (!kptr_field || kptr_field->type != BPF_KPTR_REF) {
+ verbose(env, "arg#0 no referenced kptr at map value offset=%llu\n",
+ reg->off + reg->var_off.value);
+ return -EINVAL;
+ }
+
+ if (!btf_struct_ids_match(&env->log, meta->btf, ref_t->type, 0, kptr_field->kptr.btf,
+ kptr_field->kptr.btf_id, true)) {
+ verbose(env, "kernel function %s args#%d expected pointer to %s %s\n",
+ meta->func_name, argno, btf_type_str(ref_t), ref_tname);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int ref_set_release_on_unlock(struct bpf_verifier_env *env, u32 ref_obj_id)
+{
+ struct bpf_func_state *state = cur_func(env);
+ struct bpf_reg_state *reg;
+ int i;
+
+ /* bpf_spin_lock only allows calling list_push and list_pop, no BPF
+ * subprogs, no global functions. This means that the references would
+ * not be released inside the critical section but they may be added to
+ * the reference state, and the acquired_refs are never copied out for a
+ * different frame as BPF to BPF calls don't work in bpf_spin_lock
+ * critical sections.
+ */
+ if (!ref_obj_id) {
+ verbose(env, "verifier internal error: ref_obj_id is zero for release_on_unlock\n");
+ return -EFAULT;
+ }
+ for (i = 0; i < state->acquired_refs; i++) {
+ if (state->refs[i].id == ref_obj_id) {
+ if (state->refs[i].release_on_unlock) {
+ verbose(env, "verifier internal error: expected false release_on_unlock");
+ return -EFAULT;
+ }
+ state->refs[i].release_on_unlock = true;
+ /* Now mark everyone sharing same ref_obj_id as untrusted */
+ bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
+ if (reg->ref_obj_id == ref_obj_id)
+ reg->type |= PTR_UNTRUSTED;
+ }));
+ return 0;
+ }
+ }
+ verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
+ return -EFAULT;
+}
+
+/* Implementation details:
+ *
+ * Each register points to some region of memory, which we define as an
+ * allocation. Each allocation may embed a bpf_spin_lock which protects any
+ * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
+ * allocation. The lock and the data it protects are colocated in the same
+ * memory region.
+ *
+ * Hence, everytime a register holds a pointer value pointing to such
+ * allocation, the verifier preserves a unique reg->id for it.
+ *
+ * The verifier remembers the lock 'ptr' and the lock 'id' whenever
+ * bpf_spin_lock is called.
+ *
+ * To enable this, lock state in the verifier captures two values:
+ * active_lock.ptr = Register's type specific pointer
+ * active_lock.id = A unique ID for each register pointer value
+ *
+ * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
+ * supported register types.
+ *
+ * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
+ * allocated objects is the reg->btf pointer.
+ *
+ * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
+ * can establish the provenance of the map value statically for each distinct
+ * lookup into such maps. They always contain a single map value hence unique
+ * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
+ *
+ * So, in case of global variables, they use array maps with max_entries = 1,
+ * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
+ * into the same map value as max_entries is 1, as described above).
+ *
+ * In case of inner map lookups, the inner map pointer has same map_ptr as the
+ * outer map pointer (in verifier context), but each lookup into an inner map
+ * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
+ * maps from the same outer map share the same map_ptr as active_lock.ptr, they
+ * will get different reg->id assigned to each lookup, hence different
+ * active_lock.id.
+ *
+ * In case of allocated objects, active_lock.ptr is the reg->btf, and the
+ * reg->id is a unique ID preserved after the NULL pointer check on the pointer
+ * returned from bpf_obj_new. Each allocation receives a new reg->id.
+ */
+static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ void *ptr;
+ u32 id;
+
+ switch ((int)reg->type) {
+ case PTR_TO_MAP_VALUE:
+ ptr = reg->map_ptr;
+ break;
+ case PTR_TO_BTF_ID | MEM_ALLOC:
+ case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
+ ptr = reg->btf;
+ break;
+ default:
+ verbose(env, "verifier internal error: unknown reg type for lock check\n");
+ return -EFAULT;
+ }
+ id = reg->id;
+
+ if (!env->cur_state->active_lock.ptr)
+ return -EINVAL;
+ if (env->cur_state->active_lock.ptr != ptr ||
+ env->cur_state->active_lock.id != id) {
+ verbose(env, "held lock and object are not in the same allocation\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static bool is_bpf_list_api_kfunc(u32 btf_id)
+{
+ return btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
+ btf_id == special_kfunc_list[KF_bpf_list_push_back] ||
+ btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
+ btf_id == special_kfunc_list[KF_bpf_list_pop_back];
+}
+
+static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ struct btf_field *field;
+ struct btf_record *rec;
+ u32 list_head_off;
+
+ if (meta->btf != btf_vmlinux || !is_bpf_list_api_kfunc(meta->func_id)) {
+ verbose(env, "verifier internal error: bpf_list_head argument for unknown kfunc\n");
+ return -EFAULT;
+ }
+
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env,
+ "R%d doesn't have constant offset. bpf_list_head has to be at the constant offset\n",
+ regno);
+ return -EINVAL;
+ }
+
+ rec = reg_btf_record(reg);
+ list_head_off = reg->off + reg->var_off.value;
+ field = btf_record_find(rec, list_head_off, BPF_LIST_HEAD);
+ if (!field) {
+ verbose(env, "bpf_list_head not found at offset=%u\n", list_head_off);
+ return -EINVAL;
+ }
+
+ /* All functions require bpf_list_head to be protected using a bpf_spin_lock */
+ if (check_reg_allocation_locked(env, reg)) {
+ verbose(env, "bpf_spin_lock at off=%d must be held for bpf_list_head\n",
+ rec->spin_lock_off);
+ return -EINVAL;
+ }
+
+ if (meta->arg_list_head.field) {
+ verbose(env, "verifier internal error: repeating bpf_list_head arg\n");
+ return -EFAULT;
+ }
+ meta->arg_list_head.field = field;
+ return 0;
+}
+
+static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ const struct btf_type *et, *t;
+ struct btf_field *field;
+ struct btf_record *rec;
+ u32 list_node_off;
+
+ if (meta->btf != btf_vmlinux ||
+ (meta->func_id != special_kfunc_list[KF_bpf_list_push_front] &&
+ meta->func_id != special_kfunc_list[KF_bpf_list_push_back])) {
+ verbose(env, "verifier internal error: bpf_list_node argument for unknown kfunc\n");
+ return -EFAULT;
+ }
+
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env,
+ "R%d doesn't have constant offset. bpf_list_node has to be at the constant offset\n",
+ regno);
+ return -EINVAL;
+ }
+
+ rec = reg_btf_record(reg);
+ list_node_off = reg->off + reg->var_off.value;
+ field = btf_record_find(rec, list_node_off, BPF_LIST_NODE);
+ if (!field || field->offset != list_node_off) {
+ verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off);
+ return -EINVAL;
+ }
+
+ field = meta->arg_list_head.field;
+
+ et = btf_type_by_id(field->list_head.btf, field->list_head.value_btf_id);
+ t = btf_type_by_id(reg->btf, reg->btf_id);
+ if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->list_head.btf,
+ field->list_head.value_btf_id, true)) {
+ verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d "
+ "in struct %s, but arg is at offset=%d in struct %s\n",
+ field->list_head.node_offset, btf_name_by_offset(field->list_head.btf, et->name_off),
+ list_node_off, btf_name_by_offset(reg->btf, t->name_off));
+ return -EINVAL;
+ }
+
+ if (list_node_off != field->list_head.node_offset) {
+ verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n",
+ list_node_off, field->list_head.node_offset,
+ btf_name_by_offset(field->list_head.btf, et->name_off));
+ return -EINVAL;
+ }
+ /* Set arg#1 for expiration after unlock */
+ return ref_set_release_on_unlock(env, reg->ref_obj_id);
+}
+
+static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta)
+{
+ const char *func_name = meta->func_name, *ref_tname;
+ const struct btf *btf = meta->btf;
+ const struct btf_param *args;
+ u32 i, nargs;
+ int ret;
+
+ args = (const struct btf_param *)(meta->func_proto + 1);
+ nargs = btf_type_vlen(meta->func_proto);
+ if (nargs > MAX_BPF_FUNC_REG_ARGS) {
+ verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
+ MAX_BPF_FUNC_REG_ARGS);
+ return -EINVAL;
+ }
+
+ /* Check that BTF function arguments match actual types that the
+ * verifier sees.
+ */
+ for (i = 0; i < nargs; i++) {
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
+ const struct btf_type *t, *ref_t, *resolve_ret;
+ enum bpf_arg_type arg_type = ARG_DONTCARE;
+ u32 regno = i + 1, ref_id, type_size;
+ bool is_ret_buf_sz = false;
+ int kf_arg_type;
+
+ t = btf_type_skip_modifiers(btf, args[i].type, NULL);
+
+ if (is_kfunc_arg_ignore(btf, &args[i]))
+ continue;
+
+ if (btf_type_is_scalar(t)) {
+ if (reg->type != SCALAR_VALUE) {
+ verbose(env, "R%d is not a scalar\n", regno);
+ return -EINVAL;
+ }
+
+ if (is_kfunc_arg_constant(meta->btf, &args[i])) {
+ if (meta->arg_constant.found) {
+ verbose(env, "verifier internal error: only one constant argument permitted\n");
+ return -EFAULT;
+ }
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env, "R%d must be a known constant\n", regno);
+ return -EINVAL;
+ }
+ ret = mark_chain_precision(env, regno);
+ if (ret < 0)
+ return ret;
+ meta->arg_constant.found = true;
+ meta->arg_constant.value = reg->var_off.value;
+ } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
+ meta->r0_rdonly = true;
+ is_ret_buf_sz = true;
+ } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
+ is_ret_buf_sz = true;
+ }
+
+ if (is_ret_buf_sz) {
+ if (meta->r0_size) {
+ verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
+ return -EINVAL;
+ }
+
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env, "R%d is not a const\n", regno);
+ return -EINVAL;
+ }
+
+ meta->r0_size = reg->var_off.value;
+ ret = mark_chain_precision(env, regno);
+ if (ret)
+ return ret;
+ }
+ continue;
+ }
+
+ if (!btf_type_is_ptr(t)) {
+ verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
+ return -EINVAL;
+ }
+
+ if (reg->ref_obj_id) {
+ if (is_kfunc_release(meta) && meta->ref_obj_id) {
+ verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+ regno, reg->ref_obj_id,
+ meta->ref_obj_id);
+ return -EFAULT;
+ }
+ meta->ref_obj_id = reg->ref_obj_id;
+ if (is_kfunc_release(meta))
+ meta->release_regno = regno;
+ }
+
+ ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
+ ref_tname = btf_name_by_offset(btf, ref_t->name_off);
+
+ kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
+ if (kf_arg_type < 0)
+ return kf_arg_type;
+
+ switch (kf_arg_type) {
+ case KF_ARG_PTR_TO_ALLOC_BTF_ID:
+ case KF_ARG_PTR_TO_BTF_ID:
+ if (!is_kfunc_trusted_args(meta))
+ break;
+
+ if (!is_trusted_reg(reg)) {
+ verbose(env, "R%d must be referenced or trusted\n", regno);
+ return -EINVAL;
+ }
+ fallthrough;
+ case KF_ARG_PTR_TO_CTX:
+ /* Trusted arguments have the same offset checks as release arguments */
+ arg_type |= OBJ_RELEASE;
+ break;
+ case KF_ARG_PTR_TO_KPTR:
+ case KF_ARG_PTR_TO_DYNPTR:
+ case KF_ARG_PTR_TO_LIST_HEAD:
+ case KF_ARG_PTR_TO_LIST_NODE:
+ case KF_ARG_PTR_TO_MEM:
+ case KF_ARG_PTR_TO_MEM_SIZE:
+ /* Trusted by default */
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EFAULT;
+ }
+
+ if (is_kfunc_release(meta) && reg->ref_obj_id)
+ arg_type |= OBJ_RELEASE;
+ ret = check_func_arg_reg_off(env, reg, regno, arg_type);
+ if (ret < 0)
+ return ret;
+
+ switch (kf_arg_type) {
+ case KF_ARG_PTR_TO_CTX:
+ if (reg->type != PTR_TO_CTX) {
+ verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
+ return -EINVAL;
+ }
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
+ ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
+ if (ret < 0)
+ return -EINVAL;
+ meta->ret_btf_id = ret;
+ }
+ break;
+ case KF_ARG_PTR_TO_ALLOC_BTF_ID:
+ if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ verbose(env, "arg#%d expected pointer to allocated object\n", i);
+ return -EINVAL;
+ }
+ if (!reg->ref_obj_id) {
+ verbose(env, "allocated object must be referenced\n");
+ return -EINVAL;
+ }
+ if (meta->btf == btf_vmlinux &&
+ meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+ meta->arg_obj_drop.btf = reg->btf;
+ meta->arg_obj_drop.btf_id = reg->btf_id;
+ }
+ break;
+ case KF_ARG_PTR_TO_KPTR:
+ if (reg->type != PTR_TO_MAP_VALUE) {
+ verbose(env, "arg#0 expected pointer to map value\n");
+ return -EINVAL;
+ }
+ ret = process_kf_arg_ptr_to_kptr(env, reg, ref_t, ref_tname, meta, i);
+ if (ret < 0)
+ return ret;
+ break;
+ case KF_ARG_PTR_TO_DYNPTR:
+ if (reg->type != PTR_TO_STACK) {
+ verbose(env, "arg#%d expected pointer to stack\n", i);
+ return -EINVAL;
+ }
+
+ if (!is_dynptr_reg_valid_init(env, reg)) {
+ verbose(env, "arg#%d pointer type %s %s must be valid and initialized\n",
+ i, btf_type_str(ref_t), ref_tname);
+ return -EINVAL;
+ }
+
+ if (!is_dynptr_type_expected(env, reg, ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) {
+ verbose(env, "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n",
+ i, btf_type_str(ref_t), ref_tname);
+ return -EINVAL;
+ }
+ break;
+ case KF_ARG_PTR_TO_LIST_HEAD:
+ if (reg->type != PTR_TO_MAP_VALUE &&
+ reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
+ return -EINVAL;
+ }
+ if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
+ verbose(env, "allocated object must be referenced\n");
+ return -EINVAL;
+ }
+ ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
+ if (ret < 0)
+ return ret;
+ break;
+ case KF_ARG_PTR_TO_LIST_NODE:
+ if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ verbose(env, "arg#%d expected pointer to allocated object\n", i);
+ return -EINVAL;
+ }
+ if (!reg->ref_obj_id) {
+ verbose(env, "allocated object must be referenced\n");
+ return -EINVAL;
+ }
+ ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
+ if (ret < 0)
+ return ret;
+ break;
+ case KF_ARG_PTR_TO_BTF_ID:
+ /* Only base_type is checked, further checks are done here */
+ if ((base_type(reg->type) != PTR_TO_BTF_ID ||
+ bpf_type_has_unsafe_modifiers(reg->type)) &&
+ !reg2btf_ids[base_type(reg->type)]) {
+ verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
+ verbose(env, "expected %s or socket\n",
+ reg_type_str(env, base_type(reg->type) |
+ (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
+ return -EINVAL;
+ }
+ ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
+ if (ret < 0)
+ return ret;
+ break;
+ case KF_ARG_PTR_TO_MEM:
+ resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
+ if (IS_ERR(resolve_ret)) {
+ verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
+ i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
+ return -EINVAL;
+ }
+ ret = check_mem_reg(env, reg, regno, type_size);
+ if (ret < 0)
+ return ret;
+ break;
+ case KF_ARG_PTR_TO_MEM_SIZE:
+ ret = check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1);
+ if (ret < 0) {
+ verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
+ return ret;
+ }
+ /* Skip next '__sz' argument */
+ i++;
+ break;
+ }
+ }
+
+ if (is_kfunc_release(meta) && !meta->release_regno) {
+ verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
+ func_name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
const struct btf_type *t, *func, *func_proto, *ptr_type;
struct bpf_reg_state *regs = cur_regs(env);
- struct bpf_kfunc_arg_meta meta = { 0 };
const char *func_name, *ptr_type_name;
+ bool sleepable, rcu_lock, rcu_unlock;
+ struct bpf_kfunc_call_arg_meta meta;
u32 i, nargs, func_id, ptr_type_id;
int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
+ const struct btf_type *ret_t;
struct btf *desc_btf;
u32 *kfunc_flags;
- bool acq;
/* skip for now, but return error when we find this in fixup_kfunc_call */
if (!insn->imm)
@@ -7830,24 +8905,68 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
func_name);
return -EACCES;
}
- if (*kfunc_flags & KF_DESTRUCTIVE && !capable(CAP_SYS_BOOT)) {
- verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capabilities\n");
+
+ /* Prepare kfunc call metadata */
+ memset(&meta, 0, sizeof(meta));
+ meta.btf = desc_btf;
+ meta.func_id = func_id;
+ meta.kfunc_flags = *kfunc_flags;
+ meta.func_proto = func_proto;
+ meta.func_name = func_name;
+
+ if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
+ verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
+ return -EACCES;
+ }
+
+ sleepable = is_kfunc_sleepable(&meta);
+ if (sleepable && !env->prog->aux->sleepable) {
+ verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
+ return -EACCES;
+ }
+
+ rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
+ rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
+ if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) {
+ verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name);
return -EACCES;
}
- acq = *kfunc_flags & KF_ACQUIRE;
+ if (env->cur_state->active_rcu_lock) {
+ struct bpf_func_state *state;
+ struct bpf_reg_state *reg;
- meta.flags = *kfunc_flags;
+ if (rcu_lock) {
+ verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
+ return -EINVAL;
+ } else if (rcu_unlock) {
+ bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
+ if (reg->type & MEM_RCU) {
+ reg->type &= ~(MEM_RCU | PTR_TRUSTED);
+ reg->type |= PTR_UNTRUSTED;
+ }
+ }));
+ env->cur_state->active_rcu_lock = false;
+ } else if (sleepable) {
+ verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
+ return -EACCES;
+ }
+ } else if (rcu_lock) {
+ env->cur_state->active_rcu_lock = true;
+ } else if (rcu_unlock) {
+ verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
+ return -EINVAL;
+ }
/* Check the arguments */
- err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, &meta);
+ err = check_kfunc_args(env, &meta);
if (err < 0)
return err;
/* In case of release function, we get register number of refcounted
- * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now
+ * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
*/
- if (err) {
- err = release_reference(env, regs[err].ref_obj_id);
+ if (meta.release_regno) {
+ err = release_reference(env, regs[meta.release_regno].ref_obj_id);
if (err) {
verbose(env, "kfunc %s#%d reference has not been acquired before\n",
func_name, func_id);
@@ -7861,18 +8980,92 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* Check return type */
t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
- if (acq && !btf_type_is_struct_ptr(desc_btf, t)) {
- verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
- return -EINVAL;
+ if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
+ /* Only exception is bpf_obj_new_impl */
+ if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) {
+ verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
+ return -EINVAL;
+ }
}
if (btf_type_is_scalar(t)) {
mark_reg_unknown(env, regs, BPF_REG_0);
mark_btf_func_reg_size(env, BPF_REG_0, t->size);
} else if (btf_type_is_ptr(t)) {
- ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
- &ptr_type_id);
- if (!btf_type_is_struct(ptr_type)) {
+ ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
+
+ if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
+ if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
+ struct btf *ret_btf;
+ u32 ret_btf_id;
+
+ if (unlikely(!bpf_global_ma_set))
+ return -ENOMEM;
+
+ if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
+ verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
+ return -EINVAL;
+ }
+
+ ret_btf = env->prog->aux->btf;
+ ret_btf_id = meta.arg_constant.value;
+
+ /* This may be NULL due to user not supplying a BTF */
+ if (!ret_btf) {
+ verbose(env, "bpf_obj_new requires prog BTF\n");
+ return -EINVAL;
+ }
+
+ ret_t = btf_type_by_id(ret_btf, ret_btf_id);
+ if (!ret_t || !__btf_type_is_struct(ret_t)) {
+ verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
+ return -EINVAL;
+ }
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
+ regs[BPF_REG_0].btf = ret_btf;
+ regs[BPF_REG_0].btf_id = ret_btf_id;
+
+ env->insn_aux_data[insn_idx].obj_new_size = ret_t->size;
+ env->insn_aux_data[insn_idx].kptr_struct_meta =
+ btf_find_struct_meta(ret_btf, ret_btf_id);
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+ env->insn_aux_data[insn_idx].kptr_struct_meta =
+ btf_find_struct_meta(meta.arg_obj_drop.btf,
+ meta.arg_obj_drop.btf_id);
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
+ meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
+ struct btf_field *field = meta.arg_list_head.field;
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
+ regs[BPF_REG_0].btf = field->list_head.btf;
+ regs[BPF_REG_0].btf_id = field->list_head.value_btf_id;
+ regs[BPF_REG_0].off = field->list_head.node_offset;
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
+ regs[BPF_REG_0].btf = desc_btf;
+ regs[BPF_REG_0].btf_id = meta.ret_btf_id;
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
+ ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
+ if (!ret_t || !btf_type_is_struct(ret_t)) {
+ verbose(env,
+ "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
+ return -EINVAL;
+ }
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
+ regs[BPF_REG_0].btf = desc_btf;
+ regs[BPF_REG_0].btf_id = meta.arg_constant.value;
+ } else {
+ verbose(env, "kernel function %s unhandled dynamic return type\n",
+ meta.func_name);
+ return -EFAULT;
+ }
+ } else if (!__btf_type_is_struct(ptr_type)) {
if (!meta.r0_size) {
ptr_type_name = btf_name_by_offset(desc_btf,
ptr_type->name_off);
@@ -7900,20 +9093,24 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
regs[BPF_REG_0].type = PTR_TO_BTF_ID;
regs[BPF_REG_0].btf_id = ptr_type_id;
}
- if (*kfunc_flags & KF_RET_NULL) {
+
+ if (is_kfunc_ret_null(&meta)) {
regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
regs[BPF_REG_0].id = ++env->id_gen;
}
mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
- if (acq) {
+ if (is_kfunc_acquire(&meta)) {
int id = acquire_reference_state(env, insn_idx);
if (id < 0)
return id;
- regs[BPF_REG_0].id = id;
+ if (is_kfunc_ret_null(&meta))
+ regs[BPF_REG_0].id = id;
regs[BPF_REG_0].ref_obj_id = id;
}
+ if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
+ regs[BPF_REG_0].id = ++env->id_gen;
} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
nargs = btf_type_vlen(func_proto);
@@ -10086,16 +11283,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
{
if (type_may_be_null(reg->type) && reg->id == id &&
!WARN_ON_ONCE(!reg->id)) {
- if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
- !tnum_equals_const(reg->var_off, 0) ||
- reg->off)) {
- /* Old offset (both fixed and variable parts) should
- * have been known-zero, because we don't allow pointer
- * arithmetic on pointers that might be NULL. If we
- * see this happening, don't convert the register.
- */
+ /* Old offset (both fixed and variable parts) should have been
+ * known-zero, because we don't allow pointer arithmetic on
+ * pointers that might be NULL. If we see this happening, don't
+ * convert the register.
+ *
+ * But in some cases, some helpers that return local kptrs
+ * advance offset for the returned pointer. In those cases, it
+ * is fine to expect to see reg->off.
+ */
+ if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
+ return;
+ if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL) && WARN_ON_ONCE(reg->off))
return;
- }
if (is_null) {
reg->type = SCALAR_VALUE;
/* We don't need id and ref_obj_id from this point
@@ -10269,6 +11469,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_verifier_state *other_branch;
struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
+ struct bpf_reg_state *eq_branch_regs;
u8 opcode = BPF_OP(insn->code);
bool is_jmp32;
int pred = -1;
@@ -10378,8 +11579,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
/* detect if we are comparing against a constant value so we can adjust
* our min/max values for our dst register.
* this is only legit if both are scalars (or pointers to the same
- * object, I suppose, but we don't support that right now), because
- * otherwise the different base pointers mean the offsets aren't
+ * object, I suppose, see the PTR_MAYBE_NULL related if block below),
+ * because otherwise the different base pointers mean the offsets aren't
* comparable.
*/
if (BPF_SRC(insn->code) == BPF_X) {
@@ -10428,6 +11629,36 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
}
+ /* if one pointer register is compared to another pointer
+ * register check if PTR_MAYBE_NULL could be lifted.
+ * E.g. register A - maybe null
+ * register B - not null
+ * for JNE A, B, ... - A is not null in the false branch;
+ * for JEQ A, B, ... - A is not null in the true branch.
+ */
+ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
+ __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
+ type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type)) {
+ eq_branch_regs = NULL;
+ switch (opcode) {
+ case BPF_JEQ:
+ eq_branch_regs = other_branch_regs;
+ break;
+ case BPF_JNE:
+ eq_branch_regs = regs;
+ break;
+ default:
+ /* do nothing */
+ break;
+ }
+ if (eq_branch_regs) {
+ if (type_may_be_null(src_reg->type))
+ mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
+ else
+ mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
+ }
+ }
+
/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
* NOTE: these optimizations below are related with pointer comparison
* which will never be JMP32.
@@ -10534,8 +11765,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
dst_reg->type = PTR_TO_MAP_VALUE;
dst_reg->off = aux->map_off;
- if (btf_record_has_field(map->record, BPF_SPIN_LOCK))
- dst_reg->id = ++env->id_gen;
+ WARN_ON_ONCE(map->max_entries != 1);
+ /* We want reg->id to be same (0) as map_value is not distinct */
} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
insn->src_reg == BPF_PSEUDO_MAP_IDX) {
dst_reg->type = CONST_PTR_TO_MAP;
@@ -10613,11 +11844,16 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return err;
}
- if (env->cur_state->active_spin_lock) {
+ if (env->cur_state->active_lock.ptr) {
verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
return -EINVAL;
}
+ if (env->cur_state->active_rcu_lock) {
+ verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
+ return -EINVAL;
+ }
+
if (regs[ctx_reg].type != PTR_TO_CTX) {
verbose(env,
"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
@@ -11879,7 +13115,11 @@ static bool states_equal(struct bpf_verifier_env *env,
if (old->speculative && !cur->speculative)
return false;
- if (old->active_spin_lock != cur->active_spin_lock)
+ if (old->active_lock.ptr != cur->active_lock.ptr ||
+ old->active_lock.id != cur->active_lock.id)
+ return false;
+
+ if (old->active_rcu_lock != cur->active_rcu_lock)
return false;
/* for states to be equal callsites have to be the same
@@ -12524,11 +13764,14 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
- if (env->cur_state->active_spin_lock &&
- (insn->src_reg == BPF_PSEUDO_CALL ||
- insn->imm != BPF_FUNC_spin_unlock)) {
- verbose(env, "function calls are not allowed while holding a lock\n");
- return -EINVAL;
+ if (env->cur_state->active_lock.ptr) {
+ if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
+ (insn->src_reg == BPF_PSEUDO_CALL) ||
+ (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
+ (insn->off != 0 || !is_bpf_list_api_kfunc(insn->imm)))) {
+ verbose(env, "function calls are not allowed while holding a lock\n");
+ return -EINVAL;
+ }
}
if (insn->src_reg == BPF_PSEUDO_CALL)
err = check_func_call(env, insn, &env->insn_idx);
@@ -12561,11 +13804,16 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
- if (env->cur_state->active_spin_lock) {
+ if (env->cur_state->active_lock.ptr) {
verbose(env, "bpf_spin_unlock is missing\n");
return -EINVAL;
}
+ if (env->cur_state->active_rcu_lock) {
+ verbose(env, "bpf_rcu_read_unlock is missing\n");
+ return -EINVAL;
+ }
+
/* We must do check_reference_leak here before
* prepare_func_exit to handle the case when
* state->curframe > 0, it may be a callback
@@ -12818,6 +14066,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
{
enum bpf_prog_type prog_type = resolve_prog_type(prog);
+ if (btf_record_has_field(map->record, BPF_LIST_HEAD)) {
+ if (is_tracing_prog_type(prog_type)) {
+ verbose(env, "tracing progs cannot use bpf_list_head yet\n");
+ return -EINVAL;
+ }
+ }
+
if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
@@ -13654,6 +14909,13 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
break;
case PTR_TO_BTF_ID:
case PTR_TO_BTF_ID | PTR_UNTRUSTED:
+ /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
+ * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
+ * be said once it is marked PTR_UNTRUSTED, hence we must handle
+ * any faults for loads into such types. BPF_WRITE is disallowed
+ * for this case.
+ */
+ case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
if (type == BPF_READ) {
insn->code = BPF_LDX | BPF_PROBE_MEM |
BPF_SIZE((insn)->code);
@@ -14019,8 +15281,8 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return err;
}
-static int fixup_kfunc_call(struct bpf_verifier_env *env,
- struct bpf_insn *insn)
+static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ struct bpf_insn *insn_buf, int insn_idx, int *cnt)
{
const struct bpf_kfunc_desc *desc;
@@ -14039,8 +15301,33 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env,
return -EFAULT;
}
+ *cnt = 0;
insn->imm = desc->imm;
-
+ if (insn->off)
+ return 0;
+ if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
+ struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
+ struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
+ u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
+
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
+ insn_buf[1] = addr[0];
+ insn_buf[2] = addr[1];
+ insn_buf[3] = *insn;
+ *cnt = 4;
+ } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+ struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
+ struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
+
+ insn_buf[0] = addr[0];
+ insn_buf[1] = addr[1];
+ insn_buf[2] = *insn;
+ *cnt = 3;
+ } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
+ desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
+ insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
+ *cnt = 1;
+ }
return 0;
}
@@ -14182,9 +15469,19 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
if (insn->src_reg == BPF_PSEUDO_CALL)
continue;
if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
- ret = fixup_kfunc_call(env, insn);
+ ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
if (ret)
return ret;
+ if (cnt == 0)
+ continue;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
continue;
}
@@ -14302,14 +15599,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto patch_call_imm;
}
- if (insn->imm == BPF_FUNC_task_storage_get ||
- insn->imm == BPF_FUNC_sk_storage_get ||
- insn->imm == BPF_FUNC_inode_storage_get ||
- insn->imm == BPF_FUNC_cgrp_storage_get) {
- if (env->prog->aux->sleepable)
- insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
- else
+ if (is_storage_get_function(insn->imm)) {
+ if (!env->prog->aux->sleepable ||
+ env->insn_aux_data[i + delta].storage_get_func_atomic)
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
+ else
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
insn_buf[1] = *insn;
cnt = 2;
@@ -14379,7 +15674,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
(int (*)(struct bpf_map *map, void *value))NULL));
BUILD_BUG_ON(!__same_type(ops->map_redirect,
- (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
+ (int (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
(int (*)(struct bpf_map *map,
bpf_callback_t callback_fn,
@@ -15388,6 +16683,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
env->bypass_spec_v1 = bpf_bypass_spec_v1();
env->bypass_spec_v4 = bpf_bypass_spec_v4();
env->bpf_capable = bpf_capable();
+ env->rcu_tag_supported = btf_vmlinux &&
+ btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0;
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f2d8d070d024..3bbd3f0c810c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -774,7 +774,7 @@ BPF_CALL_0(bpf_get_current_task_btf)
const struct bpf_func_proto bpf_get_current_task_btf_proto = {
.func = bpf_get_current_task_btf,
.gpl_only = true,
- .ret_type = RET_PTR_TO_BTF_ID,
+ .ret_type = RET_PTR_TO_BTF_ID_TRUSTED,
.ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};
@@ -1485,9 +1485,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_task_stack:
return &bpf_get_task_stack_proto;
case BPF_FUNC_copy_from_user:
- return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
+ return &bpf_copy_from_user_proto;
case BPF_FUNC_copy_from_user_task:
- return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL;
+ return &bpf_copy_from_user_task_proto;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
case BPF_FUNC_per_cpu_ptr:
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index e78dadfc5829..2d434c1f4617 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -156,29 +156,29 @@ static bool bpf_dummy_ops_is_valid_access(int off, int size,
}
static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off,
- int size, enum bpf_access_type atype,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
u32 *next_btf_id,
enum bpf_type_flag *flag)
{
const struct btf_type *state;
+ const struct btf_type *t;
s32 type_id;
int err;
- type_id = btf_find_by_name_kind(btf, "bpf_dummy_ops_state",
+ type_id = btf_find_by_name_kind(reg->btf, "bpf_dummy_ops_state",
BTF_KIND_STRUCT);
if (type_id < 0)
return -EINVAL;
- state = btf_type_by_id(btf, type_id);
+ t = btf_type_by_id(reg->btf, reg->btf_id);
+ state = btf_type_by_id(reg->btf, type_id);
if (t != state) {
bpf_log(log, "only access to bpf_dummy_ops_state is supported\n");
return -EACCES;
}
- err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
- flag);
+ err = btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
if (err < 0)
return err;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index fcb3e6c5e03c..6094ef7cffcd 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -980,9 +980,6 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
{
struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
- if (!skb->len)
- return -EINVAL;
-
if (!__skb)
return 0;
diff --git a/net/core/filter.c b/net/core/filter.c
index da4f697e4fa4..37baaa6b8fc3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2124,12 +2124,13 @@ static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
{
unsigned int mlen = skb_network_offset(skb);
+ if (unlikely(skb->len <= mlen)) {
+ kfree_skb(skb);
+ return -ERANGE;
+ }
+
if (mlen) {
__skb_pull(skb, mlen);
- if (unlikely(!skb->len)) {
- kfree_skb(skb);
- return -ERANGE;
- }
/* At ingress, the mac header has already been pulled once.
* At egress, skb_pospull_rcsum has to be done in case that
@@ -2149,7 +2150,7 @@ static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
u32 flags)
{
/* Verify that a link layer header is carried */
- if (unlikely(skb->mac_header >= skb->network_header)) {
+ if (unlikely(skb->mac_header >= skb->network_header || skb->len == 0)) {
kfree_skb(skb);
return -ERANGE;
}
@@ -4108,7 +4109,10 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
.arg2_type = ARG_ANYTHING,
};
-/* XDP_REDIRECT works by a three-step process, implemented in the functions
+/**
+ * DOC: xdp redirect
+ *
+ * XDP_REDIRECT works by a three-step process, implemented in the functions
* below:
*
* 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
@@ -4123,7 +4127,8 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
* 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
* which will flush all the different bulk queues, thus completing the
* redirect.
- *
+ */
+/*
* Pointers to the map entries will be kept around for this whole sequence of
* steps, protected by RCU. However, there is no top-level rcu_read_lock() in
* the core code; instead, the RCU protection relies on everything happening
@@ -4414,10 +4419,10 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
+BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u64, key,
u64, flags)
{
- return map->ops->map_redirect(map, ifindex, flags);
+ return map->ops->map_redirect(map, key, flags);
}
static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
@@ -8651,28 +8656,25 @@ static bool tc_cls_act_is_valid_access(int off, int size,
DEFINE_MUTEX(nf_conn_btf_access_lock);
EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock);
-int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf,
- const struct btf_type *t, int off, int size,
- enum bpf_access_type atype, u32 *next_btf_id,
- enum bpf_type_flag *flag);
+int (*nfct_btf_struct_access)(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag);
EXPORT_SYMBOL_GPL(nfct_btf_struct_access);
static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off,
- int size, enum bpf_access_type atype,
- u32 *next_btf_id,
- enum bpf_type_flag *flag)
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
int ret = -EACCES;
if (atype == BPF_READ)
- return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
- flag);
+ return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
mutex_lock(&nf_conn_btf_access_lock);
if (nfct_btf_struct_access)
- ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
+ ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
mutex_unlock(&nf_conn_btf_access_lock);
return ret;
@@ -8738,21 +8740,18 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog,
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
static int xdp_btf_struct_access(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off,
- int size, enum bpf_access_type atype,
- u32 *next_btf_id,
- enum bpf_type_flag *flag)
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
int ret = -EACCES;
if (atype == BPF_READ)
- return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
- flag);
+ return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
mutex_lock(&nf_conn_btf_access_lock);
if (nfct_btf_struct_access)
- ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
+ ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
mutex_unlock(&nf_conn_btf_access_lock);
return ret;
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 6da16ae6a962..4517d2bd186a 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -61,7 +61,9 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size,
if (!bpf_tracing_btf_ctx_access(off, size, type, prog, info))
return false;
- if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id)
+ if (base_type(info->reg_type) == PTR_TO_BTF_ID &&
+ !bpf_type_has_unsafe_modifiers(info->reg_type) &&
+ info->btf_id == sock_id)
/* promote it to tcp_sock */
info->btf_id = tcp_sock_id;
@@ -69,18 +71,17 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size,
}
static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off,
- int size, enum bpf_access_type atype,
- u32 *next_btf_id,
- enum bpf_type_flag *flag)
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
+ const struct btf_type *t;
size_t end;
if (atype == BPF_READ)
- return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
- flag);
+ return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
+ t = btf_type_by_id(reg->btf, reg->btf_id);
if (t != tcp_sock_type) {
bpf_log(log, "only read is supported\n");
return -EACCES;
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 8639e7efd0e2..24002bc61e07 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -191,19 +191,16 @@ BTF_ID(struct, nf_conn___init)
/* Check writes into `struct nf_conn` */
static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off,
- int size, enum bpf_access_type atype,
- u32 *next_btf_id,
- enum bpf_type_flag *flag)
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
- const struct btf_type *ncit;
- const struct btf_type *nct;
+ const struct btf_type *ncit, *nct, *t;
size_t end;
- ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]);
- nct = btf_type_by_id(btf, btf_nf_conn_ids[0]);
-
+ ncit = btf_type_by_id(reg->btf, btf_nf_conn_ids[1]);
+ nct = btf_type_by_id(reg->btf, btf_nf_conn_ids[0]);
+ t = btf_type_by_id(reg->btf, reg->btf_id);
if (t != nct && t != ncit) {
bpf_log(log, "only read is supported\n");
return -EACCES;
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index acc8e52a4f5f..771d0fa90ef5 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -231,9 +231,9 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
return 0;
}
-static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+static int xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags)
{
- return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
+ return __bpf_xdp_redirect_map(map, index, flags, 0,
__xsk_map_lookup_elem);
}
diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh
index 12faf5847e22..395573be6ae8 100755
--- a/samples/bpf/test_cgrp2_tc.sh
+++ b/samples/bpf/test_cgrp2_tc.sh
@@ -115,7 +115,7 @@ do_exit() {
if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ]
then
echo "------ DEBUG ------"
- echo "mount: "; mount | egrep '(cgroup2|bpf)'; echo
+ echo "mount: "; mount | grep -E '(cgroup2|bpf)'; echo
echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo
if [ -d "$BPF_FS_TC_SHARE" ]
then
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index 683913bbf279..9d41db09c480 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -162,7 +162,7 @@ static void read_route(struct nlmsghdr *nh, int nll)
__be32 gw;
} *prefix_value;
- prefix_key = alloca(sizeof(*prefix_key) + 3);
+ prefix_key = alloca(sizeof(*prefix_key) + 4);
prefix_value = alloca(sizeof(*prefix_value));
prefix_key->prefixlen = 32;
diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst
index 05350a1aadf9..30df7a707f02 100644
--- a/tools/bpf/bpftool/Documentation/common_options.rst
+++ b/tools/bpf/bpftool/Documentation/common_options.rst
@@ -23,12 +23,3 @@
Print all logs available, even debug-level information. This includes
logs from libbpf as well as from the verifier, when attempting to
load programs.
-
--l, --legacy
- Use legacy libbpf mode which has more relaxed BPF program
- requirements. By default, bpftool has more strict requirements
- about section names, changes pinning logic and doesn't support
- some of the older non-BTF map declarations.
-
- See https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0
- for details.
diff --git a/tools/bpf/bpftool/Documentation/substitutions.rst b/tools/bpf/bpftool/Documentation/substitutions.rst
index ccf1ffa0686c..827e3ffb1766 100644
--- a/tools/bpf/bpftool/Documentation/substitutions.rst
+++ b/tools/bpf/bpftool/Documentation/substitutions.rst
@@ -1,3 +1,3 @@
.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-.. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | { **-l** | **--legacy** }
+.. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** }
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 2957b42cab67..35f26f7c1124 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -261,7 +261,7 @@ _bpftool()
# Deal with options
if [[ ${words[cword]} == -* ]]; then
local c='--version --json --pretty --bpffs --mapcompat --debug \
- --use-loader --base-btf --legacy'
+ --use-loader --base-btf'
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
return 0
fi
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index b87e4a7fd689..352290ba7b29 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -467,9 +467,8 @@ static int dump_btf_c(const struct btf *btf,
int err = 0, i;
d = btf_dump__new(btf, btf_dump_printf, NULL, NULL);
- err = libbpf_get_error(d);
- if (err)
- return err;
+ if (!d)
+ return -errno;
printf("#ifndef __VMLINUX_H__\n");
printf("#define __VMLINUX_H__\n");
@@ -512,11 +511,9 @@ static struct btf *get_vmlinux_btf_from_sysfs(void)
struct btf *base;
base = btf__parse(sysfs_vmlinux, NULL);
- if (libbpf_get_error(base)) {
- p_err("failed to parse vmlinux BTF at '%s': %ld\n",
- sysfs_vmlinux, libbpf_get_error(base));
- base = NULL;
- }
+ if (!base)
+ p_err("failed to parse vmlinux BTF at '%s': %d\n",
+ sysfs_vmlinux, -errno);
return base;
}
@@ -559,7 +556,7 @@ static int do_dump(int argc, char **argv)
__u32 btf_id = -1;
const char *src;
int fd = -1;
- int err;
+ int err = 0;
if (!REQ_ARGS(2)) {
usage();
@@ -634,8 +631,8 @@ static int do_dump(int argc, char **argv)
base = get_vmlinux_btf_from_sysfs();
btf = btf__parse_split(*argv, base ?: base_btf);
- err = libbpf_get_error(btf);
if (!btf) {
+ err = -errno;
p_err("failed to load BTF from %s: %s",
*argv, strerror(errno));
goto done;
@@ -681,8 +678,8 @@ static int do_dump(int argc, char **argv)
}
btf = btf__load_from_kernel_by_id_split(btf_id, base_btf);
- err = libbpf_get_error(btf);
if (!btf) {
+ err = -errno;
p_err("get btf by id (%u): %s", btf_id, strerror(errno));
goto done;
}
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 19924b6ce796..eda71fdfe95a 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -75,7 +75,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
goto print;
prog_btf = btf__load_from_kernel_by_id(info.btf_id);
- if (libbpf_get_error(prog_btf))
+ if (!prog_btf)
goto print;
func_type = btf__type_by_id(prog_btf, finfo.type_id);
if (!func_type || !btf_is_func(func_type))
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 01bb8d8f5568..2883660d6b67 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -252,9 +252,8 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
int err = 0;
d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL);
- err = libbpf_get_error(d);
- if (err)
- return err;
+ if (!d)
+ return -errno;
bpf_object__for_each_map(map, obj) {
/* only generate definitions for memory-mapped internal maps */
@@ -976,13 +975,12 @@ static int do_skeleton(int argc, char **argv)
/* log_level1 + log_level2 + stats, but not stable UAPI */
opts.kernel_log_level = 1 + 2 + 4;
obj = bpf_object__open_mem(obj_data, file_sz, &opts);
- err = libbpf_get_error(obj);
- if (err) {
+ if (!obj) {
char err_buf[256];
+ err = -errno;
libbpf_strerror(err, err_buf, sizeof(err_buf));
p_err("failed to open BPF object file: %s", err_buf);
- obj = NULL;
goto out;
}
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index a3e6b167153d..9a1d2365a297 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -4,6 +4,7 @@
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
+#include <errno.h>
#include <unistd.h>
#include <linux/err.h>
#include <bpf/libbpf.h>
@@ -48,8 +49,8 @@ static int do_pin(int argc, char **argv)
}
obj = bpf_object__open(objfile);
- err = libbpf_get_error(obj);
- if (err) {
+ if (!obj) {
+ err = -errno;
p_err("can't open objfile %s", objfile);
goto close_map_fd;
}
@@ -62,13 +63,14 @@ static int do_pin(int argc, char **argv)
prog = bpf_object__next_program(obj, NULL);
if (!prog) {
+ err = -errno;
p_err("can't find bpf program in objfile %s", objfile);
goto close_obj;
}
link = bpf_program__attach_iter(prog, &iter_opts);
- err = libbpf_get_error(link);
- if (err) {
+ if (!link) {
+ err = -errno;
p_err("attach_iter failed for program %s",
bpf_program__name(prog));
goto close_obj;
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 741e50ee0b6c..08d0ac543c67 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -31,7 +31,6 @@ bool block_mount;
bool verifier_logs;
bool relaxed_maps;
bool use_loader;
-bool legacy_libbpf;
struct btf *base_btf;
struct hashmap *refs_table;
@@ -160,7 +159,6 @@ static int do_version(int argc, char **argv)
jsonw_start_object(json_wtr); /* features */
jsonw_bool_field(json_wtr, "libbfd", has_libbfd);
jsonw_bool_field(json_wtr, "llvm", has_llvm);
- jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf);
jsonw_bool_field(json_wtr, "skeletons", has_skeletons);
jsonw_bool_field(json_wtr, "bootstrap", bootstrap);
jsonw_end_object(json_wtr); /* features */
@@ -179,7 +177,6 @@ static int do_version(int argc, char **argv)
printf("features:");
print_feature("libbfd", has_libbfd, &nb_features);
print_feature("llvm", has_llvm, &nb_features);
- print_feature("libbpf_strict", !legacy_libbpf, &nb_features);
print_feature("skeletons", has_skeletons, &nb_features);
print_feature("bootstrap", bootstrap, &nb_features);
printf("\n");
@@ -337,12 +334,12 @@ static int do_batch(int argc, char **argv)
if (argc < 2) {
p_err("too few parameters for batch");
return -1;
- } else if (!is_prefix(*argv, "file")) {
- p_err("expected 'file', got: %s", *argv);
- return -1;
} else if (argc > 2) {
p_err("too many parameters for batch");
return -1;
+ } else if (!is_prefix(*argv, "file")) {
+ p_err("expected 'file', got: %s", *argv);
+ return -1;
}
NEXT_ARG();
@@ -451,7 +448,6 @@ int main(int argc, char **argv)
{ "debug", no_argument, NULL, 'd' },
{ "use-loader", no_argument, NULL, 'L' },
{ "base-btf", required_argument, NULL, 'B' },
- { "legacy", no_argument, NULL, 'l' },
{ 0 }
};
bool version_requested = false;
@@ -514,19 +510,15 @@ int main(int argc, char **argv)
break;
case 'B':
base_btf = btf__parse(optarg, NULL);
- if (libbpf_get_error(base_btf)) {
- p_err("failed to parse base BTF at '%s': %ld\n",
- optarg, libbpf_get_error(base_btf));
- base_btf = NULL;
+ if (!base_btf) {
+ p_err("failed to parse base BTF at '%s': %d\n",
+ optarg, -errno);
return -1;
}
break;
case 'L':
use_loader = true;
break;
- case 'l':
- legacy_libbpf = true;
- break;
default:
p_err("unrecognized option '%s'", argv[optind - 1]);
if (json_output)
@@ -536,14 +528,6 @@ int main(int argc, char **argv)
}
}
- if (!legacy_libbpf) {
- /* Allow legacy map definitions for skeleton generation.
- * It will still be rejected if users use LIBBPF_STRICT_ALL
- * mode for loading generated skeleton.
- */
- libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS);
- }
-
argc -= optind;
argv += optind;
if (argc < 0)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index d4e8a1aef787..a84224b6a604 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -57,7 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
#define HELP_SPEC_PROGRAM \
"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }"
#define HELP_SPEC_OPTIONS \
- "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}"
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}"
#define HELP_SPEC_MAP \
"MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
#define HELP_SPEC_LINK \
@@ -82,7 +82,6 @@ extern bool block_mount;
extern bool verifier_logs;
extern bool relaxed_maps;
extern bool use_loader;
-extern bool legacy_libbpf;
extern struct btf *base_btf;
extern struct hashmap *refs_table;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index d884070a2314..88911d3aa2d9 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -786,18 +786,18 @@ static int get_map_kv_btf(const struct bpf_map_info *info, struct btf **btf)
if (info->btf_vmlinux_value_type_id) {
if (!btf_vmlinux) {
btf_vmlinux = libbpf_find_kernel_btf();
- err = libbpf_get_error(btf_vmlinux);
- if (err) {
+ if (!btf_vmlinux) {
p_err("failed to get kernel btf");
- return err;
+ return -errno;
}
}
*btf = btf_vmlinux;
} else if (info->btf_value_type_id) {
*btf = btf__load_from_kernel_by_id(info->btf_id);
- err = libbpf_get_error(*btf);
- if (err)
+ if (!*btf) {
+ err = -errno;
p_err("failed to get btf");
+ }
} else {
*btf = NULL;
}
@@ -807,16 +807,10 @@ static int get_map_kv_btf(const struct bpf_map_info *info, struct btf **btf)
static void free_map_kv_btf(struct btf *btf)
{
- if (!libbpf_get_error(btf) && btf != btf_vmlinux)
+ if (btf != btf_vmlinux)
btf__free(btf);
}
-static void free_btf_vmlinux(void)
-{
- if (!libbpf_get_error(btf_vmlinux))
- btf__free(btf_vmlinux);
-}
-
static int
map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr,
bool show_header)
@@ -953,7 +947,7 @@ exit_close:
close(fds[i]);
exit_free:
free(fds);
- free_btf_vmlinux();
+ btf__free(btf_vmlinux);
return err;
}
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 9d32ffb9f22e..cfc9fdc1e863 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -322,7 +322,7 @@ static void show_prog_metadata(int fd, __u32 num_maps)
return;
btf = btf__load_from_kernel_by_id(map_info.btf_id);
- if (libbpf_get_error(btf))
+ if (!btf)
goto out_free;
t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
@@ -726,7 +726,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
if (info->btf_id) {
btf = btf__load_from_kernel_by_id(info->btf_id);
- if (libbpf_get_error(btf)) {
+ if (!btf) {
p_err("failed to get btf");
return -1;
}
@@ -1663,7 +1663,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
open_opts.kernel_log_level = 1 + 2 + 4;
obj = bpf_object__open_file(file, &open_opts);
- if (libbpf_get_error(obj)) {
+ if (!obj) {
p_err("failed to open object file");
goto err_free_reuse_maps;
}
@@ -1802,11 +1802,6 @@ err_unpin:
else
bpf_object__unpin_programs(obj, pinfile);
err_close_obj:
- if (!legacy_libbpf) {
- p_info("Warning: bpftool is now running in libbpf strict mode and has more stringent requirements about BPF programs.\n"
- "If it used to work for this object file but now doesn't, see --legacy option for more details.\n");
- }
-
bpf_object__close(obj);
err_free_reuse_maps:
for (i = 0; i < old_map_fds; i++)
@@ -1887,7 +1882,7 @@ static int do_loader(int argc, char **argv)
open_opts.kernel_log_level = 1 + 2 + 4;
obj = bpf_object__open_file(file, &open_opts);
- if (libbpf_get_error(obj)) {
+ if (!obj) {
p_err("failed to open object file");
goto err_close_obj;
}
@@ -2204,7 +2199,7 @@ static char *profile_target_name(int tgt_fd)
}
btf = btf__load_from_kernel_by_id(info.btf_id);
- if (libbpf_get_error(btf)) {
+ if (!btf) {
p_err("failed to load btf for prog FD %d", tgt_fd);
goto out;
}
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index e08a6ff2866c..903b80ff4e9a 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -32,7 +32,7 @@ static const struct btf *get_btf_vmlinux(void)
return btf_vmlinux;
btf_vmlinux = libbpf_find_kernel_btf();
- if (libbpf_get_error(btf_vmlinux))
+ if (!btf_vmlinux)
p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y");
return btf_vmlinux;
@@ -45,7 +45,7 @@ static const char *get_kern_struct_ops_name(const struct bpf_map_info *info)
const char *st_ops_name;
kern_btf = get_btf_vmlinux();
- if (libbpf_get_error(kern_btf))
+ if (!kern_btf)
return "<btf_vmlinux_not_found>";
t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id);
@@ -63,10 +63,8 @@ static __s32 get_map_info_type_id(void)
return map_info_type_id;
kern_btf = get_btf_vmlinux();
- if (libbpf_get_error(kern_btf)) {
- map_info_type_id = PTR_ERR(kern_btf);
- return map_info_type_id;
- }
+ if (!kern_btf)
+ return 0;
map_info_type_id = btf__find_by_name_kind(kern_btf, "bpf_map_info",
BTF_KIND_STRUCT);
@@ -415,7 +413,7 @@ static int do_dump(int argc, char **argv)
}
kern_btf = get_btf_vmlinux();
- if (libbpf_get_error(kern_btf))
+ if (!kern_btf)
return -1;
if (!json_output) {
@@ -498,7 +496,7 @@ static int do_register(int argc, char **argv)
open_opts.kernel_log_level = 1 + 2 + 4;
obj = bpf_object__open_file(file, &open_opts);
- if (libbpf_get_error(obj))
+ if (!obj)
return -1;
set_max_rlimit();
@@ -513,10 +511,9 @@ static int do_register(int argc, char **argv)
continue;
link = bpf_map__attach_struct_ops(map);
- if (libbpf_get_error(link)) {
+ if (!link) {
p_err("can't register struct_ops %s: %s",
- bpf_map__name(map),
- strerror(-PTR_ERR(link)));
+ bpf_map__name(map), strerror(errno));
nr_errs++;
continue;
}
@@ -593,8 +590,7 @@ int do_struct_ops(int argc, char **argv)
err = cmd_select(cmds, argc, argv, do_help);
- if (!libbpf_get_error(btf_vmlinux))
- btf__free(btf_vmlinux);
+ btf__free(btf_vmlinux);
return err;
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index fb4c911d2a03..f89de51a45db 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2584,14 +2584,19 @@ union bpf_attr {
* * **SOL_SOCKET**, which supports the following *optname*\ s:
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
* **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
- * **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
+ * **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**,
+ * **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**.
* * **IPPROTO_TCP**, which supports the following *optname*\ s:
* **TCP_CONGESTION**, **TCP_BPF_IW**,
* **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
+ * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
+ * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
+ * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
+ * **TCP_BPF_RTO_MIN**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
- * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * * **IPPROTO_IPV6**, which supports the following *optname*\ s:
+ * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
* Return
* 0 on success, or a negative error in case of failure.
*
@@ -2647,7 +2652,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags)
* Description
* Redirect the packet to the endpoint referenced by *map* at
* index *key*. Depending on its type, this *map* can contain
@@ -2808,12 +2813,10 @@ union bpf_attr {
* and **BPF_CGROUP_INET6_CONNECT**.
*
* This helper actually implements a subset of **getsockopt()**.
- * It supports the following *level*\ s:
- *
- * * **IPPROTO_TCP**, which supports *optname*
- * **TCP_CONGESTION**.
- * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
- * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * It supports the same set of *optname*\ s that is supported by
+ * the **bpf_setsockopt**\ () helper. The exceptions are
+ * **TCP_BPF_*** is **bpf_setsockopt**\ () only and
+ * **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only.
* Return
* 0 on success, or a negative error in case of failure.
*
@@ -6888,6 +6891,16 @@ struct bpf_dynptr {
__u64 :64;
} __attribute__((aligned(8)));
+struct bpf_list_head {
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
+struct bpf_list_node {
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 3bd812bf88ff..71e165b09ed5 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1724,7 +1724,8 @@ err_out:
memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len);
/* and now restore original strings section size; types data size
- * wasn't modified, so doesn't need restoring, see big comment above */
+ * wasn't modified, so doesn't need restoring, see big comment above
+ */
btf->hdr->str_len = old_strs_len;
hashmap__free(p.str_off_map);
@@ -2329,7 +2330,7 @@ int btf__add_restrict(struct btf *btf, int ref_type_id)
*/
int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id)
{
- if (!value|| !value[0])
+ if (!value || !value[0])
return libbpf_err(-EINVAL);
return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id);
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 12f7039e0ab2..deb2bc9a0a7b 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -1543,7 +1543,7 @@ static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
if (!new_name)
return 1;
- hashmap__find(name_map, orig_name, &dup_cnt);
+ (void)hashmap__find(name_map, orig_name, &dup_cnt);
dup_cnt++;
err = hashmap__set(name_map, new_name, dup_cnt, &old_name, NULL);
@@ -1989,7 +1989,7 @@ static int btf_dump_struct_data(struct btf_dump *d,
{
const struct btf_member *m = btf_members(t);
__u16 n = btf_vlen(t);
- int i, err;
+ int i, err = 0;
/* note that we increment depth before calling btf_dump_print() below;
* this is intentional. btf_dump_data_newline() will not print a
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 1d263885d635..93ccea238391 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -347,7 +347,8 @@ enum sec_def_flags {
SEC_ATTACHABLE = 2,
SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
/* attachment target is specified through BTF ID in either kernel or
- * other BPF program's BTF object */
+ * other BPF program's BTF object
+ */
SEC_ATTACH_BTF = 4,
/* BPF program type allows sleeping/blocking in kernel */
SEC_SLEEPABLE = 8,
@@ -488,7 +489,7 @@ struct bpf_map {
char *name;
/* real_name is defined for special internal maps (.rodata*,
* .data*, .bss, .kconfig) and preserves their original ELF section
- * name. This is important to be be able to find corresponding BTF
+ * name. This is important to be able to find corresponding BTF
* DATASEC information.
*/
char *real_name;
@@ -1863,12 +1864,20 @@ static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
return -ERANGE;
}
switch (ext->kcfg.sz) {
- case 1: *(__u8 *)ext_val = value; break;
- case 2: *(__u16 *)ext_val = value; break;
- case 4: *(__u32 *)ext_val = value; break;
- case 8: *(__u64 *)ext_val = value; break;
- default:
- return -EINVAL;
+ case 1:
+ *(__u8 *)ext_val = value;
+ break;
+ case 2:
+ *(__u16 *)ext_val = value;
+ break;
+ case 4:
+ *(__u32 *)ext_val = value;
+ break;
+ case 8:
+ *(__u64 *)ext_val = value;
+ break;
+ default:
+ return -EINVAL;
}
ext->is_set = true;
return 0;
@@ -2770,7 +2779,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
m->type = enum64_placeholder_id;
m->offset = 0;
}
- }
+ }
}
return 0;
@@ -3502,7 +3511,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
sec_desc->sec_type = SEC_RELO;
sec_desc->shdr = sh;
sec_desc->data = data;
- } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
+ } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
+ str_has_pfx(name, BSS_SEC "."))) {
sec_desc->sec_type = SEC_BSS;
sec_desc->shdr = sh;
sec_desc->data = data;
@@ -3518,7 +3528,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
}
/* sort BPF programs by section name and in-section instruction offset
- * for faster search */
+ * for faster search
+ */
if (obj->nr_programs)
qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
@@ -3817,7 +3828,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return -EINVAL;
}
ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
- &ext->kcfg.is_signed);
+ &ext->kcfg.is_signed);
if (ext->kcfg.type == KCFG_UNKNOWN) {
pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
return -ENOTSUP;
@@ -4965,9 +4976,9 @@ bpf_object__reuse_map(struct bpf_map *map)
err = bpf_map__reuse_fd(map, pin_fd);
close(pin_fd);
- if (err) {
+ if (err)
return err;
- }
+
map->pinned = true;
pr_debug("reused pinned map at '%s'\n", map->pin_path);
@@ -5485,7 +5496,7 @@ static int load_module_btfs(struct bpf_object *obj)
}
err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
- sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
+ sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
if (err)
goto err_out;
@@ -6237,7 +6248,8 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* prog; each main prog can have a different set of
* subprograms appended (potentially in different order as
* well), so position of any subprog can be different for
- * different main programs */
+ * different main programs
+ */
insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
@@ -10995,7 +11007,7 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
- usdt_provider, usdt_name, usdt_cookie);
+ usdt_provider, usdt_name, usdt_cookie);
err = libbpf_get_error(link);
if (err)
return libbpf_err_ptr(err);
@@ -12304,7 +12316,7 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
btf = bpf_object__btf(s->obj);
if (!btf) {
pr_warn("subskeletons require BTF at runtime (object %s)\n",
- bpf_object__name(s->obj));
+ bpf_object__name(s->obj));
return libbpf_err(-errno);
}
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index d285171d4b69..51808c5f0014 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -128,7 +128,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
/* Map read-only producer page and data pages. We map twice as big
* data size to allow simple reading of samples that wrap around the
* end of a ring buffer. See kernel implementation for details.
- * */
+ */
tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ,
MAP_SHARED, map_fd, rb->page_size);
if (tmp == MAP_FAILED) {
@@ -220,7 +220,7 @@ static inline int roundup_len(__u32 len)
return (len + 7) / 8 * 8;
}
-static int64_t ringbuf_process_ring(struct ring* r)
+static int64_t ringbuf_process_ring(struct ring *r)
{
int *len_ptr, len, err;
/* 64-bit to avoid overflow in case of extreme application behavior */
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 09416d5d2e33..8e77515d56f6 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -38,12 +38,14 @@ kprobe_multi_test/skel_api # kprobe_multi__attach unexpect
ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF
ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2
libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524)
+linked_list
lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524)
lru_bug # lru_bug__attach unexpected error: -524 (errno 524)
modify_return # modify_return__attach failed unexpected error: -524 (errno 524)
module_attach # skel_attach skeleton attach failed: -524
mptcp/base # run_test mptcp unexpected error: -524 (errno 524)
netcnt # packets unexpected packets: actual 10001 != expected 10000
+rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22
recursion # skel_attach unexpected error: -524 (errno 524)
ringbuf # skel_attach skeleton attachment failed: -1
setget_sockopt # attach_cgroup unexpected error: -524
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index be4e3d47ea3e..648a8a1b6b78 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -10,6 +10,7 @@ bpf_nf # JIT does not support calling kernel f
bpf_tcp_ca # JIT does not support calling kernel function (kfunc)
cb_refs # expected error message unexpected error: -524 (trampoline)
cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc)
+cgrp_kfunc # JIT does not support calling kernel function
cgrp_local_storage # prog_attach unexpected error: -524 (trampoline)
core_read_macros # unknown func bpf_probe_read#4 (overlapping)
d_path # failed to auto-attach program 'prog_stat': -524 (trampoline)
@@ -33,6 +34,7 @@ ksyms_module # test_ksyms_module__open_and_load unex
ksyms_module_libbpf # JIT does not support calling kernel function (kfunc)
ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?)
libbpf_get_fd_by_id_opts # failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
+linked_list # JIT does not support calling kernel function (kfunc)
lookup_key # JIT does not support calling kernel function (kfunc)
lru_bug # prog 'printk': failed to auto-attach: -524
map_kptr # failed to open_and_load program: -524 (trampoline)
@@ -41,6 +43,7 @@ module_attach # skel_attach skeleton attach failed: -
mptcp
netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?)
probe_user # check_kprobe_res wrong kprobe res from probe read (?)
+rcu_read_lock # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?)
recursion # skel_attach unexpected error: -524 (trampoline)
ringbuf # skel_load skeleton load failed (?)
select_reuseport # intermittently fails on new s390x setup
@@ -53,6 +56,7 @@ skc_to_unix_sock # could not attach BPF object unexpecte
socket_cookie # prog_attach unexpected error: -524 (trampoline)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?)
+task_kfunc # JIT does not support calling kernel function
task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline)
test_bpffs # bpffs test failed 255 (iterator)
test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline)
@@ -69,6 +73,7 @@ trace_printk # trace_printk__load unexpected error:
trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?)
tracing_struct # failed to auto-attach: -524 (trampoline)
trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
+type_cast # JIT does not support calling kernel function
unpriv_bpf_disabled # fentry
user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?)
verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f3cd17026ee5..6a0f043dc410 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -201,7 +201,7 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
$(call msg,MOD,,$@)
$(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation
- $(Q)$(MAKE) $(submake_extras) -C bpf_testmod
+ $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod
$(Q)cp bpf_testmod/bpf_testmod.ko $@
DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
@@ -310,9 +310,9 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
define get_sys_includes
-$(shell $(1) -v -E - </dev/null 2>&1 \
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
endef
# Determine target endianness.
@@ -320,7 +320,11 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
-CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
-I$(abspath $(OUTPUT)/../usr/include)
@@ -542,7 +546,7 @@ $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
# Define test_progs BPF-GCC-flavored test runner.
ifneq ($(BPF_GCC),)
TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc)
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc,)
$(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc))
endif
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
new file mode 100644
index 000000000000..424f7bbbfe9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -0,0 +1,68 @@
+#ifndef __BPF_EXPERIMENTAL__
+#define __BPF_EXPERIMENTAL__
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node)))
+
+/* Description
+ * Allocates an object of the type represented by 'local_type_id' in
+ * program BTF. User may use the bpf_core_type_id_local macro to pass the
+ * type ID of a struct in program BTF.
+ *
+ * The 'local_type_id' parameter must be a known constant.
+ * The 'meta' parameter is a hidden argument that is ignored.
+ * Returns
+ * A pointer to an object of the type corresponding to the passed in
+ * 'local_type_id', or NULL on failure.
+ */
+extern void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_obj_new_impl */
+#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL))
+
+/* Description
+ * Free an allocated object. All fields of the object that require
+ * destruction will be destructed before the storage is freed.
+ *
+ * The 'meta' parameter is a hidden argument that is ignored.
+ * Returns
+ * Void.
+ */
+extern void bpf_obj_drop_impl(void *kptr, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_obj_drop_impl */
+#define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL)
+
+/* Description
+ * Add a new entry to the beginning of the BPF linked list.
+ * Returns
+ * Void.
+ */
+extern void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym;
+
+/* Description
+ * Add a new entry to the end of the BPF linked list.
+ * Returns
+ * Void.
+ */
+extern void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym;
+
+/* Description
+ * Remove the entry at the beginning of the BPF linked list.
+ * Returns
+ * Pointer to bpf_list_node of deleted entry, or NULL if list is empty.
+ */
+extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym;
+
+/* Description
+ * Remove the entry at the end of the BPF linked list.
+ * Returns
+ * Pointer to bpf_list_node of deleted entry, or NULL if list is empty.
+ */
+extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
+
+#endif
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index dd1aa5afcf5a..9e95b37a7dff 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -333,6 +333,25 @@ int get_root_cgroup(void)
return fd;
}
+/*
+ * remove_cgroup() - Remove a cgroup
+ * @relative_path: The cgroup path, relative to the workdir, to remove
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir. It also expects the cgroup doesn't have any children or live
+ * processes and it removes the cgroup.
+ *
+ * On failure, it will print an error to stderr.
+ */
+void remove_cgroup(const char *relative_path)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ if (rmdir(cgroup_path))
+ log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
+}
+
/**
* create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
* @relative_path: The cgroup path, relative to the workdir, to join
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 3358734356ab..f099a166c94d 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -18,6 +18,7 @@ int write_cgroup_file_parent(const char *relative_path, const char *file,
int cgroup_setup_and_join(const char *relative_path);
int get_root_cgroup(void);
int create_and_get_cgroup(const char *relative_path);
+void remove_cgroup(const char *relative_path);
unsigned long long get_cgroup_id(const char *relative_path);
int join_cgroup(const char *relative_path);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 7a99a6728169..f9034ea00bc9 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -8,6 +8,7 @@ CONFIG_BPF_LIRC_MODE2=y
CONFIG_BPF_LSM=y
CONFIG_BPF_STREAM_PARSER=y
CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_UNPRIV_DEFAULT_OFF=n
CONFIG_CGROUP_BPF=y
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_SHA256=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index bec15558fd93..1f37adff7632 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -426,6 +426,10 @@ static int setns_by_fd(int nsfd)
if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
return err;
+ err = mount("debugfs", "/sys/kernel/debug", "debugfs", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys/kernel/debug"))
+ return err;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 95a2b80f0d17..de1b5b9eb93a 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3949,6 +3949,20 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "Invalid return type",
},
{
+ .descr = "decl_tag test #17, func proto, argument",
+ .raw_types = {
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), 4), (-1), /* [1] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), /* [2] */
+ BTF_FUNC_PROTO_ENC(0, 1), /* [3] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_VAR_ENC(NAME_TBD, 2, 0), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag1\0var"),
+ .btf_load_err = true,
+ .err_str = "Invalid arg#1",
+},
+{
.descr = "type_tag test #1",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
index c4a2adb38da1..e02feb5fae97 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
@@ -189,6 +189,80 @@ static void test_walk_self_only(struct cgroup_iter *skel)
BPF_CGROUP_ITER_SELF_ONLY, "self_only");
}
+static void test_walk_dead_self_only(struct cgroup_iter *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ char expected_output[128], buf[128];
+ const char *cgrp_name = "/dead";
+ union bpf_iter_link_info linfo;
+ int len, cgrp_fd, iter_fd;
+ struct bpf_link *link;
+ size_t left;
+ char *p;
+
+ cgrp_fd = create_and_get_cgroup(cgrp_name);
+ if (!ASSERT_GE(cgrp_fd, 0, "create cgrp"))
+ return;
+
+ /* The cgroup will be dead during read() iteration, so it only has
+ * epilogue in the output
+ */
+ snprintf(expected_output, sizeof(expected_output), EPILOGUE);
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = cgrp_fd;
+ linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto close_cgrp;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "iter_create"))
+ goto free_link;
+
+ /* Close link fd and cgroup fd */
+ bpf_link__destroy(link);
+ close(cgrp_fd);
+
+ /* Remove cgroup to mark it as dead */
+ remove_cgroup(cgrp_name);
+
+ /* Two kern_sync_rcu() and usleep() pairs are used to wait for the
+ * releases of cgroup css, and the last kern_sync_rcu() and usleep()
+ * pair is used to wait for the free of cgroup itself.
+ */
+ kern_sync_rcu();
+ usleep(8000);
+ kern_sync_rcu();
+ usleep(8000);
+ kern_sync_rcu();
+ usleep(1000);
+
+ memset(buf, 0, sizeof(buf));
+ left = ARRAY_SIZE(buf);
+ p = buf;
+ while ((len = read(iter_fd, p, left)) > 0) {
+ p += len;
+ left -= len;
+ }
+
+ ASSERT_STREQ(buf, expected_output, "dead cgroup output");
+
+ /* read() after iter finishes should be ok. */
+ if (len == 0)
+ ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read");
+
+ close(iter_fd);
+ return;
+free_link:
+ bpf_link__destroy(link);
+close_cgrp:
+ close(cgrp_fd);
+}
+
void test_cgroup_iter(void)
{
struct cgroup_iter *skel = NULL;
@@ -217,6 +291,8 @@ void test_cgroup_iter(void)
test_early_termination(skel);
if (test__start_subtest("cgroup_iter__self_only"))
test_walk_self_only(skel);
+ if (test__start_subtest("cgroup_iter__dead_self_only"))
+ test_walk_dead_self_only(skel);
out:
cgroup_iter__destroy(skel);
cleanup_cgroups();
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
new file mode 100644
index 000000000000..973f0c5af965
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <cgroup_helpers.h>
+#include <test_progs.h>
+
+#include "cgrp_kfunc_failure.skel.h"
+#include "cgrp_kfunc_success.skel.h"
+
+static size_t log_buf_sz = 1 << 20; /* 1 MB */
+static char obj_log_buf[1048576];
+
+static struct cgrp_kfunc_success *open_load_cgrp_kfunc_skel(void)
+{
+ struct cgrp_kfunc_success *skel;
+ int err;
+
+ skel = cgrp_kfunc_success__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ skel->bss->pid = getpid();
+
+ err = cgrp_kfunc_success__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ cgrp_kfunc_success__destroy(skel);
+ return NULL;
+}
+
+static int mkdir_rm_test_dir(void)
+{
+ int fd;
+ const char *cgrp_path = "cgrp_kfunc";
+
+ fd = create_and_get_cgroup(cgrp_path);
+ if (!ASSERT_GT(fd, 0, "mkdir_cgrp_fd"))
+ return -1;
+
+ close(fd);
+ remove_cgroup(cgrp_path);
+
+ return 0;
+}
+
+static void run_success_test(const char *prog_name)
+{
+ struct cgrp_kfunc_success *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link = NULL;
+
+ skel = open_load_cgrp_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "attached_link"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->invocations, 0, "pre_rmdir_count");
+ if (!ASSERT_OK(mkdir_rm_test_dir(), "cgrp_mkdir"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->invocations, 1, "post_rmdir_count");
+ ASSERT_OK(skel->bss->err, "post_rmdir_err");
+
+cleanup:
+ bpf_link__destroy(link);
+ cgrp_kfunc_success__destroy(skel);
+}
+
+static const char * const success_tests[] = {
+ "test_cgrp_acquire_release_argument",
+ "test_cgrp_acquire_leave_in_map",
+ "test_cgrp_xchg_release",
+ "test_cgrp_get_release",
+ "test_cgrp_get_ancestors",
+};
+
+static struct {
+ const char *prog_name;
+ const char *expected_err_msg;
+} failure_tests[] = {
+ {"cgrp_kfunc_acquire_untrusted", "R1 must be referenced or trusted"},
+ {"cgrp_kfunc_acquire_fp", "arg#0 pointer type STRUCT cgroup must point"},
+ {"cgrp_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"},
+ {"cgrp_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"},
+ {"cgrp_kfunc_acquire_null", "arg#0 pointer type STRUCT cgroup must point"},
+ {"cgrp_kfunc_acquire_unreleased", "Unreleased reference"},
+ {"cgrp_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"},
+ {"cgrp_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"},
+ {"cgrp_kfunc_get_null", "arg#0 expected pointer to map value"},
+ {"cgrp_kfunc_xchg_unreleased", "Unreleased reference"},
+ {"cgrp_kfunc_get_unreleased", "Unreleased reference"},
+ {"cgrp_kfunc_release_untrusted", "arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket"},
+ {"cgrp_kfunc_release_fp", "arg#0 pointer type STRUCT cgroup must point"},
+ {"cgrp_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
+ {"cgrp_kfunc_release_unacquired", "release kernel function bpf_cgroup_release expects"},
+};
+
+static void verify_fail(const char *prog_name, const char *expected_err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct cgrp_kfunc_failure *skel;
+ int err, i;
+
+ opts.kernel_log_buf = obj_log_buf;
+ opts.kernel_log_size = log_buf_sz;
+ opts.kernel_log_level = 1;
+
+ skel = cgrp_kfunc_failure__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "cgrp_kfunc_failure__open_opts"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
+ struct bpf_program *prog;
+ const char *curr_name = failure_tests[i].prog_name;
+
+ prog = bpf_object__find_program_by_name(skel->obj, curr_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name));
+ }
+
+ err = cgrp_kfunc_failure__load(skel);
+ if (!ASSERT_ERR(err, "unexpected load success"))
+ goto cleanup;
+
+ if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
+ fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
+ fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
+ }
+
+cleanup:
+ cgrp_kfunc_failure__destroy(skel);
+}
+
+void test_cgrp_kfunc(void)
+{
+ int i, err;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "cgrp_env_setup"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+ if (!test__start_subtest(success_tests[i]))
+ continue;
+
+ run_success_test(success_tests[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
+ if (!test__start_subtest(failure_tests[i].prog_name))
+ continue;
+
+ verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg);
+ }
+
+cleanup:
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 8fc4e6c02bfd..b0c06f821cb8 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -17,7 +17,7 @@ static struct {
{"ringbuf_missing_release2", "Unreleased reference id=2"},
{"ringbuf_missing_release_callback", "Unreleased reference id"},
{"use_after_invalid", "Expected an initialized dynptr as arg #3"},
- {"ringbuf_invalid_api", "type=mem expected=alloc_mem"},
+ {"ringbuf_invalid_api", "type=mem expected=ringbuf_mem"},
{"add_dynptr_to_map1", "invalid indirect read from stack"},
{"add_dynptr_to_map2", "invalid indirect read from stack"},
{"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"},
diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
new file mode 100644
index 000000000000..0613f3bb8b5e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <net/if.h>
+#include "empty_skb.skel.h"
+
+#define SYS(cmd) ({ \
+ if (!ASSERT_OK(system(cmd), (cmd))) \
+ goto out; \
+})
+
+void serial_test_empty_skb(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, tattr);
+ struct empty_skb *bpf_obj = NULL;
+ struct nstoken *tok = NULL;
+ struct bpf_program *prog;
+ char eth_hlen_pp[15];
+ char eth_hlen[14];
+ int veth_ifindex;
+ int ipip_ifindex;
+ int err;
+ int i;
+
+ struct {
+ const char *msg;
+ const void *data_in;
+ __u32 data_size_in;
+ int *ifindex;
+ int err;
+ int ret;
+ bool success_on_tc;
+ } tests[] = {
+ /* Empty packets are always rejected. */
+
+ {
+ /* BPF_PROG_RUN ETH_HLEN size check */
+ .msg = "veth empty ingress packet",
+ .data_in = NULL,
+ .data_size_in = 0,
+ .ifindex = &veth_ifindex,
+ .err = -EINVAL,
+ },
+ {
+ /* BPF_PROG_RUN ETH_HLEN size check */
+ .msg = "ipip empty ingress packet",
+ .data_in = NULL,
+ .data_size_in = 0,
+ .ifindex = &ipip_ifindex,
+ .err = -EINVAL,
+ },
+
+ /* ETH_HLEN-sized packets:
+ * - can not be redirected at LWT_XMIT
+ * - can be redirected at TC to non-tunneling dest
+ */
+
+ {
+ /* __bpf_redirect_common */
+ .msg = "veth ETH_HLEN packet ingress",
+ .data_in = eth_hlen,
+ .data_size_in = sizeof(eth_hlen),
+ .ifindex = &veth_ifindex,
+ .ret = -ERANGE,
+ .success_on_tc = true,
+ },
+ {
+ /* __bpf_redirect_no_mac
+ *
+ * lwt: skb->len=0 <= skb_network_offset=0
+ * tc: skb->len=14 <= skb_network_offset=14
+ */
+ .msg = "ipip ETH_HLEN packet ingress",
+ .data_in = eth_hlen,
+ .data_size_in = sizeof(eth_hlen),
+ .ifindex = &ipip_ifindex,
+ .ret = -ERANGE,
+ },
+
+ /* ETH_HLEN+1-sized packet should be redirected. */
+
+ {
+ .msg = "veth ETH_HLEN+1 packet ingress",
+ .data_in = eth_hlen_pp,
+ .data_size_in = sizeof(eth_hlen_pp),
+ .ifindex = &veth_ifindex,
+ },
+ {
+ .msg = "ipip ETH_HLEN+1 packet ingress",
+ .data_in = eth_hlen_pp,
+ .data_size_in = sizeof(eth_hlen_pp),
+ .ifindex = &ipip_ifindex,
+ },
+ };
+
+ SYS("ip netns add empty_skb");
+ tok = open_netns("empty_skb");
+ SYS("ip link add veth0 type veth peer veth1");
+ SYS("ip link set dev veth0 up");
+ SYS("ip link set dev veth1 up");
+ SYS("ip addr add 10.0.0.1/8 dev veth0");
+ SYS("ip addr add 10.0.0.2/8 dev veth1");
+ veth_ifindex = if_nametoindex("veth0");
+
+ SYS("ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2");
+ SYS("ip link set ipip0 up");
+ SYS("ip addr add 192.168.1.1/16 dev ipip0");
+ ipip_ifindex = if_nametoindex("ipip0");
+
+ bpf_obj = empty_skb__open_and_load();
+ if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ bpf_object__for_each_program(prog, bpf_obj->obj) {
+ char buf[128];
+ bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2);
+
+ tattr.data_in = tests[i].data_in;
+ tattr.data_size_in = tests[i].data_size_in;
+
+ tattr.data_size_out = 0;
+ bpf_obj->bss->ifindex = *tests[i].ifindex;
+ bpf_obj->bss->ret = 0;
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &tattr);
+ sprintf(buf, "err: %s [%s]", tests[i].msg, bpf_program__name(prog));
+
+ if (at_tc && tests[i].success_on_tc)
+ ASSERT_GE(err, 0, buf);
+ else
+ ASSERT_EQ(err, tests[i].err, buf);
+ sprintf(buf, "ret: %s [%s]", tests[i].msg, bpf_program__name(prog));
+ if (at_tc && tests[i].success_on_tc)
+ ASSERT_GE(bpf_obj->bss->ret, 0, buf);
+ else
+ ASSERT_EQ(bpf_obj->bss->ret, tests[i].ret, buf);
+ }
+ }
+
+out:
+ if (bpf_obj)
+ empty_skb__destroy(bpf_obj);
+ if (tok)
+ close_netns(tok);
+ system("ip netns del empty_skb");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
index c210657d4d0a..55d641c1f126 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
@@ -22,7 +22,7 @@ static struct {
"arg#0 pointer type STRUCT bpf_dynptr_kern points to unsupported dynamic pointer type", 0},
{"not_valid_dynptr",
"arg#0 pointer type STRUCT bpf_dynptr_kern must be valid and initialized", 0},
- {"not_ptr_to_stack", "arg#0 pointer type STRUCT bpf_dynptr_kern not to stack", 0},
+ {"not_ptr_to_stack", "arg#0 expected pointer to stack", 0},
{"dynptr_data_null", NULL, -EBADMSG},
};
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
new file mode 100644
index 000000000000..9a7d4c47af63
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -0,0 +1,740 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/btf.h>
+#include <test_btf.h>
+#include <linux/btf.h>
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "linked_list.skel.h"
+#include "linked_list_fail.skel.h"
+
+static char log_buf[1024 * 1024];
+
+static struct {
+ const char *prog_name;
+ const char *err_msg;
+} linked_list_fail_tests[] = {
+#define TEST(test, off) \
+ { #test "_missing_lock_push_front", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \
+ { #test "_missing_lock_push_back", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \
+ { #test "_missing_lock_pop_front", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \
+ { #test "_missing_lock_pop_back", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" },
+ TEST(kptr, 32)
+ TEST(global, 16)
+ TEST(map, 0)
+ TEST(inner_map, 0)
+#undef TEST
+#define TEST(test, op) \
+ { #test "_kptr_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=32 must be held for bpf_list_head" }, \
+ { #test "_global_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=16 must be held for bpf_list_head" }, \
+ { #test "_map_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=0 must be held for bpf_list_head" }, \
+ { #test "_inner_map_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=0 must be held for bpf_list_head" },
+ TEST(kptr, push_front)
+ TEST(kptr, push_back)
+ TEST(kptr, pop_front)
+ TEST(kptr, pop_back)
+ TEST(global, push_front)
+ TEST(global, push_back)
+ TEST(global, pop_front)
+ TEST(global, pop_back)
+ TEST(map, push_front)
+ TEST(map, push_back)
+ TEST(map, pop_front)
+ TEST(map, pop_back)
+ TEST(inner_map, push_front)
+ TEST(inner_map, push_back)
+ TEST(inner_map, pop_front)
+ TEST(inner_map, pop_back)
+#undef TEST
+ { "map_compat_kprobe", "tracing progs cannot use bpf_list_head yet" },
+ { "map_compat_kretprobe", "tracing progs cannot use bpf_list_head yet" },
+ { "map_compat_tp", "tracing progs cannot use bpf_list_head yet" },
+ { "map_compat_perf", "tracing progs cannot use bpf_list_head yet" },
+ { "map_compat_raw_tp", "tracing progs cannot use bpf_list_head yet" },
+ { "map_compat_raw_tp_w", "tracing progs cannot use bpf_list_head yet" },
+ { "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" },
+ { "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" },
+ { "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" },
+ { "obj_drop_non_zero_off", "R1 must have zero offset when passed to release func" },
+ { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" },
+ { "obj_new_acq", "Unreleased reference id=" },
+ { "use_after_drop", "invalid mem access 'scalar'" },
+ { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" },
+ { "direct_read_lock", "direct access to bpf_spin_lock is disallowed" },
+ { "direct_write_lock", "direct access to bpf_spin_lock is disallowed" },
+ { "direct_read_head", "direct access to bpf_list_head is disallowed" },
+ { "direct_write_head", "direct access to bpf_list_head is disallowed" },
+ { "direct_read_node", "direct access to bpf_list_node is disallowed" },
+ { "direct_write_node", "direct access to bpf_list_node is disallowed" },
+ { "write_after_push_front", "only read is supported" },
+ { "write_after_push_back", "only read is supported" },
+ { "use_after_unlock_push_front", "invalid mem access 'scalar'" },
+ { "use_after_unlock_push_back", "invalid mem access 'scalar'" },
+ { "double_push_front", "arg#1 expected pointer to allocated object" },
+ { "double_push_back", "arg#1 expected pointer to allocated object" },
+ { "no_node_value_type", "bpf_list_node not found at offset=0" },
+ { "incorrect_value_type",
+ "operation on bpf_list_head expects arg#1 bpf_list_node at offset=0 in struct foo, "
+ "but arg is at offset=0 in struct bar" },
+ { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
+ { "incorrect_node_off1", "bpf_list_node not found at offset=1" },
+ { "incorrect_node_off2", "arg#1 offset=40, but expected bpf_list_node at offset=0 in struct foo" },
+ { "no_head_type", "bpf_list_head not found at offset=0" },
+ { "incorrect_head_var_off1", "R1 doesn't have constant offset" },
+ { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
+ { "incorrect_head_off1", "bpf_list_head not found at offset=17" },
+ { "incorrect_head_off2", "bpf_list_head not found at offset=1" },
+ { "pop_front_off",
+ "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) "
+ "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n"
+ "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" },
+ { "pop_back_off",
+ "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) "
+ "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n"
+ "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" },
+};
+
+static void test_linked_list_fail_prog(const char *prog_name, const char *err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+ .kernel_log_size = sizeof(log_buf),
+ .kernel_log_level = 1);
+ struct linked_list_fail *skel;
+ struct bpf_program *prog;
+ int ret;
+
+ skel = linked_list_fail__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "linked_list_fail__open_opts"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto end;
+
+ bpf_program__set_autoload(prog, true);
+
+ ret = linked_list_fail__load(skel);
+ if (!ASSERT_ERR(ret, "linked_list_fail__load must fail"))
+ goto end;
+
+ if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) {
+ fprintf(stderr, "Expected: %s\n", err_msg);
+ fprintf(stderr, "Verifier: %s\n", log_buf);
+ }
+
+end:
+ linked_list_fail__destroy(skel);
+}
+
+static void clear_fields(struct bpf_map *map)
+{
+ char buf[24];
+ int key = 0;
+
+ memset(buf, 0xff, sizeof(buf));
+ ASSERT_OK(bpf_map__update_elem(map, &key, sizeof(key), buf, sizeof(buf), 0), "check_and_free_fields");
+}
+
+enum {
+ TEST_ALL,
+ PUSH_POP,
+ PUSH_POP_MULT,
+ LIST_IN_LIST,
+};
+
+static void test_linked_list_success(int mode, bool leave_in_map)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct linked_list *skel;
+ int ret;
+
+ skel = linked_list__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "linked_list__open_and_load"))
+ return;
+
+ if (mode == LIST_IN_LIST)
+ goto lil;
+ if (mode == PUSH_POP_MULT)
+ goto ppm;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop), &opts);
+ ASSERT_OK(ret, "map_list_push_pop");
+ ASSERT_OK(opts.retval, "map_list_push_pop retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.array_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop), &opts);
+ ASSERT_OK(ret, "inner_map_list_push_pop");
+ ASSERT_OK(opts.retval, "inner_map_list_push_pop retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.inner_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts);
+ ASSERT_OK(ret, "global_list_push_pop");
+ ASSERT_OK(opts.retval, "global_list_push_pop retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+
+ if (mode == PUSH_POP)
+ goto end;
+
+ppm:
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop_multiple), &opts);
+ ASSERT_OK(ret, "map_list_push_pop_multiple");
+ ASSERT_OK(opts.retval, "map_list_push_pop_multiple retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.array_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop_multiple), &opts);
+ ASSERT_OK(ret, "inner_map_list_push_pop_multiple");
+ ASSERT_OK(opts.retval, "inner_map_list_push_pop_multiple retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.inner_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts);
+ ASSERT_OK(ret, "global_list_push_pop_multiple");
+ ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+
+ if (mode == PUSH_POP_MULT)
+ goto end;
+
+lil:
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_in_list), &opts);
+ ASSERT_OK(ret, "map_list_in_list");
+ ASSERT_OK(opts.retval, "map_list_in_list retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.array_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_in_list), &opts);
+ ASSERT_OK(ret, "inner_map_list_in_list");
+ ASSERT_OK(opts.retval, "inner_map_list_in_list retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.inner_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts);
+ ASSERT_OK(ret, "global_list_in_list");
+ ASSERT_OK(opts.retval, "global_list_in_list retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+end:
+ linked_list__destroy(skel);
+}
+
+#define SPIN_LOCK 2
+#define LIST_HEAD 3
+#define LIST_NODE 4
+
+static struct btf *init_btf(void)
+{
+ int id, lid, hid, nid;
+ struct btf *btf;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "btf__new_empty"))
+ return NULL;
+ id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+ if (!ASSERT_EQ(id, 1, "btf__add_int"))
+ goto end;
+ lid = btf__add_struct(btf, "bpf_spin_lock", 4);
+ if (!ASSERT_EQ(lid, SPIN_LOCK, "btf__add_struct bpf_spin_lock"))
+ goto end;
+ hid = btf__add_struct(btf, "bpf_list_head", 16);
+ if (!ASSERT_EQ(hid, LIST_HEAD, "btf__add_struct bpf_list_head"))
+ goto end;
+ nid = btf__add_struct(btf, "bpf_list_node", 16);
+ if (!ASSERT_EQ(nid, LIST_NODE, "btf__add_struct bpf_list_node"))
+ goto end;
+ return btf;
+end:
+ btf__free(btf);
+ return NULL;
+}
+
+static void test_btf(void)
+{
+ struct btf *btf = NULL;
+ int id, err;
+
+ while (test__start_subtest("btf: too many locks")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 32, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+ err = btf__add_field(btf, "c", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -E2BIG, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing lock")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 16);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:a", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:baz:a"))
+ break;
+ id = btf__add_struct(btf, "baz", 16);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field baz::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: bad offset")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 36);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EEXIST, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing contains:")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing struct")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:bar", 5, 1);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:bar"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ENOENT, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing node")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:c", 5, 1);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:c"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ btf__free(btf);
+ ASSERT_EQ(err, -ENOENT, "check btf");
+ break;
+ }
+
+ while (test__start_subtest("btf: node incorrect type")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 20);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a"))
+ break;
+ id = btf__add_struct(btf, "bar", 4);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: multiple bpf_list_node with name b")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 52);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 256, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ err = btf__add_field(btf, "d", SPIN_LOCK, 384, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::d"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning | owned AA cycle")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 36);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning | owned ABA cycle")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 36);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 36);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 20);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a"))
+ break;
+ id = btf__add_struct(btf, "bar", 16);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, 0, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning -> owning | owned -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 20);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 36);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a"))
+ break;
+ id = btf__add_struct(btf, "baz", 16);
+ if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field baz:a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, 0, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning | owned -> owning | owned -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 36);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 36);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar:a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar:b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar:c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a"))
+ break;
+ id = btf__add_struct(btf, "baz", 16);
+ if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field baz:a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning -> owning | owned -> owning | owned -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 20);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 36);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:b", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag"))
+ break;
+ id = btf__add_struct(btf, "baz", 36);
+ if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bam:a", 9, 0);
+ if (!ASSERT_EQ(id, 10, "btf__add_decl_tag contains:bam:a"))
+ break;
+ id = btf__add_struct(btf, "bam", 16);
+ if (!ASSERT_EQ(id, 11, "btf__add_struct bam"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bam::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+}
+
+void test_linked_list(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(linked_list_fail_tests); i++) {
+ if (!test__start_subtest(linked_list_fail_tests[i].prog_name))
+ continue;
+ test_linked_list_fail_prog(linked_list_fail_tests[i].prog_name,
+ linked_list_fail_tests[i].err_msg);
+ }
+ test_btf();
+ test_linked_list_success(PUSH_POP, false);
+ test_linked_list_success(PUSH_POP, true);
+ test_linked_list_success(PUSH_POP_MULT, false);
+ test_linked_list_success(PUSH_POP_MULT, true);
+ test_linked_list_success(LIST_IN_LIST, false);
+ test_linked_list_success(LIST_IN_LIST, true);
+ test_linked_list_success(TEST_ALL, false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
index 1102e4f42d2d..f117bfef68a1 100644
--- a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
+++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
@@ -173,10 +173,12 @@ static void test_lsm_cgroup_functional(void)
ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count");
- /* AF_UNIX is prohibited. */
-
fd = socket(AF_UNIX, SOCK_STREAM, 0);
- ASSERT_LT(fd, 0, "socket(AF_UNIX)");
+ if (!(skel->kconfig->CONFIG_SECURITY_APPARMOR
+ || skel->kconfig->CONFIG_SECURITY_SELINUX
+ || skel->kconfig->CONFIG_SECURITY_SMACK))
+ /* AF_UNIX is prohibited. */
+ ASSERT_LT(fd, 0, "socket(AF_UNIX)");
close(fd);
/* AF_INET6 gets default policy (sk_priority). */
@@ -233,11 +235,18 @@ static void test_lsm_cgroup_functional(void)
/* AF_INET6+SOCK_STREAM
* AF_PACKET+SOCK_RAW
+ * AF_UNIX+SOCK_RAW if already have non-bpf lsms installed
* listen_fd
* client_fd
* accepted_fd
*/
- ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2");
+ if (skel->kconfig->CONFIG_SECURITY_APPARMOR
+ || skel->kconfig->CONFIG_SECURITY_SELINUX
+ || skel->kconfig->CONFIG_SECURITY_SMACK)
+ /* AF_UNIX+SOCK_RAW if already have non-bpf lsms installed */
+ ASSERT_EQ(skel->bss->called_socket_post_create2, 6, "called_create2");
+ else
+ ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2");
/* start_server
* bind(ETH_P_ALL)
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
new file mode 100644
index 000000000000..447d8560ecb6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "rcu_read_lock.skel.h"
+#include "cgroup_helpers.h"
+
+static unsigned long long cgroup_id;
+
+static void test_success(void)
+{
+ struct rcu_read_lock *skel;
+ int err;
+
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->target_pid = syscall(SYS_gettid);
+
+ bpf_program__set_autoload(skel->progs.get_cgroup_id, true);
+ bpf_program__set_autoload(skel->progs.task_succ, true);
+ bpf_program__set_autoload(skel->progs.no_lock, true);
+ bpf_program__set_autoload(skel->progs.two_regions, true);
+ bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
+ bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+ err = rcu_read_lock__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ err = rcu_read_lock__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->task_storage_val, 2, "task_storage_val");
+ ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
+out:
+ rcu_read_lock__destroy(skel);
+}
+
+static void test_rcuptr_acquire(void)
+{
+ struct rcu_read_lock *skel;
+ int err;
+
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->target_pid = syscall(SYS_gettid);
+
+ bpf_program__set_autoload(skel->progs.task_acquire, true);
+ err = rcu_read_lock__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ err = rcu_read_lock__attach(skel);
+ ASSERT_OK(err, "skel_attach");
+out:
+ rcu_read_lock__destroy(skel);
+}
+
+static const char * const inproper_region_tests[] = {
+ "miss_lock",
+ "miss_unlock",
+ "non_sleepable_rcu_mismatch",
+ "inproper_sleepable_helper",
+ "inproper_sleepable_kfunc",
+ "nested_rcu_region",
+};
+
+static void test_inproper_region(void)
+{
+ struct rcu_read_lock *skel;
+ struct bpf_program *prog;
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(inproper_region_tests); i++) {
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, inproper_region_tests[i]);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+ err = rcu_read_lock__load(skel);
+ ASSERT_ERR(err, "skel_load");
+out:
+ rcu_read_lock__destroy(skel);
+ }
+}
+
+static const char * const rcuptr_misuse_tests[] = {
+ "task_untrusted_non_rcuptr",
+ "task_untrusted_rcuptr",
+ "cross_rcu_region",
+};
+
+static void test_rcuptr_misuse(void)
+{
+ struct rcu_read_lock *skel;
+ struct bpf_program *prog;
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(rcuptr_misuse_tests); i++) {
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, rcuptr_misuse_tests[i]);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+ err = rcu_read_lock__load(skel);
+ ASSERT_ERR(err, "skel_load");
+out:
+ rcu_read_lock__destroy(skel);
+ }
+}
+
+void test_rcu_read_lock(void)
+{
+ struct btf *vmlinux_btf;
+ int cgroup_fd;
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+ return;
+ if (btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG) < 0) {
+ test__skip();
+ goto out;
+ }
+
+ cgroup_fd = test__join_cgroup("/rcu_read_lock");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock"))
+ goto out;
+
+ cgroup_id = get_cgroup_id("/rcu_read_lock");
+ if (test__start_subtest("success"))
+ test_success();
+ if (test__start_subtest("rcuptr_acquire"))
+ test_rcuptr_acquire();
+ if (test__start_subtest("negative_tests_inproper_region"))
+ test_inproper_region();
+ if (test__start_subtest("negative_tests_rcuptr_misuse"))
+ test_rcuptr_misuse();
+ close(cgroup_fd);
+out:
+ btf__free(vmlinux_btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
new file mode 100644
index 000000000000..d9270bd3d920
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_spin_lock.skel.h"
+#include "test_spin_lock_fail.skel.h"
+
+static char log_buf[1024 * 1024];
+
+static struct {
+ const char *prog_name;
+ const char *err_msg;
+} spin_lock_fail_tests[] = {
+ { "lock_id_kptr_preserve",
+ "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) "
+ "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=ptr_ expected=percpu_ptr_" },
+ { "lock_id_global_zero",
+ "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=map_value expected=percpu_ptr_" },
+ { "lock_id_mapval_preserve",
+ "8: (bf) r1 = r0 ; R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0) "
+ "R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n9: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=map_value expected=percpu_ptr_" },
+ { "lock_id_innermapval_preserve",
+ "13: (bf) r1 = r0 ; R0=map_value(id=2,off=0,ks=4,vs=8,imm=0) "
+ "R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n14: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=map_value expected=percpu_ptr_" },
+ { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_kptr_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_kptr_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_kptr_innermapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_innermapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_innermapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_innermapval1", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_innermapval2", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" },
+};
+
+static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+ .kernel_log_size = sizeof(log_buf),
+ .kernel_log_level = 1);
+ struct test_spin_lock_fail *skel;
+ struct bpf_program *prog;
+ int ret;
+
+ skel = test_spin_lock_fail__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "test_spin_lock_fail__open_opts"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto end;
+
+ bpf_program__set_autoload(prog, true);
+
+ ret = test_spin_lock_fail__load(skel);
+ if (!ASSERT_ERR(ret, "test_spin_lock_fail__load must fail"))
+ goto end;
+
+ /* Skip check if JIT does not support kfuncs */
+ if (strstr(log_buf, "JIT does not support calling kernel function")) {
+ test__skip();
+ goto end;
+ }
+
+ if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) {
+ fprintf(stderr, "Expected: %s\n", err_msg);
+ fprintf(stderr, "Verifier: %s\n", log_buf);
+ }
+
+end:
+ test_spin_lock_fail__destroy(skel);
+}
+
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *) arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10000,
+ );
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+ pthread_exit(arg);
+}
+
+void test_spin_lock_success(void)
+{
+ struct test_spin_lock *skel;
+ pthread_t thread_id[4];
+ int prog_fd, i;
+ void *ret;
+
+ skel = test_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_spin_lock__open_and_load"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.bpf_spin_lock_test);
+ for (i = 0; i < 4; i++) {
+ int err;
+
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end;
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end;
+ }
+end:
+ test_spin_lock__destroy(skel);
+}
+
+void test_spin_lock(void)
+{
+ int i;
+
+ test_spin_lock_success();
+
+ for (i = 0; i < ARRAY_SIZE(spin_lock_fail_tests); i++) {
+ if (!test__start_subtest(spin_lock_fail_tests[i].prog_name))
+ continue;
+ test_spin_lock_fail_prog(spin_lock_fail_tests[i].prog_name,
+ spin_lock_fail_tests[i].err_msg);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
deleted file mode 100644
index 15eb1372d771..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include <network_helpers.h>
-
-static void *spin_lock_thread(void *arg)
-{
- int err, prog_fd = *(u32 *) arg;
- LIBBPF_OPTS(bpf_test_run_opts, topts,
- .data_in = &pkt_v4,
- .data_size_in = sizeof(pkt_v4),
- .repeat = 10000,
- );
-
- err = bpf_prog_test_run_opts(prog_fd, &topts);
- ASSERT_OK(err, "test_run");
- ASSERT_OK(topts.retval, "test_run retval");
- pthread_exit(arg);
-}
-
-void test_spinlock(void)
-{
- const char *file = "./test_spin_lock.bpf.o";
- pthread_t thread_id[4];
- struct bpf_object *obj = NULL;
- int prog_fd;
- int err = 0, i;
- void *ret;
-
- err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
- if (CHECK_FAIL(err)) {
- printf("test_spin_lock:bpf_prog_test_load errno %d\n", errno);
- goto close_prog;
- }
- for (i = 0; i < 4; i++)
- if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
- &spin_lock_thread, &prog_fd)))
- goto close_prog;
-
- for (i = 0; i < 4; i++)
- if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
- ret != (void *)&prog_fd))
- goto close_prog;
-close_prog:
- bpf_object__close(obj);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
new file mode 100644
index 000000000000..ffd8ef4303c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <sys/wait.h>
+#include <test_progs.h>
+#include <unistd.h>
+
+#include "task_kfunc_failure.skel.h"
+#include "task_kfunc_success.skel.h"
+
+static size_t log_buf_sz = 1 << 20; /* 1 MB */
+static char obj_log_buf[1048576];
+
+static struct task_kfunc_success *open_load_task_kfunc_skel(void)
+{
+ struct task_kfunc_success *skel;
+ int err;
+
+ skel = task_kfunc_success__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ skel->bss->pid = getpid();
+
+ err = task_kfunc_success__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ task_kfunc_success__destroy(skel);
+ return NULL;
+}
+
+static void run_success_test(const char *prog_name)
+{
+ struct task_kfunc_success *skel;
+ int status;
+ pid_t child_pid;
+ struct bpf_program *prog;
+ struct bpf_link *link = NULL;
+
+ skel = open_load_task_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_spawn_err"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "attached_link"))
+ goto cleanup;
+
+ child_pid = fork();
+ if (!ASSERT_GT(child_pid, -1, "child_pid"))
+ goto cleanup;
+ if (child_pid == 0)
+ _exit(0);
+ waitpid(child_pid, &status, 0);
+
+ ASSERT_OK(skel->bss->err, "post_wait_err");
+
+cleanup:
+ bpf_link__destroy(link);
+ task_kfunc_success__destroy(skel);
+}
+
+static const char * const success_tests[] = {
+ "test_task_acquire_release_argument",
+ "test_task_acquire_release_current",
+ "test_task_acquire_leave_in_map",
+ "test_task_xchg_release",
+ "test_task_get_release",
+ "test_task_current_acquire_release",
+ "test_task_from_pid_arg",
+ "test_task_from_pid_current",
+ "test_task_from_pid_invalid",
+};
+
+static struct {
+ const char *prog_name;
+ const char *expected_err_msg;
+} failure_tests[] = {
+ {"task_kfunc_acquire_untrusted", "R1 must be referenced or trusted"},
+ {"task_kfunc_acquire_fp", "arg#0 pointer type STRUCT task_struct must point"},
+ {"task_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"},
+ {"task_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"},
+ {"task_kfunc_acquire_null", "arg#0 pointer type STRUCT task_struct must point"},
+ {"task_kfunc_acquire_unreleased", "Unreleased reference"},
+ {"task_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"},
+ {"task_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"},
+ {"task_kfunc_get_null", "arg#0 expected pointer to map value"},
+ {"task_kfunc_xchg_unreleased", "Unreleased reference"},
+ {"task_kfunc_get_unreleased", "Unreleased reference"},
+ {"task_kfunc_release_untrusted", "arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket"},
+ {"task_kfunc_release_fp", "arg#0 pointer type STRUCT task_struct must point"},
+ {"task_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
+ {"task_kfunc_release_unacquired", "release kernel function bpf_task_release expects"},
+ {"task_kfunc_from_pid_no_null_check", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
+};
+
+static void verify_fail(const char *prog_name, const char *expected_err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct task_kfunc_failure *skel;
+ int err, i;
+
+ opts.kernel_log_buf = obj_log_buf;
+ opts.kernel_log_size = log_buf_sz;
+ opts.kernel_log_level = 1;
+
+ skel = task_kfunc_failure__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "task_kfunc_failure__open_opts"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
+ struct bpf_program *prog;
+ const char *curr_name = failure_tests[i].prog_name;
+
+ prog = bpf_object__find_program_by_name(skel->obj, curr_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name));
+ }
+
+ err = task_kfunc_failure__load(skel);
+ if (!ASSERT_ERR(err, "unexpected load success"))
+ goto cleanup;
+
+ if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
+ fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
+ fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
+ }
+
+cleanup:
+ task_kfunc_failure__destroy(skel);
+}
+
+void test_task_kfunc(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+ if (!test__start_subtest(success_tests[i]))
+ continue;
+
+ run_success_test(success_tests[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
+ if (!test__start_subtest(failure_tests[i].prog_name))
+ continue;
+
+ verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/type_cast.c b/tools/testing/selftests/bpf/prog_tests/type_cast.c
new file mode 100644
index 000000000000..9317d5fa2635
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/type_cast.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "type_cast.skel.h"
+
+static void test_xdp(void)
+{
+ struct type_cast *skel;
+ int err, prog_fd;
+ char buf[128];
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
+
+ skel = type_cast__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.md_xdp, true);
+ err = type_cast__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.md_xdp);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp test_run retval");
+
+ ASSERT_EQ(skel->bss->ifindex, 1, "xdp_md ifindex");
+ ASSERT_EQ(skel->bss->ifindex, skel->bss->ingress_ifindex, "xdp_md ingress_ifindex");
+ ASSERT_STREQ(skel->bss->name, "lo", "xdp_md name");
+ ASSERT_NEQ(skel->bss->inum, 0, "xdp_md inum");
+
+out:
+ type_cast__destroy(skel);
+}
+
+static void test_tc(void)
+{
+ struct type_cast *skel;
+ int err, prog_fd;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = type_cast__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.md_skb, true);
+ err = type_cast__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.md_skb);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "tc test_run retval");
+
+ ASSERT_EQ(skel->bss->meta_len, 0, "skb meta_len");
+ ASSERT_EQ(skel->bss->frag0_len, 0, "skb frag0_len");
+ ASSERT_NEQ(skel->bss->kskb_len, 0, "skb len");
+ ASSERT_NEQ(skel->bss->kskb2_len, 0, "skb2 len");
+ ASSERT_EQ(skel->bss->kskb_len, skel->bss->kskb2_len, "skb len compare");
+
+out:
+ type_cast__destroy(skel);
+}
+
+static const char * const negative_tests[] = {
+ "untrusted_ptr",
+ "kctx_u64",
+};
+
+static void test_negative(void)
+{
+ struct bpf_program *prog;
+ struct type_cast *skel;
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(negative_tests); i++) {
+ skel = type_cast__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, negative_tests[i]);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+ err = type_cast__load(skel);
+ ASSERT_ERR(err, "skel_load");
+out:
+ type_cast__destroy(skel);
+ }
+}
+
+void test_type_cast(void)
+{
+ if (test__start_subtest("xdp"))
+ test_xdp();
+ if (test__start_subtest("tc"))
+ test_tc();
+ if (test__start_subtest("negative"))
+ test_negative();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index a50971c6cf4a..9ac6f6a268db 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -85,7 +85,7 @@ static void test_max_pkt_size(int fd)
}
#define NUM_PKTS 10000
-void test_xdp_do_redirect(void)
+void serial_test_xdp_do_redirect(void)
{
int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
char data[sizeof(pkt_udp) + sizeof(__u32)];
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
index c72083885b6d..13daa3746064 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -174,7 +174,7 @@ out:
system("ip netns del synproxy");
}
-void test_xdp_synproxy(void)
+void serial_test_xdp_synproxy(void)
{
if (test__start_subtest("xdp"))
test_synproxy(true);
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
new file mode 100644
index 000000000000..7d30855bfe78
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _CGRP_KFUNC_COMMON_H
+#define _CGRP_KFUNC_COMMON_H
+
+#include <errno.h>
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct __cgrps_kfunc_map_value {
+ struct cgroup __kptr_ref * cgrp;
+};
+
+struct hash_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct __cgrps_kfunc_map_value);
+ __uint(max_entries, 1);
+} __cgrps_kfunc_map SEC(".maps");
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
+
+static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp)
+{
+ s32 id;
+ long status;
+
+ status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id);
+ if (status)
+ return NULL;
+
+ return bpf_map_lookup_elem(&__cgrps_kfunc_map, &id);
+}
+
+static inline int cgrps_kfunc_map_insert(struct cgroup *cgrp)
+{
+ struct __cgrps_kfunc_map_value local, *v;
+ long status;
+ struct cgroup *acquired, *old;
+ s32 id;
+
+ status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id);
+ if (status)
+ return status;
+
+ local.cgrp = NULL;
+ status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id);
+ if (!v) {
+ bpf_map_delete_elem(&__cgrps_kfunc_map, &id);
+ return -ENOENT;
+ }
+
+ acquired = bpf_cgroup_acquire(cgrp);
+ old = bpf_kptr_xchg(&v->cgrp, acquired);
+ if (old) {
+ bpf_cgroup_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+#endif /* _CGRP_KFUNC_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
new file mode 100644
index 000000000000..a1369b5ebcf8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "cgrp_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(cgroup_mkdir,
+ * TP_PROTO(struct cgroup *cgrp, const char *path),
+ * TP_ARGS(cgrp, path)
+ */
+
+static struct __cgrps_kfunc_map_value *insert_lookup_cgrp(struct cgroup *cgrp)
+{
+ int status;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status)
+ return NULL;
+
+ return cgrps_kfunc_map_value_lookup(cgrp);
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_cgroup_acquire() on an untrusted pointer. */
+ acquired = bpf_cgroup_acquire(v->cgrp);
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired, *stack_cgrp = (struct cgroup *)&path;
+
+ /* Can't invoke bpf_cgroup_acquire() on a random frame pointer. */
+ acquired = bpf_cgroup_acquire((struct cgroup *)&stack_cgrp);
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("kretprobe/cgroup_destroy_locked")
+int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp)
+{
+ struct cgroup *acquired;
+
+ /* Can't acquire an untrusted struct cgroup * pointer. */
+ acquired = bpf_cgroup_acquire(cgrp);
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ /* Can't invoke bpf_cgroup_acquire() on a pointer obtained from walking a trusted cgroup. */
+ acquired = bpf_cgroup_acquire(cgrp->old_dom_cgrp);
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ /* Can't invoke bpf_cgroup_acquire() on a NULL pointer. */
+ acquired = bpf_cgroup_acquire(NULL);
+ if (!acquired)
+ return 0;
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+
+ /* Acquired cgroup is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_get_non_kptr_param, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+
+ /* Cannot use bpf_cgroup_kptr_get() on a non-kptr, even on a valid cgroup. */
+ kptr = bpf_cgroup_kptr_get(&cgrp);
+ if (!kptr)
+ return 0;
+
+ bpf_cgroup_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_get_non_kptr_acquired, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr, *acquired;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+
+ /* Cannot use bpf_cgroup_kptr_get() on a non-map-value, even if the kptr was acquired. */
+ kptr = bpf_cgroup_kptr_get(&acquired);
+ bpf_cgroup_release(acquired);
+ if (!kptr)
+ return 0;
+
+ bpf_cgroup_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_get_null, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+
+ /* Cannot use bpf_cgroup_kptr_get() on a NULL pointer. */
+ kptr = bpf_cgroup_kptr_get(NULL);
+ if (!kptr)
+ return 0;
+
+ bpf_cgroup_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ kptr = bpf_kptr_xchg(&v->cgrp, NULL);
+ if (!kptr)
+ return 0;
+
+ /* Kptr retrieved from map is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ kptr = bpf_cgroup_kptr_get(&v->cgrp);
+ if (!kptr)
+ return 0;
+
+ /* Kptr acquired above is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path)
+{
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_cgroup_release() on an untrusted pointer. */
+ bpf_cgroup_release(v->cgrp);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired = (struct cgroup *)&path;
+
+ /* Cannot release random frame pointer. */
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path)
+{
+ struct __cgrps_kfunc_map_value local, *v;
+ long status;
+ struct cgroup *acquired, *old;
+ s32 id;
+
+ status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id);
+ if (status)
+ return 0;
+
+ local.cgrp = NULL;
+ status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id);
+ if (!v)
+ return -ENOENT;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+
+ old = bpf_kptr_xchg(&v->cgrp, acquired);
+
+ /* old cannot be passed to bpf_cgroup_release() without a NULL check. */
+ bpf_cgroup_release(old);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(cgrp_kfunc_release_unacquired, struct cgroup *cgrp, const char *path)
+{
+ /* Cannot release trusted cgroup pointer which was not acquired. */
+ bpf_cgroup_release(cgrp);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
new file mode 100644
index 000000000000..0c23ea32df9f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "cgrp_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err, pid, invocations;
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(cgroup_mkdir,
+ * TP_PROTO(struct cgroup *cgrp, const char *path),
+ * TP_ARGS(cgrp, path)
+ */
+
+static bool is_test_kfunc_task(void)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ bool same = pid == cur_pid;
+
+ if (same)
+ __sync_fetch_and_add(&invocations, 1);
+
+ return same;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_acquire_release_argument, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *path)
+{
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status)
+ err = 1;
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+ struct __cgrps_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = cgrps_kfunc_map_value_lookup(cgrp);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ kptr = bpf_kptr_xchg(&v->cgrp, NULL);
+ if (!kptr) {
+ err = 3;
+ return 0;
+ }
+
+ bpf_cgroup_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_get_release, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+ struct __cgrps_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = cgrps_kfunc_map_value_lookup(cgrp);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ kptr = bpf_cgroup_kptr_get(&v->cgrp);
+ if (!kptr) {
+ err = 3;
+ return 0;
+ }
+
+ bpf_cgroup_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *self, *ancestor1, *invalid;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ self = bpf_cgroup_ancestor(cgrp, cgrp->level);
+ if (!self) {
+ err = 1;
+ return 0;
+ }
+
+ if (self->self.id != cgrp->self.id) {
+ bpf_cgroup_release(self);
+ err = 2;
+ return 0;
+ }
+ bpf_cgroup_release(self);
+
+ ancestor1 = bpf_cgroup_ancestor(cgrp, cgrp->level - 1);
+ if (!ancestor1) {
+ err = 3;
+ return 0;
+ }
+ bpf_cgroup_release(ancestor1);
+
+ invalid = bpf_cgroup_ancestor(cgrp, 10000);
+ if (invalid) {
+ bpf_cgroup_release(invalid);
+ err = 4;
+ return 0;
+ }
+
+ invalid = bpf_cgroup_ancestor(cgrp, -1);
+ if (invalid) {
+ bpf_cgroup_release(invalid);
+ err = 5;
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/empty_skb.c b/tools/testing/selftests/bpf/progs/empty_skb.c
new file mode 100644
index 000000000000..4b0cd6753251
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/empty_skb.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+int ifindex;
+int ret;
+
+SEC("lwt_xmit")
+int redirect_ingress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS);
+ return 0;
+}
+
+SEC("lwt_xmit")
+int redirect_egress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, 0);
+ return 0;
+}
+
+SEC("tc")
+int tc_redirect_ingress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS);
+ return 0;
+}
+
+SEC("tc")
+int tc_redirect_egress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c
new file mode 100644
index 000000000000..2c7b615c6d41
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list.c
@@ -0,0 +1,370 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#include "linked_list.h"
+
+static __always_inline
+int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map)
+{
+ struct bpf_list_node *n;
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 2;
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node));
+ bpf_obj_drop(f);
+ return 3;
+ }
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node));
+ bpf_obj_drop(f);
+ return 4;
+ }
+
+
+ bpf_spin_lock(lock);
+ f->data = 42;
+ bpf_list_push_front(head, &f->node);
+ bpf_spin_unlock(lock);
+ if (leave_in_map)
+ return 0;
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 5;
+ f = container_of(n, struct foo, node);
+ if (f->data != 42) {
+ bpf_obj_drop(f);
+ return 6;
+ }
+
+ bpf_spin_lock(lock);
+ f->data = 13;
+ bpf_list_push_front(head, &f->node);
+ bpf_spin_unlock(lock);
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 7;
+ f = container_of(n, struct foo, node);
+ if (f->data != 13) {
+ bpf_obj_drop(f);
+ return 8;
+ }
+ bpf_obj_drop(f);
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node));
+ return 9;
+ }
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node));
+ return 10;
+ }
+ return 0;
+}
+
+
+static __always_inline
+int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map)
+{
+ struct bpf_list_node *n;
+ struct foo *f[8], *pf;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f); i++) {
+ f[i] = bpf_obj_new(typeof(**f));
+ if (!f[i])
+ return 2;
+ f[i]->data = i;
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &f[i]->node);
+ bpf_spin_unlock(lock);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(f); i++) {
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 3;
+ pf = container_of(n, struct foo, node);
+ if (pf->data != (ARRAY_SIZE(f) - i - 1)) {
+ bpf_obj_drop(pf);
+ return 4;
+ }
+ bpf_spin_lock(lock);
+ bpf_list_push_back(head, &pf->node);
+ bpf_spin_unlock(lock);
+ }
+
+ if (leave_in_map)
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(f); i++) {
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 5;
+ pf = container_of(n, struct foo, node);
+ if (pf->data != i) {
+ bpf_obj_drop(pf);
+ return 6;
+ }
+ bpf_obj_drop(pf);
+ }
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node));
+ return 7;
+ }
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node));
+ return 8;
+ }
+ return 0;
+}
+
+static __always_inline
+int list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map)
+{
+ struct bpf_list_node *n;
+ struct bar *ba[8], *b;
+ struct foo *f;
+ int i;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 2;
+ for (i = 0; i < ARRAY_SIZE(ba); i++) {
+ b = bpf_obj_new(typeof(*b));
+ if (!b) {
+ bpf_obj_drop(f);
+ return 3;
+ }
+ b->data = i;
+ bpf_spin_lock(&f->lock);
+ bpf_list_push_back(&f->head, &b->node);
+ bpf_spin_unlock(&f->lock);
+ }
+
+ bpf_spin_lock(lock);
+ f->data = 42;
+ bpf_list_push_front(head, &f->node);
+ bpf_spin_unlock(lock);
+
+ if (leave_in_map)
+ return 0;
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 4;
+ f = container_of(n, struct foo, node);
+ if (f->data != 42) {
+ bpf_obj_drop(f);
+ return 5;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ba); i++) {
+ bpf_spin_lock(&f->lock);
+ n = bpf_list_pop_front(&f->head);
+ bpf_spin_unlock(&f->lock);
+ if (!n) {
+ bpf_obj_drop(f);
+ return 6;
+ }
+ b = container_of(n, struct bar, node);
+ if (b->data != i) {
+ bpf_obj_drop(f);
+ bpf_obj_drop(b);
+ return 7;
+ }
+ bpf_obj_drop(b);
+ }
+ bpf_spin_lock(&f->lock);
+ n = bpf_list_pop_front(&f->head);
+ bpf_spin_unlock(&f->lock);
+ if (n) {
+ bpf_obj_drop(f);
+ bpf_obj_drop(container_of(n, struct bar, node));
+ return 8;
+ }
+ bpf_obj_drop(f);
+ return 0;
+}
+
+static __always_inline
+int test_list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head)
+{
+ int ret;
+
+ ret = list_push_pop(lock, head, false);
+ if (ret)
+ return ret;
+ return list_push_pop(lock, head, true);
+}
+
+static __always_inline
+int test_list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head)
+{
+ int ret;
+
+ ret = list_push_pop_multiple(lock ,head, false);
+ if (ret)
+ return ret;
+ return list_push_pop_multiple(lock, head, true);
+}
+
+static __always_inline
+int test_list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head)
+{
+ int ret;
+
+ ret = list_in_list(lock, head, false);
+ if (ret)
+ return ret;
+ return list_in_list(lock, head, true);
+}
+
+SEC("tc")
+int map_list_push_pop(void *ctx)
+{
+ struct map_value *v;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop(&v->lock, &v->head);
+}
+
+SEC("tc")
+int inner_map_list_push_pop(void *ctx)
+{
+ struct map_value *v;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
+ if (!map)
+ return 1;
+ v = bpf_map_lookup_elem(map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop(&v->lock, &v->head);
+}
+
+SEC("tc")
+int global_list_push_pop(void *ctx)
+{
+ return test_list_push_pop(&glock, &ghead);
+}
+
+SEC("tc")
+int map_list_push_pop_multiple(void *ctx)
+{
+ struct map_value *v;
+ int ret;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop_multiple(&v->lock, &v->head);
+}
+
+SEC("tc")
+int inner_map_list_push_pop_multiple(void *ctx)
+{
+ struct map_value *v;
+ void *map;
+ int ret;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
+ if (!map)
+ return 1;
+ v = bpf_map_lookup_elem(map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop_multiple(&v->lock, &v->head);
+}
+
+SEC("tc")
+int global_list_push_pop_multiple(void *ctx)
+{
+ int ret;
+
+ ret = list_push_pop_multiple(&glock, &ghead, false);
+ if (ret)
+ return ret;
+ return list_push_pop_multiple(&glock, &ghead, true);
+}
+
+SEC("tc")
+int map_list_in_list(void *ctx)
+{
+ struct map_value *v;
+ int ret;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_in_list(&v->lock, &v->head);
+}
+
+SEC("tc")
+int inner_map_list_in_list(void *ctx)
+{
+ struct map_value *v;
+ void *map;
+ int ret;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
+ if (!map)
+ return 1;
+ v = bpf_map_lookup_elem(map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_in_list(&v->lock, &v->head);
+}
+
+SEC("tc")
+int global_list_in_list(void *ctx)
+{
+ return test_list_in_list(&glock, &ghead);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_list.h b/tools/testing/selftests/bpf/progs/linked_list.h
new file mode 100644
index 000000000000..3fb2412552fc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list.h
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef LINKED_LIST_H
+#define LINKED_LIST_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+struct bar {
+ struct bpf_list_node node;
+ int data;
+};
+
+struct foo {
+ struct bpf_list_node node;
+ struct bpf_list_head head __contains(bar, node);
+ struct bpf_spin_lock lock;
+ int data;
+ struct bpf_list_node node2;
+};
+
+struct map_value {
+ struct bpf_spin_lock lock;
+ int data;
+ struct bpf_list_head head __contains(foo, node);
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+};
+
+struct array_map array_map SEC(".maps");
+struct array_map inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct array_map);
+} map_of_maps SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_list_head ghead __contains(foo, node);
+private(B) struct bpf_spin_lock glock2;
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c
new file mode 100644
index 000000000000..1d9017240e19
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c
@@ -0,0 +1,581 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+#include "linked_list.h"
+
+#define INIT \
+ struct map_value *v, *v2, *iv, *iv2; \
+ struct foo *f, *f1, *f2; \
+ struct bar *b; \
+ void *map; \
+ \
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){ 0 }); \
+ if (!map) \
+ return 0; \
+ v = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \
+ if (!v) \
+ return 0; \
+ v2 = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \
+ if (!v2) \
+ return 0; \
+ iv = bpf_map_lookup_elem(map, &(int){ 0 }); \
+ if (!iv) \
+ return 0; \
+ iv2 = bpf_map_lookup_elem(map, &(int){ 0 }); \
+ if (!iv2) \
+ return 0; \
+ f = bpf_obj_new(typeof(*f)); \
+ if (!f) \
+ return 0; \
+ f1 = f; \
+ f2 = bpf_obj_new(typeof(*f2)); \
+ if (!f2) { \
+ bpf_obj_drop(f1); \
+ return 0; \
+ } \
+ b = bpf_obj_new(typeof(*b)); \
+ if (!b) { \
+ bpf_obj_drop(f2); \
+ bpf_obj_drop(f1); \
+ return 0; \
+ }
+
+#define CHECK(test, op, hexpr) \
+ SEC("?tc") \
+ int test##_missing_lock_##op(void *ctx) \
+ { \
+ INIT; \
+ void (*p)(void *) = (void *)&bpf_list_##op; \
+ p(hexpr); \
+ return 0; \
+ }
+
+CHECK(kptr, push_front, &f->head);
+CHECK(kptr, push_back, &f->head);
+CHECK(kptr, pop_front, &f->head);
+CHECK(kptr, pop_back, &f->head);
+
+CHECK(global, push_front, &ghead);
+CHECK(global, push_back, &ghead);
+CHECK(global, pop_front, &ghead);
+CHECK(global, pop_back, &ghead);
+
+CHECK(map, push_front, &v->head);
+CHECK(map, push_back, &v->head);
+CHECK(map, pop_front, &v->head);
+CHECK(map, pop_back, &v->head);
+
+CHECK(inner_map, push_front, &iv->head);
+CHECK(inner_map, push_back, &iv->head);
+CHECK(inner_map, pop_front, &iv->head);
+CHECK(inner_map, pop_back, &iv->head);
+
+#undef CHECK
+
+#define CHECK(test, op, lexpr, hexpr) \
+ SEC("?tc") \
+ int test##_incorrect_lock_##op(void *ctx) \
+ { \
+ INIT; \
+ void (*p)(void *) = (void *)&bpf_list_##op; \
+ bpf_spin_lock(lexpr); \
+ p(hexpr); \
+ return 0; \
+ }
+
+#define CHECK_OP(op) \
+ CHECK(kptr_kptr, op, &f1->lock, &f2->head); \
+ CHECK(kptr_global, op, &f1->lock, &ghead); \
+ CHECK(kptr_map, op, &f1->lock, &v->head); \
+ CHECK(kptr_inner_map, op, &f1->lock, &iv->head); \
+ \
+ CHECK(global_global, op, &glock2, &ghead); \
+ CHECK(global_kptr, op, &glock, &f1->head); \
+ CHECK(global_map, op, &glock, &v->head); \
+ CHECK(global_inner_map, op, &glock, &iv->head); \
+ \
+ CHECK(map_map, op, &v->lock, &v2->head); \
+ CHECK(map_kptr, op, &v->lock, &f2->head); \
+ CHECK(map_global, op, &v->lock, &ghead); \
+ CHECK(map_inner_map, op, &v->lock, &iv->head); \
+ \
+ CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head); \
+ CHECK(inner_map_kptr, op, &iv->lock, &f2->head); \
+ CHECK(inner_map_global, op, &iv->lock, &ghead); \
+ CHECK(inner_map_map, op, &iv->lock, &v->head);
+
+CHECK_OP(push_front);
+CHECK_OP(push_back);
+CHECK_OP(pop_front);
+CHECK_OP(pop_back);
+
+#undef CHECK
+#undef CHECK_OP
+#undef INIT
+
+SEC("?kprobe/xyz")
+int map_compat_kprobe(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?kretprobe/xyz")
+int map_compat_kretprobe(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?tracepoint/xyz")
+int map_compat_tp(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?perf_event")
+int map_compat_perf(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?raw_tp/xyz")
+int map_compat_raw_tp(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?raw_tp.w/xyz")
+int map_compat_raw_tp_w(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?tc")
+int obj_type_id_oor(void *ctx)
+{
+ bpf_obj_new_impl(~0UL, NULL);
+ return 0;
+}
+
+SEC("?tc")
+int obj_new_no_composite(void *ctx)
+{
+ bpf_obj_new_impl(bpf_core_type_id_local(int), (void *)42);
+ return 0;
+}
+
+SEC("?tc")
+int obj_new_no_struct(void *ctx)
+{
+
+ bpf_obj_new(union { int data; unsigned udata; });
+ return 0;
+}
+
+SEC("?tc")
+int obj_drop_non_zero_off(void *ctx)
+{
+ void *f;
+
+ f = bpf_obj_new(struct foo);
+ if (!f)
+ return 0;
+ bpf_obj_drop(f+1);
+ return 0;
+}
+
+SEC("?tc")
+int new_null_ret(void *ctx)
+{
+ return bpf_obj_new(struct foo)->data;
+}
+
+SEC("?tc")
+int obj_new_acq(void *ctx)
+{
+ bpf_obj_new(struct foo);
+ return 0;
+}
+
+SEC("?tc")
+int use_after_drop(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_obj_drop(f);
+ return f->data;
+}
+
+SEC("?tc")
+int ptr_walk_scalar(void *ctx)
+{
+ struct test1 {
+ struct test2 {
+ struct test2 *next;
+ } *ptr;
+ } *p;
+
+ p = bpf_obj_new(typeof(*p));
+ if (!p)
+ return 0;
+ bpf_this_cpu_ptr(p->ptr);
+ return 0;
+}
+
+SEC("?tc")
+int direct_read_lock(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ return *(int *)&f->lock;
+}
+
+SEC("?tc")
+int direct_write_lock(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ *(int *)&f->lock = 0;
+ return 0;
+}
+
+SEC("?tc")
+int direct_read_head(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ return *(int *)&f->head;
+}
+
+SEC("?tc")
+int direct_write_head(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ *(int *)&f->head = 0;
+ return 0;
+}
+
+SEC("?tc")
+int direct_read_node(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ return *(int *)&f->node;
+}
+
+SEC("?tc")
+int direct_write_node(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ *(int *)&f->node = 0;
+ return 0;
+}
+
+static __always_inline
+int write_after_op(void (*push_op)(void *head, void *node))
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ push_op(&ghead, &f->node);
+ f->data = 42;
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int write_after_push_front(void *ctx)
+{
+ return write_after_op((void *)bpf_list_push_front);
+}
+
+SEC("?tc")
+int write_after_push_back(void *ctx)
+{
+ return write_after_op((void *)bpf_list_push_back);
+}
+
+static __always_inline
+int use_after_unlock(void (*op)(void *head, void *node))
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ f->data = 42;
+ op(&ghead, &f->node);
+ bpf_spin_unlock(&glock);
+
+ return f->data;
+}
+
+SEC("?tc")
+int use_after_unlock_push_front(void *ctx)
+{
+ return use_after_unlock((void *)bpf_list_push_front);
+}
+
+SEC("?tc")
+int use_after_unlock_push_back(void *ctx)
+{
+ return use_after_unlock((void *)bpf_list_push_back);
+}
+
+static __always_inline
+int list_double_add(void (*op)(void *head, void *node))
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ op(&ghead, &f->node);
+ op(&ghead, &f->node);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int double_push_front(void *ctx)
+{
+ return list_double_add((void *)bpf_list_push_front);
+}
+
+SEC("?tc")
+int double_push_back(void *ctx)
+{
+ return list_double_add((void *)bpf_list_push_back);
+}
+
+SEC("?tc")
+int no_node_value_type(void *ctx)
+{
+ void *p;
+
+ p = bpf_obj_new(struct { int data; });
+ if (!p)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, p);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_value_type(void *ctx)
+{
+ struct bar *b;
+
+ b = bpf_obj_new(typeof(*b));
+ if (!b)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, &b->node);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_node_var_off(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, (void *)&f->node + ctx->protocol);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_node_off1(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, (void *)&f->node + 1);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_node_off2(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, &f->node2);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int no_head_type(void *ctx)
+{
+ void *p;
+
+ p = bpf_obj_new(typeof(struct { int data; }));
+ if (!p)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(p, NULL);
+ bpf_spin_lock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_var_off1(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front((void *)&ghead + ctx->protocol, &f->node);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_var_off2(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front((void *)&f->head + ctx->protocol, &f->node);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_off1(void *ctx)
+{
+ struct foo *f;
+ struct bar *b;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ b = bpf_obj_new(typeof(*b));
+ if (!b) {
+ bpf_obj_drop(f);
+ return 0;
+ }
+
+ bpf_spin_lock(&f->lock);
+ bpf_list_push_front((void *)&f->head + 1, &b->node);
+ bpf_spin_unlock(&f->lock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_off2(void *ctx)
+{
+ struct foo *f;
+ struct bar *b;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+
+ bpf_spin_lock(&glock);
+ bpf_list_push_front((void *)&ghead + 1, &f->node);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+static __always_inline
+int pop_ptr_off(void *(*op)(void *head))
+{
+ struct {
+ struct bpf_list_head head __contains(foo, node2);
+ struct bpf_spin_lock lock;
+ } *p;
+ struct bpf_list_node *n;
+
+ p = bpf_obj_new(typeof(*p));
+ if (!p)
+ return 0;
+ bpf_spin_lock(&p->lock);
+ n = op(&p->head);
+ bpf_spin_unlock(&p->lock);
+
+ bpf_this_cpu_ptr(n);
+ return 0;
+}
+
+SEC("?tc")
+int pop_front_off(void *ctx)
+{
+ return pop_ptr_off((void *)bpf_list_pop_front);
+}
+
+SEC("?tc")
+int pop_back_off(void *ctx)
+{
+ return pop_ptr_off((void *)bpf_list_pop_back);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
index 4f2d60b87b75..02c11d16b692 100644
--- a/tools/testing/selftests/bpf/progs/lsm_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
@@ -7,6 +7,10 @@
char _license[] SEC("license") = "GPL";
+extern bool CONFIG_SECURITY_SELINUX __kconfig __weak;
+extern bool CONFIG_SECURITY_SMACK __kconfig __weak;
+extern bool CONFIG_SECURITY_APPARMOR __kconfig __weak;
+
#ifndef AF_PACKET
#define AF_PACKET 17
#endif
@@ -140,6 +144,10 @@ SEC("lsm_cgroup/sk_alloc_security")
int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority)
{
called_socket_alloc++;
+ /* if already have non-bpf lsms installed, EPERM will cause memory leak of non-bpf lsms */
+ if (CONFIG_SECURITY_SELINUX || CONFIG_SECURITY_SMACK || CONFIG_SECURITY_APPARMOR)
+ return 1;
+
if (family == AF_UNIX)
return 0; /* EPERM */
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
new file mode 100644
index 000000000000..94a970076b98
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+__u32 user_data, key_serial, target_pid;
+__u64 flags, task_storage_val, cgroup_id;
+
+struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+void bpf_key_put(struct bpf_key *key) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int get_cgroup_id(void *ctx)
+{
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ /* simulate bpf_get_current_cgroup_id() helper */
+ bpf_rcu_read_lock();
+ cgroup_id = task->cgroups->dfl_cgrp->kn->id;
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_succ(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+ long init_val = 2;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ bpf_rcu_read_lock();
+ /* region including helper using rcu ptr real_parent */
+ real_parent = task->real_parent;
+ ptr = bpf_task_storage_get(&map_a, real_parent, &init_val,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ goto out;
+ ptr = bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ if (!ptr)
+ goto out;
+ task_storage_val = *ptr;
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int no_lock(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* no bpf_rcu_read_lock(), old code still works */
+ task = bpf_get_current_task_btf();
+ real_parent = task->real_parent;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int two_regions(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* two regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_1(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_2(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ bpf_rcu_read_lock();
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_unlock();
+
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int task_acquire(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ /* acquire a reference which can be used outside rcu read lock region */
+ real_parent = bpf_task_acquire(real_parent);
+ bpf_rcu_read_unlock();
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ bpf_task_release(real_parent);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int miss_lock(void *ctx)
+{
+ struct task_struct *task;
+ struct css_set *cgroups;
+ struct cgroup *dfl_cgrp;
+
+ /* missing bpf_rcu_read_lock() */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ (void)bpf_task_storage_get(&map_a, task, 0, 0);
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int miss_unlock(void *ctx)
+{
+ struct task_struct *task;
+ struct css_set *cgroups;
+ struct cgroup *dfl_cgrp;
+
+ /* missing bpf_rcu_read_unlock() */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ (void)bpf_task_storage_get(&map_a, task, 0, 0);
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_rcu_mismatch(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ task = bpf_get_current_task_btf();
+ /* non-sleepable: missing bpf_rcu_read_unlock() in one path */
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ if (real_parent)
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int inproper_sleepable_helper(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+ struct pt_regs *regs;
+ __u32 value = 0;
+ void *ptr;
+
+ task = bpf_get_current_task_btf();
+ /* sleepable helper in rcu read lock region */
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ regs = (struct pt_regs *)bpf_task_pt_regs(real_parent);
+ if (!regs) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ ptr = (void *)PT_REGS_IP(regs);
+ (void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0);
+ user_data = value;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?lsm.s/bpf")
+int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ struct bpf_key *bkey;
+
+ /* sleepable kfunc in rcu read lock region */
+ bpf_rcu_read_lock();
+ bkey = bpf_lookup_user_key(key_serial, flags);
+ bpf_rcu_read_unlock();
+ if (!bkey)
+ return -1;
+ bpf_key_put(bkey);
+
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* nested rcu read lock regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_untrusted_non_rcuptr(void *ctx)
+{
+ struct task_struct *task, *last_wakee;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ /* the pointer last_wakee marked as untrusted */
+ last_wakee = task->real_parent->last_wakee;
+ (void)bpf_task_storage_get(&map_a, last_wakee, 0, 0);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_untrusted_rcuptr(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ bpf_rcu_read_unlock();
+ /* helper use of rcu ptr outside the rcu read lock region */
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int cross_rcu_region(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* rcu ptr define/use in different regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_lock();
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
new file mode 100644
index 000000000000..c0ffd171743e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _TASK_KFUNC_COMMON_H
+#define _TASK_KFUNC_COMMON_H
+
+#include <errno.h>
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct __tasks_kfunc_map_value {
+ struct task_struct __kptr_ref * task;
+};
+
+struct hash_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct __tasks_kfunc_map_value);
+ __uint(max_entries, 1);
+} __tasks_kfunc_map SEC(".maps");
+
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
+
+static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p)
+{
+ s32 pid;
+ long status;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid);
+ if (status)
+ return NULL;
+
+ return bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+}
+
+static inline int tasks_kfunc_map_insert(struct task_struct *p)
+{
+ struct __tasks_kfunc_map_value local, *v;
+ long status;
+ struct task_struct *acquired, *old;
+ s32 pid;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid);
+ if (status)
+ return status;
+
+ local.task = NULL;
+ status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+ if (!v) {
+ bpf_map_delete_elem(&__tasks_kfunc_map, &pid);
+ return -ENOENT;
+ }
+
+ acquired = bpf_task_acquire(p);
+ old = bpf_kptr_xchg(&v->task, acquired);
+ if (old) {
+ bpf_task_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+#endif /* _TASK_KFUNC_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
new file mode 100644
index 000000000000..e310473190d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *task)
+{
+ int status;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status)
+ return NULL;
+
+ return tasks_kfunc_map_value_lookup(task);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+ struct __tasks_kfunc_map_value *v;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_task_acquire() on an untrusted pointer. */
+ acquired = bpf_task_acquire(v->task);
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags;
+
+ /* Can't invoke bpf_task_acquire() on a random frame pointer. */
+ acquired = bpf_task_acquire((struct task_struct *)&stack_task);
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("kretprobe/free_task")
+int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_acquire(task);
+ /* Can't release a bpf_task_acquire()'d task without a NULL check. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ /* Can't invoke bpf_task_acquire() on a trusted pointer obtained from walking a struct. */
+ acquired = bpf_task_acquire(task->last_wakee);
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ /* Can't invoke bpf_task_acquire() on a NULL pointer. */
+ acquired = bpf_task_acquire(NULL);
+ if (!acquired)
+ return 0;
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_acquire(task);
+
+ /* Acquired task is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+
+ /* Cannot use bpf_task_kptr_get() on a non-kptr, even on a valid task. */
+ kptr = bpf_task_kptr_get(&task);
+ if (!kptr)
+ return 0;
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr, *acquired;
+
+ acquired = bpf_task_acquire(task);
+
+ /* Cannot use bpf_task_kptr_get() on a non-kptr, even if it was acquired. */
+ kptr = bpf_task_kptr_get(&acquired);
+ bpf_task_release(acquired);
+ if (!kptr)
+ return 0;
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+
+ /* Cannot use bpf_task_kptr_get() on a NULL pointer. */
+ kptr = bpf_task_kptr_get(NULL);
+ if (!kptr)
+ return 0;
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ kptr = bpf_kptr_xchg(&v->task, NULL);
+ if (!kptr)
+ return 0;
+
+ /* Kptr retrieved from map is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ kptr = bpf_task_kptr_get(&v->task);
+ if (!kptr)
+ return 0;
+
+ /* Kptr acquired above is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags)
+{
+ struct __tasks_kfunc_map_value *v;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_task_release() on an untrusted pointer. */
+ bpf_task_release(v->task);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired = (struct task_struct *)&clone_flags;
+
+ /* Cannot release random frame pointer. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
+{
+ struct __tasks_kfunc_map_value local, *v;
+ long status;
+ struct task_struct *acquired, *old;
+ s32 pid;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &task->pid);
+ if (status)
+ return 0;
+
+ local.task = NULL;
+ status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+ if (!v)
+ return -ENOENT;
+
+ acquired = bpf_task_acquire(task);
+
+ old = bpf_kptr_xchg(&v->task, acquired);
+
+ /* old cannot be passed to bpf_task_release() without a NULL check. */
+ bpf_task_release(old);
+ bpf_task_release(old);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags)
+{
+ /* Cannot release trusted task pointer which was not acquired. */
+ bpf_task_release(task);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_from_pid(task->pid);
+
+ /* Releasing bpf_task_from_pid() lookup without a NULL check. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
new file mode 100644
index 000000000000..60c7ead41cfc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err, pid;
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+static bool is_test_kfunc_task(void)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ return pid == cur_pid;
+}
+
+static int test_acquire_release(struct task_struct *task)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_acquire(task);
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_release_argument, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ return test_acquire_release(task);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_release_current, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ return test_acquire_release(bpf_get_current_task_btf());
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone_flags)
+{
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status)
+ err = 1;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = tasks_kfunc_map_value_lookup(task);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ kptr = bpf_kptr_xchg(&v->task, NULL);
+ if (!kptr) {
+ err = 3;
+ return 0;
+ }
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = tasks_kfunc_map_value_lookup(task);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ kptr = bpf_task_kptr_get(&v->task);
+ if (!kptr) {
+ err = 3;
+ return 0;
+ }
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *current, *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ current = bpf_get_current_task_btf();
+ acquired = bpf_task_acquire(current);
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+static void lookup_compare_pid(const struct task_struct *p)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_from_pid(p->pid);
+ if (!acquired) {
+ err = 1;
+ return;
+ }
+
+ if (acquired->pid != p->pid)
+ err = 2;
+ bpf_task_release(acquired);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ lookup_compare_pid(task);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_current, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *current, *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ lookup_compare_pid(bpf_get_current_task_btf());
+ return 0;
+}
+
+static int is_pid_lookup_valid(s32 pid)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_from_pid(pid);
+ if (acquired) {
+ bpf_task_release(acquired);
+ return 1;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ if (is_pid_lookup_valid(-1)) {
+ err = 1;
+ return 0;
+ }
+
+ if (is_pid_lookup_valid(0xcafef00d)) {
+ err = 2;
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index 7e88309d3229..5bd10409285b 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -45,8 +45,8 @@ struct {
#define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
-SEC("tc")
-int bpf_sping_lock_test(struct __sk_buff *skb)
+SEC("cgroup_skb/ingress")
+int bpf_spin_lock_test(struct __sk_buff *skb)
{
volatile int credit = 0, max_credit = 100, pkt_len = 64;
struct hmap_elem zero = {}, *val;
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
new file mode 100644
index 000000000000..86cd183ef6dc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+struct foo {
+ struct bpf_spin_lock lock;
+ int data;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct foo);
+ __uint(max_entries, 1);
+} array_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct array_map);
+} map_of_maps SEC(".maps") = {
+ .values = {
+ [0] = &array_map,
+ },
+};
+
+SEC(".data.A") struct bpf_spin_lock lockA;
+SEC(".data.B") struct bpf_spin_lock lockB;
+
+SEC("?tc")
+int lock_id_kptr_preserve(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_this_cpu_ptr(f);
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_global_zero(void *ctx)
+{
+ bpf_this_cpu_ptr(&lockA);
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_mapval_preserve(void *ctx)
+{
+ struct foo *f;
+ int key = 0;
+
+ f = bpf_map_lookup_elem(&array_map, &key);
+ if (!f)
+ return 0;
+ bpf_this_cpu_ptr(f);
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_innermapval_preserve(void *ctx)
+{
+ struct foo *f;
+ int key = 0;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f = bpf_map_lookup_elem(map, &key);
+ if (!f)
+ return 0;
+ bpf_this_cpu_ptr(f);
+ return 0;
+}
+
+#define CHECK(test, A, B) \
+ SEC("?tc") \
+ int lock_id_mismatch_##test(void *ctx) \
+ { \
+ struct foo *f1, *f2, *v, *iv; \
+ int key = 0; \
+ void *map; \
+ \
+ map = bpf_map_lookup_elem(&map_of_maps, &key); \
+ if (!map) \
+ return 0; \
+ iv = bpf_map_lookup_elem(map, &key); \
+ if (!iv) \
+ return 0; \
+ v = bpf_map_lookup_elem(&array_map, &key); \
+ if (!v) \
+ return 0; \
+ f1 = bpf_obj_new(typeof(*f1)); \
+ if (!f1) \
+ return 0; \
+ f2 = bpf_obj_new(typeof(*f2)); \
+ if (!f2) { \
+ bpf_obj_drop(f1); \
+ return 0; \
+ } \
+ bpf_spin_lock(A); \
+ bpf_spin_unlock(B); \
+ return 0; \
+ }
+
+CHECK(kptr_kptr, &f1->lock, &f2->lock);
+CHECK(kptr_global, &f1->lock, &lockA);
+CHECK(kptr_mapval, &f1->lock, &v->lock);
+CHECK(kptr_innermapval, &f1->lock, &iv->lock);
+
+CHECK(global_global, &lockA, &lockB);
+CHECK(global_kptr, &lockA, &f1->lock);
+CHECK(global_mapval, &lockA, &v->lock);
+CHECK(global_innermapval, &lockA, &iv->lock);
+
+SEC("?tc")
+int lock_id_mismatch_mapval_mapval(void *ctx)
+{
+ struct foo *f1, *f2;
+ int key = 0;
+
+ f1 = bpf_map_lookup_elem(&array_map, &key);
+ if (!f1)
+ return 0;
+ f2 = bpf_map_lookup_elem(&array_map, &key);
+ if (!f2)
+ return 0;
+
+ bpf_spin_lock(&f1->lock);
+ f1->data = 42;
+ bpf_spin_unlock(&f2->lock);
+
+ return 0;
+}
+
+CHECK(mapval_kptr, &v->lock, &f1->lock);
+CHECK(mapval_global, &v->lock, &lockB);
+CHECK(mapval_innermapval, &v->lock, &iv->lock);
+
+SEC("?tc")
+int lock_id_mismatch_innermapval_innermapval1(void *ctx)
+{
+ struct foo *f1, *f2;
+ int key = 0;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f1 = bpf_map_lookup_elem(map, &key);
+ if (!f1)
+ return 0;
+ f2 = bpf_map_lookup_elem(map, &key);
+ if (!f2)
+ return 0;
+
+ bpf_spin_lock(&f1->lock);
+ f1->data = 42;
+ bpf_spin_unlock(&f2->lock);
+
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_mismatch_innermapval_innermapval2(void *ctx)
+{
+ struct foo *f1, *f2;
+ int key = 0;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f1 = bpf_map_lookup_elem(map, &key);
+ if (!f1)
+ return 0;
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f2 = bpf_map_lookup_elem(map, &key);
+ if (!f2)
+ return 0;
+
+ bpf_spin_lock(&f1->lock);
+ f1->data = 42;
+ bpf_spin_unlock(&f2->lock);
+
+ return 0;
+}
+
+CHECK(innermapval_kptr, &iv->lock, &f1->lock);
+CHECK(innermapval_global, &iv->lock, &lockA);
+CHECK(innermapval_mapval, &iv->lock, &v->lock);
+
+#undef CHECK
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c
new file mode 100644
index 000000000000..eb78e6f03129
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/type_cast.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} enter_id SEC(".maps");
+
+#define IFNAMSIZ 16
+
+int ifindex, ingress_ifindex;
+char name[IFNAMSIZ];
+unsigned int inum;
+unsigned int meta_len, frag0_len, kskb_len, kskb2_len;
+
+void *bpf_cast_to_kern_ctx(void *) __ksym;
+void *bpf_rdonly_cast(void *, __u32) __ksym;
+
+SEC("?xdp")
+int md_xdp(struct xdp_md *ctx)
+{
+ struct xdp_buff *kctx = bpf_cast_to_kern_ctx(ctx);
+ struct net_device *dev;
+
+ dev = kctx->rxq->dev;
+ ifindex = dev->ifindex;
+ inum = dev->nd_net.net->ns.inum;
+ __builtin_memcpy(name, dev->name, IFNAMSIZ);
+ ingress_ifindex = ctx->ingress_ifindex;
+ return XDP_PASS;
+}
+
+SEC("?tc")
+int md_skb(struct __sk_buff *skb)
+{
+ struct sk_buff *kskb = bpf_cast_to_kern_ctx(skb);
+ struct skb_shared_info *shared_info;
+ struct sk_buff *kskb2;
+
+ kskb_len = kskb->len;
+
+ /* Simulate the following kernel macro:
+ * #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
+ */
+ shared_info = bpf_rdonly_cast(kskb->head + kskb->end,
+ bpf_core_type_id_kernel(struct skb_shared_info));
+ meta_len = shared_info->meta_len;
+ frag0_len = shared_info->frag_list->len;
+
+ /* kskb2 should be equal to kskb */
+ kskb2 = bpf_rdonly_cast(kskb, bpf_core_type_id_kernel(struct sk_buff));
+ kskb2_len = kskb2->len;
+ return 0;
+}
+
+SEC("?tp_btf/sys_enter")
+int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id)
+{
+ struct task_struct *task, *task_dup;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct));
+ (void)bpf_task_storage_get(&enter_id, task_dup, 0, 0);
+ return 0;
+}
+
+SEC("?tracepoint/syscalls/sys_enter_nanosleep")
+int kctx_u64(void *ctx)
+{
+ u64 *kctx = bpf_rdonly_cast(ctx, bpf_core_type_id_kernel(u64));
+
+ (void)kctx;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index 9fe4c9336c6f..0cfece7ff4f8 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -309,11 +309,11 @@ class MainHeaderFileExtractor(SourceFileExtractor):
commands), which looks to the lists of options in other source files
but has different start and end markers:
- "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}"
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}"
Return a set containing all options, such as:
- {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'}
+ {'-p', '-d', '--pretty', '--debug', '--json', '-j'}
"""
start_marker = re.compile(f'"OPTIONS :=')
pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])')
@@ -336,7 +336,7 @@ class ManSubstitutionsExtractor(SourceFileExtractor):
Return a set containing all options, such as:
- {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'}
+ {'-p', '-d', '--pretty', '--debug', '--json', '-j'}
"""
start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {')
pattern = re.compile('\*\*([\w/-]+)\*\*')
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index e1a937277b54..3193915c5ee6 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -109,7 +109,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "arg#0 pointer type STRUCT prog_test_ref_kfunc must point",
+ .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket",
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_acquire", 3 },
{ "bpf_kfunc_call_test_release", 5 },
diff --git a/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c b/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c
new file mode 100644
index 000000000000..67a1c07ead34
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c
@@ -0,0 +1,174 @@
+{
+ /* This is equivalent to the following program:
+ *
+ * r6 = skb->sk;
+ * r7 = sk_fullsock(r6);
+ * r0 = sk_fullsock(r6);
+ * if (r0 == 0) return 0; (a)
+ * if (r0 != r7) return 0; (b)
+ * *r7->type; (c)
+ * return 0;
+ *
+ * It is safe to dereference r7 at point (c), because of (a) and (b).
+ * The test verifies that relation r0 == r7 is propagated from (b) to (c).
+ */
+ "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JNE false branch",
+ .insns = {
+ /* r6 = skb->sk; */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ /* if (r6 == 0) return 0; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 8),
+ /* r7 = sk_fullsock(skb); */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* r0 = sk_fullsock(skb); */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ /* if (r0 == null) return 0; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ /* if (r0 == r7) r0 = *(r7->type); */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_7, 1), /* Use ! JNE ! */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
+ /* return 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R7 pointer comparison",
+},
+{
+ /* Same as above, but verify that another branch of JNE still
+ * prohibits access to PTR_MAYBE_NULL.
+ */
+ "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JNE true branch",
+ .insns = {
+ /* r6 = skb->sk */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ /* if (r6 == 0) return 0; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 9),
+ /* r7 = sk_fullsock(skb); */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* r0 = sk_fullsock(skb); */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ /* if (r0 == null) return 0; */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+ /* if (r0 == r7) return 0; */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_7, 1), /* Use ! JNE ! */
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ /* r0 = *(r7->type); */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
+ /* return 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .result = REJECT,
+ .errstr = "R7 invalid mem access 'sock_or_null'",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R7 pointer comparison",
+},
+{
+ /* Same as a first test, but not null should be inferred for JEQ branch */
+ "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JEQ true branch",
+ .insns = {
+ /* r6 = skb->sk; */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ /* if (r6 == null) return 0; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 9),
+ /* r7 = sk_fullsock(skb); */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* r0 = sk_fullsock(skb); */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ /* if (r0 == null) return 0; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ /* if (r0 != r7) return 0; */
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_7, 1), /* Use ! JEQ ! */
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ /* r0 = *(r7->type); */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
+ /* return 0; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R7 pointer comparison",
+},
+{
+ /* Same as above, but verify that another branch of JNE still
+ * prohibits access to PTR_MAYBE_NULL.
+ */
+ "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JEQ false branch",
+ .insns = {
+ /* r6 = skb->sk; */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ /* if (r6 == null) return 0; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 8),
+ /* r7 = sk_fullsock(skb); */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* r0 = sk_fullsock(skb); */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ /* if (r0 == null) return 0; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ /* if (r0 != r7) r0 = *(r7->type); */
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_7, 1), /* Use ! JEQ ! */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
+ /* return 0; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .result = REJECT,
+ .errstr = "R7 invalid mem access 'sock_or_null'",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R7 pointer comparison",
+},
+{
+ /* Maps are treated in a different branch of `mark_ptr_not_null_reg`,
+ * so separate test for maps case.
+ */
+ "jne/jeq infer not null, PTR_TO_MAP_VALUE_OR_NULL -> PTR_TO_MAP_VALUE",
+ .insns = {
+ /* r9 = &some stack to use as key */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_9, -8),
+ /* r8 = process local map */
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ /* r6 = map_lookup_elem(r8, r9); */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_9),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* r7 = map_lookup_elem(r8, r9); */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_9),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* if (r6 == 0) return 0; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
+ /* if (r6 != r7) return 0; */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_7, 1),
+ /* read *r7; */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_xdp_sock, queue_id)),
+ /* return 0; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_xskmap = { 3 },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index fd683a32a276..9540164712b7 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -142,7 +142,7 @@
.kfunc = "bpf",
.expected_attach_type = BPF_LSM_MAC,
.flags = BPF_F_SLEEPABLE,
- .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar",
+ .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket",
.fixup_kfunc_btf_id = {
{ "bpf_lookup_user_key", 2 },
{ "bpf_key_put", 4 },
@@ -163,7 +163,7 @@
.kfunc = "bpf",
.expected_attach_type = BPF_LSM_MAC,
.flags = BPF_F_SLEEPABLE,
- .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar",
+ .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket",
.fixup_kfunc_btf_id = {
{ "bpf_lookup_system_key", 1 },
{ "bpf_key_put", 3 },
diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c
index b64d33e4833c..84838feba47f 100644
--- a/tools/testing/selftests/bpf/verifier/ringbuf.c
+++ b/tools/testing/selftests/bpf/verifier/ringbuf.c
@@ -28,7 +28,7 @@
},
.fixup_map_ringbuf = { 1 },
.result = REJECT,
- .errstr = "dereference of modified alloc_mem ptr R1",
+ .errstr = "dereference of modified ringbuf_mem ptr R1",
},
{
"ringbuf: invalid reservation offset 2",
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
index e23f07175e1b..9bb302dade23 100644
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ b/tools/testing/selftests/bpf/verifier/spill_fill.c
@@ -84,7 +84,7 @@
},
.fixup_map_ringbuf = { 1 },
.result = REJECT,
- .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited",
+ .errstr = "R0 pointer arithmetic on ringbuf_mem_or_null prohibited",
},
{
"check corrupted spill/fill",