1 files changed, 451 insertions, 249 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 59a217ca2dfd..464ca3f01fe7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -87,10 +87,35 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type (enum bpf_attach_type) */
 };
 
+enum bpf_cgroup_iter_order {
+	BPF_CGROUP_ITER_ORDER_UNSPEC = 0,
+	BPF_CGROUP_ITER_SELF_ONLY,		/* process only a single object. */
+	BPF_CGROUP_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
+	BPF_CGROUP_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
+	BPF_CGROUP_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
+};
+
 union bpf_iter_link_info {
 	struct {
 		__u32	map_fd;
 	} map;
+	struct {
+		enum bpf_cgroup_iter_order order;
+
+		/* At most one of cgroup_fd and cgroup_id can be non-zero. If
+		 * both are zero, the walk starts from the default cgroup v2
+		 * root. For walking v1 hierarchy, one should always explicitly
+		 * specify cgroup_fd.
+		 */
+		__u32	cgroup_fd;
+		__u64	cgroup_id;
+	} cgroup;
+	/* Parameters of task iterators. */
+	struct {
+		__u32	tid;
+		__u32	pid;
+		__u32	pid_fd;
+	} task;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
@@ -897,7 +922,14 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_CPUMAP,
 	BPF_MAP_TYPE_XSKMAP,
 	BPF_MAP_TYPE_SOCKHASH,
-	BPF_MAP_TYPE_CGROUP_STORAGE,
+	BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
+	/* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching
+	 * to a cgroup. The newer BPF_MAP_TYPE_CGRP_STORAGE is available to
+	 * both cgroup-attached and other progs and supports all functionality
+	 * provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark
+	 * BPF_MAP_TYPE_CGROUP_STORAGE deprecated.
+	 */
+	BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
 	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
 	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
 	BPF_MAP_TYPE_QUEUE,
@@ -909,6 +941,8 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
 	BPF_MAP_TYPE_BLOOM_FILTER,
+	BPF_MAP_TYPE_USER_RINGBUF,
+	BPF_MAP_TYPE_CGRP_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -1233,7 +1267,7 @@ enum {
 
 /* Query effective (directly attached + inherited from ancestor cgroups)
  * programs that will be executed for events within a cgroup.
- * attach_flags with this flag are returned only for directly attached programs.
+ * attach_flags with this flag are always returned 0.
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
@@ -1432,7 +1466,10 @@ union bpf_attr {
 		__u32		attach_flags;
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
-		__aligned_u64	prog_attach_flags; /* output: per-program attach_flags */
+		/* output: per-program attach_flags.
+		 * not allowed to be set during effective query.
+		 */
+		__aligned_u64	prog_attach_flags;
 	} query;
 
 	struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
@@ -2547,14 +2584,19 @@ union bpf_attr {
  * 		* **SOL_SOCKET**, which supports the following *optname*\ s:
  * 		  **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
  * 		  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
- * 		  **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
+ * 		  **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**,
+ * 		  **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**.
  * 		* **IPPROTO_TCP**, which supports the following *optname*\ s:
  * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
  * 		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
  * 		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
+ * 		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
+ * 		  **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
+ * 		  **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
+ * 		  **TCP_BPF_RTO_MIN**.
  * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
- * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * 		* **IPPROTO_IPV6**, which supports the following *optname*\ s:
+ * 		  **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
@@ -2573,10 +2615,12 @@ union bpf_attr {
  *		There are two supported modes at this time:
  *
  *		* **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
- *		  (room space is added or removed below the layer 2 header).
+ * 		  (room space is added or removed between the layer 2 and
+ * 		  layer 3 headers).
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
- * 		  (room space is added or removed below the layer 3 header).
+ * 		  (room space is added or removed between the layer 3 and
+ * 		  layer 4 headers).
  *
  *		The following flags are supported at this time:
  *
@@ -2608,7 +2652,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags)
  * 	Description
  * 		Redirect the packet to the endpoint referenced by *map* at
  * 		index *key*. Depending on its type, this *map* can contain
@@ -2769,12 +2813,10 @@ union bpf_attr {
  * 		  and **BPF_CGROUP_INET6_CONNECT**.
  *
  * 		This helper actually implements a subset of **getsockopt()**.
- * 		It supports the following *level*\ s:
- *
- * 		* **IPPROTO_TCP**, which supports *optname*
- * 		  **TCP_CONGESTION**.
- * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
- * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * 		It supports the same set of *optname*\ s that is supported by
+ * 		the **bpf_setsockopt**\ () helper.  The exceptions are
+ * 		**TCP_BPF_*** is **bpf_setsockopt**\ () only and
+ * 		**TCP_SAVED_SYN** is **bpf_getsockopt**\ () only.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
@@ -3008,8 +3050,18 @@ union bpf_attr {
  * 		**BPF_F_USER_STACK**
  * 			Collect a user space stack instead of a kernel stack.
  * 		**BPF_F_USER_BUILD_ID**
- * 			Collect buildid+offset instead of ips for user stack,
- * 			only valid if **BPF_F_USER_STACK** is also specified.
+ * 			Collect (build_id, file_offset) instead of ips for user
+ * 			stack, only valid if **BPF_F_USER_STACK** is also
+ * 			specified.
+ *
+ * 			*file_offset* is an offset relative to the beginning
+ * 			of the executable or shared object file backing the vma
+ * 			which the *ip* falls in. It is *not* an offset relative
+ * 			to that object's base address. Accordingly, it must be
+ * 			adjusted by adding (sh_addr - sh_offset), where
+ * 			sh_{addr,offset} correspond to the executable section
+ * 			containing *file_offset* in the object, for comparisons
+ * 			to symbols' st_value to be valid.
  *
  * 		**bpf_get_stack**\ () can collect up to
  * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
@@ -4425,7 +4477,7 @@ union bpf_attr {
  *
  *		**-EEXIST** if the option already exists.
  *
- *		**-EFAULT** on failrue to parse the existing header options.
+ *		**-EFAULT** on failure to parse the existing header options.
  *
  *		**-EPERM** if the helper cannot be used under the current
  *		*skops*\ **->op**.
@@ -4634,7 +4686,7 @@ union bpf_attr {
  *		a *map* with *task* as the **key**.  From this
  *		perspective,  the usage is not much different from
  *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
- *		helper enforces the key must be an task_struct and the map must also
+ *		helper enforces the key must be a task_struct and the map must also
  *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
  *
  *		Underneath, the value is stored locally at *task* instead of
@@ -4692,7 +4744,7 @@ union bpf_attr {
  *
  * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
  *	Description
- *		Returns the stored IMA hash of the *inode* (if it's avaialable).
+ *		Returns the stored IMA hash of the *inode* (if it's available).
  *		If the hash is larger than *size*, then only *size*
  *		bytes will be copied to *dst*
  *	Return
@@ -4716,12 +4768,12 @@ union bpf_attr {
  *
  *		The argument *len_diff* can be used for querying with a planned
  *		size change. This allows to check MTU prior to changing packet
- *		ctx. Providing an *len_diff* adjustment that is larger than the
+ *		ctx. Providing a *len_diff* adjustment that is larger than the
  *		actual packet size (resulting in negative packet size) will in
- *		principle not exceed the MTU, why it is not considered a
- *		failure.  Other BPF-helpers are needed for performing the
- *		planned size change, why the responsability for catch a negative
- *		packet size belong in those helpers.
+ *		principle not exceed the MTU, which is why it is not considered
+ *		a failure.  Other BPF helpers are needed for performing the
+ *		planned size change; therefore the responsibility for catching
+ *		a negative packet size belongs in those helpers.
  *
  *		Specifying *ifindex* zero means the MTU check is performed
  *		against the current net device.  This is practical if this isn't
@@ -4919,6 +4971,7 @@ union bpf_attr {
  * 		Get address of the traced function (for tracing and kprobe programs).
  * 	Return
  * 		Address of the traced function.
+ * 		0 for kprobes placed within the function (not at the entry).
  *
  * u64 bpf_get_attach_cookie(void *ctx)
  * 	Description
@@ -5048,12 +5101,12 @@ union bpf_attr {
  *
  * long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
  *	Description
- *		Get **n**-th argument (zero based) of the traced function (for tracing programs)
+ *		Get **n**-th argument register (zero based) of the traced function (for tracing programs)
  *		returned in **value**.
  *
  *	Return
  *		0 on success.
- *		**-EINVAL** if n >= arguments count of traced function.
+ *		**-EINVAL** if n >= argument register count of traced function.
  *
  * long bpf_get_func_ret(void *ctx, u64 *value)
  *	Description
@@ -5066,24 +5119,37 @@ union bpf_attr {
  *
  * long bpf_get_func_arg_cnt(void *ctx)
  *	Description
- *		Get number of arguments of the traced function (for tracing programs).
+ *		Get number of registers of the traced function (for tracing programs) where
+ *		function arguments are stored in these registers.
  *
  *	Return
- *		The number of arguments of the traced function.
+ *		The number of argument registers of the traced function.
  *
  * int bpf_get_retval(void)
  *	Description
- *		Get the syscall's return value that will be returned to userspace.
+ *		Get the BPF program's return value that will be returned to the upper layers.
  *
- *		This helper is currently supported by cgroup programs only.
+ *		This helper is currently supported by cgroup programs and only by the hooks
+ *		where BPF program's return value is returned to the userspace via errno.
  *	Return
- *		The syscall's return value.
+ *		The BPF program's return value.
  *
  * int bpf_set_retval(int retval)
  *	Description
- *		Set the syscall's return value that will be returned to userspace.
+ *		Set the BPF program's return value that will be returned to the upper layers.
+ *
+ *		This helper is currently supported by cgroup programs and only by the hooks
+ *		where BPF program's return value is returned to the userspace via errno.
+ *
+ *		Note that there is the following corner case where the program exports an error
+ *		via bpf_set_retval but signals success via 'return 1':
+ *
+ *			bpf_set_retval(-EPERM);
+ *			return 1;
+ *
+ *		In this case, the BPF program's return value will use helper's -EPERM. This
+ *		still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case.
  *
- *		This helper is currently supported by cgroup programs only.
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
@@ -5227,7 +5293,7 @@ union bpf_attr {
  *	Return
  *		Nothing. Always succeeds.
  *
- * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags)
+ * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
  *	Description
  *		Read *len* bytes from *src* into *dst*, starting from *offset*
  *		into *src*.
@@ -5237,7 +5303,7 @@ union bpf_attr {
  *		of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
  *		*flags* is not 0.
  *
- * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
+ * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
  *	Description
  *		Write *len* bytes from *src* into *dst*, starting from *offset*
  *		into *dst*.
@@ -5247,7 +5313,7 @@ union bpf_attr {
  *		of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
  *		is a read-only dynptr or if *flags* is not 0.
  *
- * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
+ * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
  *	Description
  *		Get a pointer to the underlying dynptr data.
  *
@@ -5331,224 +5397,321 @@ union bpf_attr {
  *		**-EACCES** if the SYN cookie is not valid.
  *
  *		**-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ *
+ * u64 bpf_ktime_get_tai_ns(void)
+ *	Description
+ *		A nonsettable system-wide clock derived from wall-clock time but
+ *		ignoring leap seconds.  This clock does not experience
+ *		discontinuities and backwards jumps caused by NTP inserting leap
+ *		seconds as CLOCK_REALTIME does.
+ *
+ *		See: **clock_gettime**\ (**CLOCK_TAI**)
+ *	Return
+ *		Current *ktime*.
+ *
+ * long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags)
+ *	Description
+ *		Drain samples from the specified user ring buffer, and invoke
+ *		the provided callback for each such sample:
+ *
+ *		long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx);
+ *
+ *		If **callback_fn** returns 0, the helper will continue to try
+ *		and drain the next sample, up to a maximum of
+ *		BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1,
+ *		the helper will skip the rest of the samples and return. Other
+ *		return values are not used now, and will be rejected by the
+ *		verifier.
+ *	Return
+ *		The number of drained samples if no error was encountered while
+ *		draining samples, or 0 if no samples were present in the ring
+ *		buffer. If a user-space producer was epoll-waiting on this map,
+ *		and at least one sample was drained, they will receive an event
+ *		notification notifying them of available space in the ring
+ *		buffer. If the BPF_RB_NO_WAKEUP flag is passed to this
+ *		function, no wakeup notification will be sent. If the
+ *		BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will
+ *		be sent even if no sample was drained.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EBUSY** if the ring buffer is contended, and another calling
+ *		context was concurrently draining the ring buffer.
+ *
+ *		**-EINVAL** if user-space is not properly tracking the ring
+ *		buffer due to the producer position not being aligned to 8
+ *		bytes, a sample not being aligned to 8 bytes, or the producer
+ *		position not matching the advertised length of a sample.
+ *
+ *		**-E2BIG** if user-space has tried to publish a sample which is
+ *		larger than the size of the ring buffer, or which cannot fit
+ *		within a struct bpf_dynptr.
+ *
+ * void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from the *cgroup*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *cgroup* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this
+ *		helper enforces the key must be a cgroup struct and the map must also
+ *		be a **BPF_MAP_TYPE_CGRP_STORAGE**.
+ *
+ *		In reality, the local-storage value is embedded directly inside of the
+ *		*cgroup* object itself, rather than being located in the
+ *		**BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is
+ *		queried for some *map* on a *cgroup* object, the kernel will perform an
+ *		O(n) iteration over all of the live local-storage values for that
+ *		*cgroup* object until the local-storage value for the *map* is found.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup)
+ *	Description
+ *		Delete a bpf_local_storage from a *cgroup*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
-#define __BPF_FUNC_MAPPER(FN)		\
-	FN(unspec),			\
-	FN(map_lookup_elem),		\
-	FN(map_update_elem),		\
-	FN(map_delete_elem),		\
-	FN(probe_read),			\
-	FN(ktime_get_ns),		\
-	FN(trace_printk),		\
-	FN(get_prandom_u32),		\
-	FN(get_smp_processor_id),	\
-	FN(skb_store_bytes),		\
-	FN(l3_csum_replace),		\
-	FN(l4_csum_replace),		\
-	FN(tail_call),			\
-	FN(clone_redirect),		\
-	FN(get_current_pid_tgid),	\
-	FN(get_current_uid_gid),	\
-	FN(get_current_comm),		\
-	FN(get_cgroup_classid),		\
-	FN(skb_vlan_push),		\
-	FN(skb_vlan_pop),		\
-	FN(skb_get_tunnel_key),		\
-	FN(skb_set_tunnel_key),		\
-	FN(perf_event_read),		\
-	FN(redirect),			\
-	FN(get_route_realm),		\
-	FN(perf_event_output),		\
-	FN(skb_load_bytes),		\
-	FN(get_stackid),		\
-	FN(csum_diff),			\
-	FN(skb_get_tunnel_opt),		\
-	FN(skb_set_tunnel_opt),		\
-	FN(skb_change_proto),		\
-	FN(skb_change_type),		\
-	FN(skb_under_cgroup),		\
-	FN(get_hash_recalc),		\
-	FN(get_current_task),		\
-	FN(probe_write_user),		\
-	FN(current_task_under_cgroup),	\
-	FN(skb_change_tail),		\
-	FN(skb_pull_data),		\
-	FN(csum_update),		\
-	FN(set_hash_invalid),		\
-	FN(get_numa_node_id),		\
-	FN(skb_change_head),		\
-	FN(xdp_adjust_head),		\
-	FN(probe_read_str),		\
-	FN(get_socket_cookie),		\
-	FN(get_socket_uid),		\
-	FN(set_hash),			\
-	FN(setsockopt),			\
-	FN(skb_adjust_room),		\
-	FN(redirect_map),		\
-	FN(sk_redirect_map),		\
-	FN(sock_map_update),		\
-	FN(xdp_adjust_meta),		\
-	FN(perf_event_read_value),	\
-	FN(perf_prog_read_value),	\
-	FN(getsockopt),			\
-	FN(override_return),		\
-	FN(sock_ops_cb_flags_set),	\
-	FN(msg_redirect_map),		\
-	FN(msg_apply_bytes),		\
-	FN(msg_cork_bytes),		\
-	FN(msg_pull_data),		\
-	FN(bind),			\
-	FN(xdp_adjust_tail),		\
-	FN(skb_get_xfrm_state),		\
-	FN(get_stack),			\
-	FN(skb_load_bytes_relative),	\
-	FN(fib_lookup),			\
-	FN(sock_hash_update),		\
-	FN(msg_redirect_hash),		\
-	FN(sk_redirect_hash),		\
-	FN(lwt_push_encap),		\
-	FN(lwt_seg6_store_bytes),	\
-	FN(lwt_seg6_adjust_srh),	\
-	FN(lwt_seg6_action),		\
-	FN(rc_repeat),			\
-	FN(rc_keydown),			\
-	FN(skb_cgroup_id),		\
-	FN(get_current_cgroup_id),	\
-	FN(get_local_storage),		\
-	FN(sk_select_reuseport),	\
-	FN(skb_ancestor_cgroup_id),	\
-	FN(sk_lookup_tcp),		\
-	FN(sk_lookup_udp),		\
-	FN(sk_release),			\
-	FN(map_push_elem),		\
-	FN(map_pop_elem),		\
-	FN(map_peek_elem),		\
-	FN(msg_push_data),		\
-	FN(msg_pop_data),		\
-	FN(rc_pointer_rel),		\
-	FN(spin_lock),			\
-	FN(spin_unlock),		\
-	FN(sk_fullsock),		\
-	FN(tcp_sock),			\
-	FN(skb_ecn_set_ce),		\
-	FN(get_listener_sock),		\
-	FN(skc_lookup_tcp),		\
-	FN(tcp_check_syncookie),	\
-	FN(sysctl_get_name),		\
-	FN(sysctl_get_current_value),	\
-	FN(sysctl_get_new_value),	\
-	FN(sysctl_set_new_value),	\
-	FN(strtol),			\
-	FN(strtoul),			\
-	FN(sk_storage_get),		\
-	FN(sk_storage_delete),		\
-	FN(send_signal),		\
-	FN(tcp_gen_syncookie),		\
-	FN(skb_output),			\
-	FN(probe_read_user),		\
-	FN(probe_read_kernel),		\
-	FN(probe_read_user_str),	\
-	FN(probe_read_kernel_str),	\
-	FN(tcp_send_ack),		\
-	FN(send_signal_thread),		\
-	FN(jiffies64),			\
-	FN(read_branch_records),	\
-	FN(get_ns_current_pid_tgid),	\
-	FN(xdp_output),			\
-	FN(get_netns_cookie),		\
-	FN(get_current_ancestor_cgroup_id),	\
-	FN(sk_assign),			\
-	FN(ktime_get_boot_ns),		\
-	FN(seq_printf),			\
-	FN(seq_write),			\
-	FN(sk_cgroup_id),		\
-	FN(sk_ancestor_cgroup_id),	\
-	FN(ringbuf_output),		\
-	FN(ringbuf_reserve),		\
-	FN(ringbuf_submit),		\
-	FN(ringbuf_discard),		\
-	FN(ringbuf_query),		\
-	FN(csum_level),			\
-	FN(skc_to_tcp6_sock),		\
-	FN(skc_to_tcp_sock),		\
-	FN(skc_to_tcp_timewait_sock),	\
-	FN(skc_to_tcp_request_sock),	\
-	FN(skc_to_udp6_sock),		\
-	FN(get_task_stack),		\
-	FN(load_hdr_opt),		\
-	FN(store_hdr_opt),		\
-	FN(reserve_hdr_opt),		\
-	FN(inode_storage_get),		\
-	FN(inode_storage_delete),	\
-	FN(d_path),			\
-	FN(copy_from_user),		\
-	FN(snprintf_btf),		\
-	FN(seq_printf_btf),		\
-	FN(skb_cgroup_classid),		\
-	FN(redirect_neigh),		\
-	FN(per_cpu_ptr),		\
-	FN(this_cpu_ptr),		\
-	FN(redirect_peer),		\
-	FN(task_storage_get),		\
-	FN(task_storage_delete),	\
-	FN(get_current_task_btf),	\
-	FN(bprm_opts_set),		\
-	FN(ktime_get_coarse_ns),	\
-	FN(ima_inode_hash),		\
-	FN(sock_from_file),		\
-	FN(check_mtu),			\
-	FN(for_each_map_elem),		\
-	FN(snprintf),			\
-	FN(sys_bpf),			\
-	FN(btf_find_by_name_kind),	\
-	FN(sys_close),			\
-	FN(timer_init),			\
-	FN(timer_set_callback),		\
-	FN(timer_start),		\
-	FN(timer_cancel),		\
-	FN(get_func_ip),		\
-	FN(get_attach_cookie),		\
-	FN(task_pt_regs),		\
-	FN(get_branch_snapshot),	\
-	FN(trace_vprintk),		\
-	FN(skc_to_unix_sock),		\
-	FN(kallsyms_lookup_name),	\
-	FN(find_vma),			\
-	FN(loop),			\
-	FN(strncmp),			\
-	FN(get_func_arg),		\
-	FN(get_func_ret),		\
-	FN(get_func_arg_cnt),		\
-	FN(get_retval),			\
-	FN(set_retval),			\
-	FN(xdp_get_buff_len),		\
-	FN(xdp_load_bytes),		\
-	FN(xdp_store_bytes),		\
-	FN(copy_from_user_task),	\
-	FN(skb_set_tstamp),		\
-	FN(ima_file_hash),		\
-	FN(kptr_xchg),			\
-	FN(map_lookup_percpu_elem),     \
-	FN(skc_to_mptcp_sock),		\
-	FN(dynptr_from_mem),		\
-	FN(ringbuf_reserve_dynptr),	\
-	FN(ringbuf_submit_dynptr),	\
-	FN(ringbuf_discard_dynptr),	\
-	FN(dynptr_read),		\
-	FN(dynptr_write),		\
-	FN(dynptr_data),		\
-	FN(tcp_raw_gen_syncookie_ipv4),	\
-	FN(tcp_raw_gen_syncookie_ipv6),	\
-	FN(tcp_raw_check_syncookie_ipv4),	\
-	FN(tcp_raw_check_syncookie_ipv6),	\
+#define ___BPF_FUNC_MAPPER(FN, ctx...)			\
+	FN(unspec, 0, ##ctx)				\
+	FN(map_lookup_elem, 1, ##ctx)			\
+	FN(map_update_elem, 2, ##ctx)			\
+	FN(map_delete_elem, 3, ##ctx)			\
+	FN(probe_read, 4, ##ctx)			\
+	FN(ktime_get_ns, 5, ##ctx)			\
+	FN(trace_printk, 6, ##ctx)			\
+	FN(get_prandom_u32, 7, ##ctx)			\
+	FN(get_smp_processor_id, 8, ##ctx)		\
+	FN(skb_store_bytes, 9, ##ctx)			\
+	FN(l3_csum_replace, 10, ##ctx)			\
+	FN(l4_csum_replace, 11, ##ctx)			\
+	FN(tail_call, 12, ##ctx)			\
+	FN(clone_redirect, 13, ##ctx)			\
+	FN(get_current_pid_tgid, 14, ##ctx)		\
+	FN(get_current_uid_gid, 15, ##ctx)		\
+	FN(get_current_comm, 16, ##ctx)			\
+	FN(get_cgroup_classid, 17, ##ctx)		\
+	FN(skb_vlan_push, 18, ##ctx)			\
+	FN(skb_vlan_pop, 19, ##ctx)			\
+	FN(skb_get_tunnel_key, 20, ##ctx)		\
+	FN(skb_set_tunnel_key, 21, ##ctx)		\
+	FN(perf_event_read, 22, ##ctx)			\
+	FN(redirect, 23, ##ctx)				\
+	FN(get_route_realm, 24, ##ctx)			\
+	FN(perf_event_output, 25, ##ctx)		\
+	FN(skb_load_bytes, 26, ##ctx)			\
+	FN(get_stackid, 27, ##ctx)			\
+	FN(csum_diff, 28, ##ctx)			\
+	FN(skb_get_tunnel_opt, 29, ##ctx)		\
+	FN(skb_set_tunnel_opt, 30, ##ctx)		\
+	FN(skb_change_proto, 31, ##ctx)			\
+	FN(skb_change_type, 32, ##ctx)			\
+	FN(skb_under_cgroup, 33, ##ctx)			\
+	FN(get_hash_recalc, 34, ##ctx)			\
+	FN(get_current_task, 35, ##ctx)			\
+	FN(probe_write_user, 36, ##ctx)			\
+	FN(current_task_under_cgroup, 37, ##ctx)	\
+	FN(skb_change_tail, 38, ##ctx)			\
+	FN(skb_pull_data, 39, ##ctx)			\
+	FN(csum_update, 40, ##ctx)			\
+	FN(set_hash_invalid, 41, ##ctx)			\
+	FN(get_numa_node_id, 42, ##ctx)			\
+	FN(skb_change_head, 43, ##ctx)			\
+	FN(xdp_adjust_head, 44, ##ctx)			\
+	FN(probe_read_str, 45, ##ctx)			\
+	FN(get_socket_cookie, 46, ##ctx)		\
+	FN(get_socket_uid, 47, ##ctx)			\
+	FN(set_hash, 48, ##ctx)				\
+	FN(setsockopt, 49, ##ctx)			\
+	FN(skb_adjust_room, 50, ##ctx)			\
+	FN(redirect_map, 51, ##ctx)			\
+	FN(sk_redirect_map, 52, ##ctx)			\
+	FN(sock_map_update, 53, ##ctx)			\
+	FN(xdp_adjust_meta, 54, ##ctx)			\
+	FN(perf_event_read_value, 55, ##ctx)		\
+	FN(perf_prog_read_value, 56, ##ctx)		\
+	FN(getsockopt, 57, ##ctx)			\
+	FN(override_return, 58, ##ctx)			\
+	FN(sock_ops_cb_flags_set, 59, ##ctx)		\
+	FN(msg_redirect_map, 60, ##ctx)			\
+	FN(msg_apply_bytes, 61, ##ctx)			\
+	FN(msg_cork_bytes, 62, ##ctx)			\
+	FN(msg_pull_data, 63, ##ctx)			\
+	FN(bind, 64, ##ctx)				\
+	FN(xdp_adjust_tail, 65, ##ctx)			\
+	FN(skb_get_xfrm_state, 66, ##ctx)		\
+	FN(get_stack, 67, ##ctx)			\
+	FN(skb_load_bytes_relative, 68, ##ctx)		\
+	FN(fib_lookup, 69, ##ctx)			\
+	FN(sock_hash_update, 70, ##ctx)			\
+	FN(msg_redirect_hash, 71, ##ctx)		\
+	FN(sk_redirect_hash, 72, ##ctx)			\
+	FN(lwt_push_encap, 73, ##ctx)			\
+	FN(lwt_seg6_store_bytes, 74, ##ctx)		\
+	FN(lwt_seg6_adjust_srh, 75, ##ctx)		\
+	FN(lwt_seg6_action, 76, ##ctx)			\
+	FN(rc_repeat, 77, ##ctx)			\
+	FN(rc_keydown, 78, ##ctx)			\
+	FN(skb_cgroup_id, 79, ##ctx)			\
+	FN(get_current_cgroup_id, 80, ##ctx)		\
+	FN(get_local_storage, 81, ##ctx)		\
+	FN(sk_select_reuseport, 82, ##ctx)		\
+	FN(skb_ancestor_cgroup_id, 83, ##ctx)		\
+	FN(sk_lookup_tcp, 84, ##ctx)			\
+	FN(sk_lookup_udp, 85, ##ctx)			\
+	FN(sk_release, 86, ##ctx)			\
+	FN(map_push_elem, 87, ##ctx)			\
+	FN(map_pop_elem, 88, ##ctx)			\
+	FN(map_peek_elem, 89, ##ctx)			\
+	FN(msg_push_data, 90, ##ctx)			\
+	FN(msg_pop_data, 91, ##ctx)			\
+	FN(rc_pointer_rel, 92, ##ctx)			\
+	FN(spin_lock, 93, ##ctx)			\
+	FN(spin_unlock, 94, ##ctx)			\
+	FN(sk_fullsock, 95, ##ctx)			\
+	FN(tcp_sock, 96, ##ctx)				\
+	FN(skb_ecn_set_ce, 97, ##ctx)			\
+	FN(get_listener_sock, 98, ##ctx)		\
+	FN(skc_lookup_tcp, 99, ##ctx)			\
+	FN(tcp_check_syncookie, 100, ##ctx)		\
+	FN(sysctl_get_name, 101, ##ctx)			\
+	FN(sysctl_get_current_value, 102, ##ctx)	\
+	FN(sysctl_get_new_value, 103, ##ctx)		\
+	FN(sysctl_set_new_value, 104, ##ctx)		\
+	FN(strtol, 105, ##ctx)				\
+	FN(strtoul, 106, ##ctx)				\
+	FN(sk_storage_get, 107, ##ctx)			\
+	FN(sk_storage_delete, 108, ##ctx)		\
+	FN(send_signal, 109, ##ctx)			\
+	FN(tcp_gen_syncookie, 110, ##ctx)		\
+	FN(skb_output, 111, ##ctx)			\
+	FN(probe_read_user, 112, ##ctx)			\
+	FN(probe_read_kernel, 113, ##ctx)		\
+	FN(probe_read_user_str, 114, ##ctx)		\
+	FN(probe_read_kernel_str, 115, ##ctx)		\
+	FN(tcp_send_ack, 116, ##ctx)			\
+	FN(send_signal_thread, 117, ##ctx)		\
+	FN(jiffies64, 118, ##ctx)			\
+	FN(read_branch_records, 119, ##ctx)		\
+	FN(get_ns_current_pid_tgid, 120, ##ctx)		\
+	FN(xdp_output, 121, ##ctx)			\
+	FN(get_netns_cookie, 122, ##ctx)		\
+	FN(get_current_ancestor_cgroup_id, 123, ##ctx)	\
+	FN(sk_assign, 124, ##ctx)			\
+	FN(ktime_get_boot_ns, 125, ##ctx)		\
+	FN(seq_printf, 126, ##ctx)			\
+	FN(seq_write, 127, ##ctx)			\
+	FN(sk_cgroup_id, 128, ##ctx)			\
+	FN(sk_ancestor_cgroup_id, 129, ##ctx)		\
+	FN(ringbuf_output, 130, ##ctx)			\
+	FN(ringbuf_reserve, 131, ##ctx)			\
+	FN(ringbuf_submit, 132, ##ctx)			\
+	FN(ringbuf_discard, 133, ##ctx)			\
+	FN(ringbuf_query, 134, ##ctx)			\
+	FN(csum_level, 135, ##ctx)			\
+	FN(skc_to_tcp6_sock, 136, ##ctx)		\
+	FN(skc_to_tcp_sock, 137, ##ctx)			\
+	FN(skc_to_tcp_timewait_sock, 138, ##ctx)	\
+	FN(skc_to_tcp_request_sock, 139, ##ctx)		\
+	FN(skc_to_udp6_sock, 140, ##ctx)		\
+	FN(get_task_stack, 141, ##ctx)			\
+	FN(load_hdr_opt, 142, ##ctx)			\
+	FN(store_hdr_opt, 143, ##ctx)			\
+	FN(reserve_hdr_opt, 144, ##ctx)			\
+	FN(inode_storage_get, 145, ##ctx)		\
+	FN(inode_storage_delete, 146, ##ctx)		\
+	FN(d_path, 147, ##ctx)				\
+	FN(copy_from_user, 148, ##ctx)			\
+	FN(snprintf_btf, 149, ##ctx)			\
+	FN(seq_printf_btf, 150, ##ctx)			\
+	FN(skb_cgroup_classid, 151, ##ctx)		\
+	FN(redirect_neigh, 152, ##ctx)			\
+	FN(per_cpu_ptr, 153, ##ctx)			\
+	FN(this_cpu_ptr, 154, ##ctx)			\
+	FN(redirect_peer, 155, ##ctx)			\
+	FN(task_storage_get, 156, ##ctx)		\
+	FN(task_storage_delete, 157, ##ctx)		\
+	FN(get_current_task_btf, 158, ##ctx)		\
+	FN(bprm_opts_set, 159, ##ctx)			\
+	FN(ktime_get_coarse_ns, 160, ##ctx)		\
+	FN(ima_inode_hash, 161, ##ctx)			\
+	FN(sock_from_file, 162, ##ctx)			\
+	FN(check_mtu, 163, ##ctx)			\
+	FN(for_each_map_elem, 164, ##ctx)		\
+	FN(snprintf, 165, ##ctx)			\
+	FN(sys_bpf, 166, ##ctx)				\
+	FN(btf_find_by_name_kind, 167, ##ctx)		\
+	FN(sys_close, 168, ##ctx)			\
+	FN(timer_init, 169, ##ctx)			\
+	FN(timer_set_callback, 170, ##ctx)		\
+	FN(timer_start, 171, ##ctx)			\
+	FN(timer_cancel, 172, ##ctx)			\
+	FN(get_func_ip, 173, ##ctx)			\
+	FN(get_attach_cookie, 174, ##ctx)		\
+	FN(task_pt_regs, 175, ##ctx)			\
+	FN(get_branch_snapshot, 176, ##ctx)		\
+	FN(trace_vprintk, 177, ##ctx)			\
+	FN(skc_to_unix_sock, 178, ##ctx)		\
+	FN(kallsyms_lookup_name, 179, ##ctx)		\
+	FN(find_vma, 180, ##ctx)			\
+	FN(loop, 181, ##ctx)				\
+	FN(strncmp, 182, ##ctx)				\
+	FN(get_func_arg, 183, ##ctx)			\
+	FN(get_func_ret, 184, ##ctx)			\
+	FN(get_func_arg_cnt, 185, ##ctx)		\
+	FN(get_retval, 186, ##ctx)			\
+	FN(set_retval, 187, ##ctx)			\
+	FN(xdp_get_buff_len, 188, ##ctx)		\
+	FN(xdp_load_bytes, 189, ##ctx)			\
+	FN(xdp_store_bytes, 190, ##ctx)			\
+	FN(copy_from_user_task, 191, ##ctx)		\
+	FN(skb_set_tstamp, 192, ##ctx)			\
+	FN(ima_file_hash, 193, ##ctx)			\
+	FN(kptr_xchg, 194, ##ctx)			\
+	FN(map_lookup_percpu_elem, 195, ##ctx)		\
+	FN(skc_to_mptcp_sock, 196, ##ctx)		\
+	FN(dynptr_from_mem, 197, ##ctx)			\
+	FN(ringbuf_reserve_dynptr, 198, ##ctx)		\
+	FN(ringbuf_submit_dynptr, 199, ##ctx)		\
+	FN(ringbuf_discard_dynptr, 200, ##ctx)		\
+	FN(dynptr_read, 201, ##ctx)			\
+	FN(dynptr_write, 202, ##ctx)			\
+	FN(dynptr_data, 203, ##ctx)			\
+	FN(tcp_raw_gen_syncookie_ipv4, 204, ##ctx)	\
+	FN(tcp_raw_gen_syncookie_ipv6, 205, ##ctx)	\
+	FN(tcp_raw_check_syncookie_ipv4, 206, ##ctx)	\
+	FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx)	\
+	FN(ktime_get_tai_ns, 208, ##ctx)		\
+	FN(user_ringbuf_drain, 209, ##ctx)		\
+	FN(cgrp_storage_get, 210, ##ctx)		\
+	FN(cgrp_storage_delete, 211, ##ctx)		\
 	/* */
 
+/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
+ * know or care about integer value that is now passed as second argument
+ */
+#define __BPF_FUNC_MAPPER_APPLY(name, value, FN) FN(name),
+#define __BPF_FUNC_MAPPER(FN) ___BPF_FUNC_MAPPER(__BPF_FUNC_MAPPER_APPLY, FN)
+
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
  */
-#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
+#define __BPF_ENUM_FN(x, y) BPF_FUNC_ ## x = y,
 enum bpf_func_id {
-	__BPF_FUNC_MAPPER(__BPF_ENUM_FN)
+	___BPF_FUNC_MAPPER(__BPF_ENUM_FN)
 	__BPF_FUNC_MAX_ID,
 };
 #undef __BPF_ENUM_FN
@@ -5603,6 +5766,11 @@ enum {
 	BPF_F_SEQ_NUMBER		= (1ULL << 3),
 };
 
+/* BPF_FUNC_skb_get_tunnel_key flags. */
+enum {
+	BPF_F_TUNINFO_FLAGS		= (1ULL << 4),
+};
+
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
  */
@@ -5792,7 +5960,10 @@ struct bpf_tunnel_key {
 	};
 	__u8 tunnel_tos;
 	__u8 tunnel_ttl;
-	__u16 tunnel_ext;	/* Padding, future use. */
+	union {
+		__u16 tunnel_ext;	/* compat */
+		__be16 tunnel_flags;
+	};
 	__u32 tunnel_label;
 	union {
 		__u32 local_ipv4;
@@ -5836,6 +6007,11 @@ enum bpf_ret_code {
 	 *    represented by BPF_REDIRECT above).
 	 */
 	BPF_LWT_REROUTE = 128,
+	/* BPF_FLOW_DISSECTOR_CONTINUE: used by BPF_PROG_TYPE_FLOW_DISSECTOR
+	 *   to indicate that no custom dissection was performed, and
+	 *   fallback to standard dissector is requested.
+	 */
+	BPF_FLOW_DISSECTOR_CONTINUE = 129,
 };
 
 struct bpf_sock {
@@ -6134,11 +6310,26 @@ struct bpf_link_info {
 		struct {
 			__aligned_u64 target_name; /* in/out: target_name buffer ptr */
 			__u32 target_name_len;	   /* in/out: target_name buffer len */
+
+			/* If the iter specific field is 32 bits, it can be put
+			 * in the first or second union. Otherwise it should be
+			 * put in the second union.
+			 */
 			union {
 				struct {
 					__u32 map_id;
 				} map;
 			};
+			union {
+				struct {
+					__u64 cgroup_id;
+					__u32 order;
+				} cgroup;
+				struct {
+					__u32 tid;
+					__u32 pid;
+				} task;
+			};
 		} iter;
 		struct  {
 			__u32 netns_ino;
@@ -6257,6 +6448,7 @@ struct bpf_sock_ops {
 				 * the outgoing header has not
 				 * been written yet.
 				 */
+	__u64 skb_hwtstamp;
 };
 
 /* Definitions for bpf_sock_ops_cb_flags */
@@ -6699,6 +6891,16 @@ struct bpf_dynptr {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_list_head {
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
+struct bpf_list_node {
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.