summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-02-04 16:46:58 -0500
committerDavid S. Miller <davem@davemloft.net>2018-02-04 16:46:58 -0500
commita6b88814ab541d386d793f6df260a3e4d5cccb11 (patch)
treea7c633be819bb450edab3beebd1a9a0cfba8bb72
parent35277995e17919ab838beae765f440674e8576eb (diff)
parent09c0656dfabd0b15e410eca67dc45cb14ec9c13a (diff)
downloadlinux-a6b88814ab541d386d793f6df260a3e4d5cccb11.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Alexei Starovoitov says: ==================== pull-request: bpf 2018-02-02 The following pull-request contains BPF updates for your *net* tree. The main changes are: 1) support XDP attach in libbpf, from Eric. 2) minor fixes, from Daniel, Jakub, Yonghong, Alexei. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/bpf/bpf_devel_QA.txt31
-rw-r--r--drivers/net/netdevsim/bpf.c5
-rw-r--r--include/linux/netdevice.h6
-rw-r--r--kernel/bpf/core.c32
-rw-r--r--net/bpf/test_run.c4
-rw-r--r--samples/bpf/Makefile2
-rw-r--r--samples/bpf/bpf_load.c102
-rw-r--r--samples/bpf/bpf_load.h2
-rw-r--r--samples/bpf/xdp1_user.c4
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c6
-rw-r--r--samples/bpf/xdp_redirect_map_user.c8
-rw-r--r--samples/bpf/xdp_redirect_user.c8
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c10
-rw-r--r--samples/bpf/xdp_rxq_info_user.c4
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c6
-rw-r--r--tools/include/uapi/linux/if_link.h943
-rw-r--r--tools/include/uapi/linux/netlink.h251
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile6
-rw-r--r--tools/lib/bpf/bpf.c135
-rw-r--r--tools/lib/bpf/bpf.h2
-rw-r--r--tools/lib/bpf/libbpf.c4
-rw-r--r--tools/lib/bpf/libbpf.h6
-rw-r--r--tools/lib/bpf/nlattr.c187
-rw-r--r--tools/lib/bpf/nlattr.h72
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile4
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c14
28 files changed, 1716 insertions, 141 deletions
diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt
index cefef855dea4..84cbb302f2b5 100644
--- a/Documentation/bpf/bpf_devel_QA.txt
+++ b/Documentation/bpf/bpf_devel_QA.txt
@@ -516,4 +516,35 @@ A: LLVM has a -mcpu selector for the BPF back end in order to allow the
By the way, the BPF kernel selftests run with -mcpu=probe for better
test coverage.
+Q: In some cases clang flag "-target bpf" is used but in other cases the
+ default clang target, which matches the underlying architecture, is used.
+ What is the difference and when I should use which?
+
+A: Although LLVM IR generation and optimization try to stay architecture
+ independent, "-target <arch>" still has some impact on generated code:
+
+ - BPF program may recursively include header file(s) with file scope
+ inline assembly codes. The default target can handle this well,
+ while bpf target may fail if bpf backend assembler does not
+ understand these assembly codes, which is true in most cases.
+
+ - When compiled without -g, additional elf sections, e.g.,
+ .eh_frame and .rela.eh_frame, may be present in the object file
+ with default target, but not with bpf target.
+
+ - The default target may turn a C switch statement into a switch table
+ lookup and jump operation. Since the switch table is placed
+ in the global readonly section, the bpf program will fail to load.
+ The bpf target does not support switch table optimization.
+ The clang option "-fno-jump-tables" can be used to disable
+ switch table generation.
+
+ You should use default target when:
+
+ - Your program includes a header file, e.g., ptrace.h, which eventually
+ pulls in some header files containing file scope host assembly codes.
+ - You can add "-fno-jump-tables" to work around the switch table issue.
+
+ Otherwise, you can use bpf target.
+
Happy BPF hacking!
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index de73c1ff0939..75c25306d234 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -480,8 +480,7 @@ static int
nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap)
{
struct nsim_bpf_bound_map *nmap;
- unsigned int i;
- int err;
+ int i, err;
if (WARN_ON(offmap->map.map_type != BPF_MAP_TYPE_ARRAY &&
offmap->map.map_type != BPF_MAP_TYPE_HASH))
@@ -518,7 +517,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap)
return 0;
err_free:
- while (--i) {
+ while (--i >= 0) {
kfree(nmap->entry[i].key);
kfree(nmap->entry[i].value);
}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4c77f39ebd65..5eef6c8e2741 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3228,6 +3228,12 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev,
}
#endif
+static inline struct netdev_rx_queue *
+__netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
+{
+ return dev->_rx + rxq;
+}
+
#ifdef CONFIG_SYSFS
static inline unsigned int get_netdev_rx_queue_index(
struct netdev_rx_queue *queue)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5f35f93dcab2..29ca9208dcfa 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1576,25 +1576,41 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
__u32 __user *prog_ids, u32 cnt)
{
struct bpf_prog **prog;
- u32 i = 0, id;
-
+ unsigned long err = 0;
+ u32 i = 0, *ids;
+ bool nospc;
+
+ /* users of this function are doing:
+ * cnt = bpf_prog_array_length();
+ * if (cnt > 0)
+ * bpf_prog_array_copy_to_user(..., cnt);
+ * so below kcalloc doesn't need extra cnt > 0 check, but
+ * bpf_prog_array_length() releases rcu lock and
+ * prog array could have been swapped with empty or larger array,
+ * so always copy 'cnt' prog_ids to the user.
+ * In a rare race the user will see zero prog_ids
+ */
+ ids = kcalloc(cnt, sizeof(u32), GFP_USER);
+ if (!ids)
+ return -ENOMEM;
rcu_read_lock();
prog = rcu_dereference(progs)->progs;
for (; *prog; prog++) {
if (*prog == &dummy_bpf_prog.prog)
continue;
- id = (*prog)->aux->id;
- if (copy_to_user(prog_ids + i, &id, sizeof(id))) {
- rcu_read_unlock();
- return -EFAULT;
- }
+ ids[i] = (*prog)->aux->id;
if (++i == cnt) {
prog++;
break;
}
}
+ nospc = !!(*prog);
rcu_read_unlock();
- if (*prog)
+ err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
+ kfree(ids);
+ if (err)
+ return -EFAULT;
+ if (nospc)
return -ENOSPC;
return 0;
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a86e6687026e..2ced48662c1f 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -151,6 +151,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
{
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
+ struct netdev_rx_queue *rxqueue;
struct xdp_buff xdp = {};
u32 retval, duration;
void *data;
@@ -165,6 +166,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + size;
+ rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
+ xdp.rxq = &rxqueue->xdp_rxq;
+
retval = bpf_test_run(prog, &xdp, repeat, &duration);
if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN)
size = xdp.data_end - xdp.data;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 64335bb94f9f..ec3fc8d88e87 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -45,7 +45,7 @@ hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp
# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
test_lru_dist-objs := test_lru_dist.o $(LIBBPF)
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 242631aa4ea2..69806d74fa53 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -695,105 +695,3 @@ struct ksym *ksym_search(long key)
return &syms[0];
}
-int set_link_xdp_fd(int ifindex, int fd, __u32 flags)
-{
- struct sockaddr_nl sa;
- int sock, seq = 0, len, ret = -1;
- char buf[4096];
- struct nlattr *nla, *nla_xdp;
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifinfo;
- char attrbuf[64];
- } req;
- struct nlmsghdr *nh;
- struct nlmsgerr *err;
-
- memset(&sa, 0, sizeof(sa));
- sa.nl_family = AF_NETLINK;
-
- sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (sock < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
- }
-
- if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
- req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
- req.nh.nlmsg_type = RTM_SETLINK;
- req.nh.nlmsg_pid = 0;
- req.nh.nlmsg_seq = ++seq;
- req.ifinfo.ifi_family = AF_UNSPEC;
- req.ifinfo.ifi_index = ifindex;
-
- /* started nested attribute for XDP */
- nla = (struct nlattr *)(((char *)&req)
- + NLMSG_ALIGN(req.nh.nlmsg_len));
- nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
- nla->nla_len = NLA_HDRLEN;
-
- /* add XDP fd */
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
- nla->nla_len += nla_xdp->nla_len;
-
- /* if user passed in any flags, add those too */
- if (flags) {
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
- nla->nla_len += nla_xdp->nla_len;
- }
-
- req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
- if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
- printf("send to netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- len = recv(sock, buf, sizeof(buf), 0);
- if (len < 0) {
- printf("recv from netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
- nh = NLMSG_NEXT(nh, len)) {
- if (nh->nlmsg_pid != getpid()) {
- printf("Wrong pid %d, expected %d\n",
- nh->nlmsg_pid, getpid());
- goto cleanup;
- }
- if (nh->nlmsg_seq != seq) {
- printf("Wrong seq %d, expected %d\n",
- nh->nlmsg_seq, seq);
- goto cleanup;
- }
- switch (nh->nlmsg_type) {
- case NLMSG_ERROR:
- err = (struct nlmsgerr *)NLMSG_DATA(nh);
- if (!err->error)
- continue;
- printf("nlmsg error %s\n", strerror(-err->error));
- goto cleanup;
- case NLMSG_DONE:
- break;
- }
- }
-
- ret = 0;
-
-cleanup:
- close(sock);
- return ret;
-}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 7d57a4248893..453c200b389b 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -61,5 +61,5 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
-int set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
#endif
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index fdaefe91801d..b901ee2b3336 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -25,7 +25,7 @@ static __u32 xdp_flags;
static void int_exit(int sig)
{
- set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
exit(0);
}
@@ -116,7 +116,7 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 35fec9fecb57..23744a8aaf21 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -26,7 +26,7 @@ static const char *__doc__ =
/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead
* use bpf/libbpf.h), but cannot as (currently) needed for XDP
- * attaching to a device via set_link_xdp_fd()
+ * attaching to a device via bpf_set_link_xdp_fd()
*/
#include "libbpf.h"
#include "bpf_load.h"
@@ -67,7 +67,7 @@ static void int_exit(int sig)
"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
ifindex, ifname);
if (ifindex > -1)
- set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
exit(EXIT_OK);
}
@@ -682,7 +682,7 @@ int main(int argc, char **argv)
/* Remove XDP program when program is interrupted */
signal(SIGINT, int_exit);
- if (set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
return EXIT_FAIL_XDP;
}
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index 978a532f0748..7eae07d7293e 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -34,9 +34,9 @@ static __u32 xdp_flags;
static void int_exit(int sig)
{
- set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
if (ifindex_out_xdp_dummy_attached)
- set_link_xdp_fd(ifindex_out, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
exit(0);
}
@@ -120,13 +120,13 @@ int main(int argc, char **argv)
return 1;
}
- if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
return 1;
}
/* Loading dummy XDP prog on out-device */
- if (set_link_xdp_fd(ifindex_out, prog_fd[1],
+ if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1],
(xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
ifindex_out_xdp_dummy_attached = false;
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index 4475d837bf2c..d54e91eb6cbf 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -33,9 +33,9 @@ static __u32 xdp_flags;
static void int_exit(int sig)
{
- set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
if (ifindex_out_xdp_dummy_attached)
- set_link_xdp_fd(ifindex_out, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
exit(0);
}
@@ -114,13 +114,13 @@ int main(int argc, char **argv)
return 1;
}
- if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
return 1;
}
/* Loading dummy XDP prog on out-device */
- if (set_link_xdp_fd(ifindex_out, prog_fd[1],
+ if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1],
(xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
ifindex_out_xdp_dummy_attached = false;
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index 916462112d55..6296741c1fbd 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -37,7 +37,7 @@ static void int_exit(int sig)
int i = 0;
for (i = 0; i < total_ifindex; i++)
- set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
exit(0);
}
@@ -49,7 +49,7 @@ static void close_and_exit(int sig)
close(sock_arp);
for (i = 0; i < total_ifindex; i++)
- set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
exit(0);
}
@@ -183,7 +183,7 @@ static void read_route(struct nlmsghdr *nh, int nll)
int i = 0;
for (i = 0; i < total_ifindex; i++)
- set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
exit(0);
}
assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0);
@@ -633,12 +633,12 @@ int main(int ac, char **argv)
}
}
for (i = 0; i < total_ifindex; i++) {
- if (set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) {
printf("link set xdp fd failed\n");
int recovery_index = i;
for (i = 0; i < recovery_index; i++)
- set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
return 1;
}
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index 32430e8b3a6a..478d95412de4 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -56,7 +56,7 @@ static void int_exit(int sig)
"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
ifindex, ifname);
if (ifindex > -1)
- set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
exit(EXIT_OK);
}
@@ -521,7 +521,7 @@ int main(int argc, char **argv)
/* Remove XDP program when program is interrupted */
signal(SIGINT, int_exit);
- if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
return EXIT_FAIL_XDP;
}
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 715cd12eaca5..f0a787268a87 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -30,7 +30,7 @@ static __u32 xdp_flags = 0;
static void int_exit(int sig)
{
if (ifindex > -1)
- set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
exit(0);
}
@@ -254,14 +254,14 @@ int main(int argc, char **argv)
}
}
- if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
poll_stats(kill_after_s);
- set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
return 0;
}
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
new file mode 100644
index 000000000000..8616131e2c61
--- /dev/null
+++ b/tools/include/uapi/linux/if_link.h
@@ -0,0 +1,943 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_IF_LINK_H
+#define _UAPI_LINUX_IF_LINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+/* This struct should be in sync with struct rtnl_link_stats64 */
+struct rtnl_link_stats {
+ __u32 rx_packets; /* total packets received */
+ __u32 tx_packets; /* total packets transmitted */
+ __u32 rx_bytes; /* total bytes received */
+ __u32 tx_bytes; /* total bytes transmitted */
+ __u32 rx_errors; /* bad packets received */
+ __u32 tx_errors; /* packet transmit problems */
+ __u32 rx_dropped; /* no space in linux buffers */
+ __u32 tx_dropped; /* no space available in linux */
+ __u32 multicast; /* multicast packets received */
+ __u32 collisions;
+
+ /* detailed rx_errors: */
+ __u32 rx_length_errors;
+ __u32 rx_over_errors; /* receiver ring buff overflow */
+ __u32 rx_crc_errors; /* recved pkt with crc error */
+ __u32 rx_frame_errors; /* recv'd frame alignment error */
+ __u32 rx_fifo_errors; /* recv'r fifo overrun */
+ __u32 rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ __u32 tx_aborted_errors;
+ __u32 tx_carrier_errors;
+ __u32 tx_fifo_errors;
+ __u32 tx_heartbeat_errors;
+ __u32 tx_window_errors;
+
+ /* for cslip etc */
+ __u32 rx_compressed;
+ __u32 tx_compressed;
+
+ __u32 rx_nohandler; /* dropped, no handler found */
+};
+
+/* The main device statistics structure */
+struct rtnl_link_stats64 {
+ __u64 rx_packets; /* total packets received */
+ __u64 tx_packets; /* total packets transmitted */
+ __u64 rx_bytes; /* total bytes received */
+ __u64 tx_bytes; /* total bytes transmitted */
+ __u64 rx_errors; /* bad packets received */
+ __u64 tx_errors; /* packet transmit problems */
+ __u64 rx_dropped; /* no space in linux buffers */
+ __u64 tx_dropped; /* no space available in linux */
+ __u64 multicast; /* multicast packets received */
+ __u64 collisions;
+
+ /* detailed rx_errors: */
+ __u64 rx_length_errors;
+ __u64 rx_over_errors; /* receiver ring buff overflow */
+ __u64 rx_crc_errors; /* recved pkt with crc error */
+ __u64 rx_frame_errors; /* recv'd frame alignment error */
+ __u64 rx_fifo_errors; /* recv'r fifo overrun */
+ __u64 rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ __u64 tx_aborted_errors;
+ __u64 tx_carrier_errors;
+ __u64 tx_fifo_errors;
+ __u64 tx_heartbeat_errors;
+ __u64 tx_window_errors;
+
+ /* for cslip etc */
+ __u64 rx_compressed;
+ __u64 tx_compressed;
+
+ __u64 rx_nohandler; /* dropped, no handler found */
+};
+
+/* The struct should be in sync with struct ifmap */
+struct rtnl_link_ifmap {
+ __u64 mem_start;
+ __u64 mem_end;
+ __u64 base_addr;
+ __u16 irq;
+ __u8 dma;
+ __u8 port;
+};
+
+/*
+ * IFLA_AF_SPEC
+ * Contains nested attributes for address family specific attributes.
+ * Each address family may create a attribute with the address family
+ * number as type and create its own attribute structure in it.
+ *
+ * Example:
+ * [IFLA_AF_SPEC] = {
+ * [AF_INET] = {
+ * [IFLA_INET_CONF] = ...,
+ * },
+ * [AF_INET6] = {
+ * [IFLA_INET6_FLAGS] = ...,
+ * [IFLA_INET6_CONF] = ...,
+ * }
+ * }
+ */
+
+enum {
+ IFLA_UNSPEC,
+ IFLA_ADDRESS,
+ IFLA_BROADCAST,
+ IFLA_IFNAME,
+ IFLA_MTU,
+ IFLA_LINK,
+ IFLA_QDISC,
+ IFLA_STATS,
+ IFLA_COST,
+#define IFLA_COST IFLA_COST
+ IFLA_PRIORITY,
+#define IFLA_PRIORITY IFLA_PRIORITY
+ IFLA_MASTER,
+#define IFLA_MASTER IFLA_MASTER
+ IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */
+#define IFLA_WIRELESS IFLA_WIRELESS
+ IFLA_PROTINFO, /* Protocol specific information for a link */
+#define IFLA_PROTINFO IFLA_PROTINFO
+ IFLA_TXQLEN,
+#define IFLA_TXQLEN IFLA_TXQLEN
+ IFLA_MAP,
+#define IFLA_MAP IFLA_MAP
+ IFLA_WEIGHT,
+#define IFLA_WEIGHT IFLA_WEIGHT
+ IFLA_OPERSTATE,
+ IFLA_LINKMODE,
+ IFLA_LINKINFO,
+#define IFLA_LINKINFO IFLA_LINKINFO
+ IFLA_NET_NS_PID,
+ IFLA_IFALIAS,
+ IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */
+ IFLA_VFINFO_LIST,
+ IFLA_STATS64,
+ IFLA_VF_PORTS,
+ IFLA_PORT_SELF,
+ IFLA_AF_SPEC,
+ IFLA_GROUP, /* Group the device belongs to */
+ IFLA_NET_NS_FD,
+ IFLA_EXT_MASK, /* Extended info mask, VFs, etc */
+ IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */
+#define IFLA_PROMISCUITY IFLA_PROMISCUITY
+ IFLA_NUM_TX_QUEUES,
+ IFLA_NUM_RX_QUEUES,
+ IFLA_CARRIER,
+ IFLA_PHYS_PORT_ID,
+ IFLA_CARRIER_CHANGES,
+ IFLA_PHYS_SWITCH_ID,
+ IFLA_LINK_NETNSID,
+ IFLA_PHYS_PORT_NAME,
+ IFLA_PROTO_DOWN,
+ IFLA_GSO_MAX_SEGS,
+ IFLA_GSO_MAX_SIZE,
+ IFLA_PAD,
+ IFLA_XDP,
+ IFLA_EVENT,
+ IFLA_NEW_NETNSID,
+ IFLA_IF_NETNSID,
+ IFLA_CARRIER_UP_COUNT,
+ IFLA_CARRIER_DOWN_COUNT,
+ __IFLA_MAX
+};
+
+
+#define IFLA_MAX (__IFLA_MAX - 1)
+
+/* backwards compatibility for userspace */
+#ifndef __KERNEL__
+#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
+#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
+#endif
+
+enum {
+ IFLA_INET_UNSPEC,
+ IFLA_INET_CONF,
+ __IFLA_INET_MAX,
+};
+
+#define IFLA_INET_MAX (__IFLA_INET_MAX - 1)
+
+/* ifi_flags.
+
+ IFF_* flags.
+
+ The only change is:
+ IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
+ more not changeable by user. They describe link media
+ characteristics and set by device driver.
+
+ Comments:
+ - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
+ - If neither of these three flags are set;
+ the interface is NBMA.
+
+ - IFF_MULTICAST does not mean anything special:
+ multicasts can be used on all not-NBMA links.
+ IFF_MULTICAST means that this media uses special encapsulation
+ for multicast frames. Apparently, all IFF_POINTOPOINT and
+ IFF_BROADCAST devices are able to use multicasts too.
+ */
+
+/* IFLA_LINK.
+ For usual devices it is equal ifi_index.
+ If it is a "virtual interface" (f.e. tunnel), ifi_link
+ can point to real physical interface (f.e. for bandwidth calculations),
+ or maybe 0, what means, that real media is unknown (usual
+ for IPIP tunnels, when route to endpoint is allowed to change)
+ */
+
+/* Subtype attributes for IFLA_PROTINFO */
+enum {
+ IFLA_INET6_UNSPEC,
+ IFLA_INET6_FLAGS, /* link flags */
+ IFLA_INET6_CONF, /* sysctl parameters */
+ IFLA_INET6_STATS, /* statistics */
+ IFLA_INET6_MCAST, /* MC things. What of them? */
+ IFLA_INET6_CACHEINFO, /* time values and max reasm size */
+ IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */
+ IFLA_INET6_TOKEN, /* device token */
+ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+ __IFLA_INET6_MAX
+};
+
+#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1)
+
+enum in6_addr_gen_mode {
+ IN6_ADDR_GEN_MODE_EUI64,
+ IN6_ADDR_GEN_MODE_NONE,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
+ IN6_ADDR_GEN_MODE_RANDOM,
+};
+
+/* Bridge section */
+
+enum {
+ IFLA_BR_UNSPEC,
+ IFLA_BR_FORWARD_DELAY,
+ IFLA_BR_HELLO_TIME,
+ IFLA_BR_MAX_AGE,
+ IFLA_BR_AGEING_TIME,
+ IFLA_BR_STP_STATE,
+ IFLA_BR_PRIORITY,
+ IFLA_BR_VLAN_FILTERING,
+ IFLA_BR_VLAN_PROTOCOL,
+ IFLA_BR_GROUP_FWD_MASK,
+ IFLA_BR_ROOT_ID,
+ IFLA_BR_BRIDGE_ID,
+ IFLA_BR_ROOT_PORT,
+ IFLA_BR_ROOT_PATH_COST,
+ IFLA_BR_TOPOLOGY_CHANGE,
+ IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
+ IFLA_BR_HELLO_TIMER,
+ IFLA_BR_TCN_TIMER,
+ IFLA_BR_TOPOLOGY_CHANGE_TIMER,
+ IFLA_BR_GC_TIMER,
+ IFLA_BR_GROUP_ADDR,
+ IFLA_BR_FDB_FLUSH,
+ IFLA_BR_MCAST_ROUTER,
+ IFLA_BR_MCAST_SNOOPING,
+ IFLA_BR_MCAST_QUERY_USE_IFADDR,
+ IFLA_BR_MCAST_QUERIER,
+ IFLA_BR_MCAST_HASH_ELASTICITY,
+ IFLA_BR_MCAST_HASH_MAX,
+ IFLA_BR_MCAST_LAST_MEMBER_CNT,
+ IFLA_BR_MCAST_STARTUP_QUERY_CNT,
+ IFLA_BR_MCAST_LAST_MEMBER_INTVL,
+ IFLA_BR_MCAST_MEMBERSHIP_INTVL,
+ IFLA_BR_MCAST_QUERIER_INTVL,
+ IFLA_BR_MCAST_QUERY_INTVL,
+ IFLA_BR_MCAST_QUERY_RESPONSE_INTVL,
+ IFLA_BR_MCAST_STARTUP_QUERY_INTVL,
+ IFLA_BR_NF_CALL_IPTABLES,
+ IFLA_BR_NF_CALL_IP6TABLES,
+ IFLA_BR_NF_CALL_ARPTABLES,
+ IFLA_BR_VLAN_DEFAULT_PVID,
+ IFLA_BR_PAD,
+ IFLA_BR_VLAN_STATS_ENABLED,
+ IFLA_BR_MCAST_STATS_ENABLED,
+ IFLA_BR_MCAST_IGMP_VERSION,
+ IFLA_BR_MCAST_MLD_VERSION,
+ __IFLA_BR_MAX,
+};
+
+#define IFLA_BR_MAX (__IFLA_BR_MAX - 1)
+
+struct ifla_bridge_id {
+ __u8 prio[2];
+ __u8 addr[6]; /* ETH_ALEN */
+};
+
+enum {
+ BRIDGE_MODE_UNSPEC,
+ BRIDGE_MODE_HAIRPIN,
+};
+
+enum {
+ IFLA_BRPORT_UNSPEC,
+ IFLA_BRPORT_STATE, /* Spanning tree state */
+ IFLA_BRPORT_PRIORITY, /* " priority */
+ IFLA_BRPORT_COST, /* " cost */
+ IFLA_BRPORT_MODE, /* mode (hairpin) */
+ IFLA_BRPORT_GUARD, /* bpdu guard */
+ IFLA_BRPORT_PROTECT, /* root port protection */
+ IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */
+ IFLA_BRPORT_LEARNING, /* mac learning */
+ IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
+ IFLA_BRPORT_PROXYARP, /* proxy ARP */
+ IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+ IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
+ IFLA_BRPORT_ROOT_ID, /* designated root */
+ IFLA_BRPORT_BRIDGE_ID, /* designated bridge */
+ IFLA_BRPORT_DESIGNATED_PORT,
+ IFLA_BRPORT_DESIGNATED_COST,
+ IFLA_BRPORT_ID,
+ IFLA_BRPORT_NO,
+ IFLA_BRPORT_TOPOLOGY_CHANGE_ACK,
+ IFLA_BRPORT_CONFIG_PENDING,
+ IFLA_BRPORT_MESSAGE_AGE_TIMER,
+ IFLA_BRPORT_FORWARD_DELAY_TIMER,
+ IFLA_BRPORT_HOLD_TIMER,
+ IFLA_BRPORT_FLUSH,
+ IFLA_BRPORT_MULTICAST_ROUTER,
+ IFLA_BRPORT_PAD,
+ IFLA_BRPORT_MCAST_FLOOD,
+ IFLA_BRPORT_MCAST_TO_UCAST,
+ IFLA_BRPORT_VLAN_TUNNEL,
+ IFLA_BRPORT_BCAST_FLOOD,
+ IFLA_BRPORT_GROUP_FWD_MASK,
+ IFLA_BRPORT_NEIGH_SUPPRESS,
+ __IFLA_BRPORT_MAX
+};
+#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
+
+struct ifla_cacheinfo {
+ __u32 max_reasm_len;
+ __u32 tstamp; /* ipv6InterfaceTable updated timestamp */
+ __u32 reachable_time;
+ __u32 retrans_time;
+};
+
+enum {
+ IFLA_INFO_UNSPEC,
+ IFLA_INFO_KIND,
+ IFLA_INFO_DATA,
+ IFLA_INFO_XSTATS,
+ IFLA_INFO_SLAVE_KIND,
+ IFLA_INFO_SLAVE_DATA,
+ __IFLA_INFO_MAX,
+};
+
+#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1)
+
+/* VLAN section */
+
+enum {
+ IFLA_VLAN_UNSPEC,
+ IFLA_VLAN_ID,
+ IFLA_VLAN_FLAGS,
+ IFLA_VLAN_EGRESS_QOS,
+ IFLA_VLAN_INGRESS_QOS,
+ IFLA_VLAN_PROTOCOL,
+ __IFLA_VLAN_MAX,
+};
+
+#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1)
+
+struct ifla_vlan_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
+enum {
+ IFLA_VLAN_QOS_UNSPEC,
+ IFLA_VLAN_QOS_MAPPING,
+ __IFLA_VLAN_QOS_MAX
+};
+
+#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1)
+
+struct ifla_vlan_qos_mapping {
+ __u32 from;
+ __u32 to;
+};
+
+/* MACVLAN section */
+enum {
+ IFLA_MACVLAN_UNSPEC,
+ IFLA_MACVLAN_MODE,
+ IFLA_MACVLAN_FLAGS,
+ IFLA_MACVLAN_MACADDR_MODE,
+ IFLA_MACVLAN_MACADDR,
+ IFLA_MACVLAN_MACADDR_DATA,
+ IFLA_MACVLAN_MACADDR_COUNT,
+ __IFLA_MACVLAN_MAX,
+};
+
+#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1)
+
+enum macvlan_mode {
+ MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */
+ MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */
+ MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */
+ MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */
+ MACVLAN_MODE_SOURCE = 16,/* use source MAC address list to assign */
+};
+
+enum macvlan_macaddr_mode {
+ MACVLAN_MACADDR_ADD,
+ MACVLAN_MACADDR_DEL,
+ MACVLAN_MACADDR_FLUSH,
+ MACVLAN_MACADDR_SET,
+};
+
+#define MACVLAN_FLAG_NOPROMISC 1
+
+/* VRF section */
+enum {
+ IFLA_VRF_UNSPEC,
+ IFLA_VRF_TABLE,
+ __IFLA_VRF_MAX
+};
+
+#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
+
+enum {
+ IFLA_VRF_PORT_UNSPEC,
+ IFLA_VRF_PORT_TABLE,
+ __IFLA_VRF_PORT_MAX
+};
+
+#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1)
+
+/* MACSEC section */
+enum {
+ IFLA_MACSEC_UNSPEC,
+ IFLA_MACSEC_SCI,
+ IFLA_MACSEC_PORT,
+ IFLA_MACSEC_ICV_LEN,
+ IFLA_MACSEC_CIPHER_SUITE,
+ IFLA_MACSEC_WINDOW,
+ IFLA_MACSEC_ENCODING_SA,
+ IFLA_MACSEC_ENCRYPT,
+ IFLA_MACSEC_PROTECT,
+ IFLA_MACSEC_INC_SCI,
+ IFLA_MACSEC_ES,
+ IFLA_MACSEC_SCB,
+ IFLA_MACSEC_REPLAY_PROTECT,
+ IFLA_MACSEC_VALIDATION,
+ IFLA_MACSEC_PAD,
+ __IFLA_MACSEC_MAX,
+};
+
+#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
+
+enum macsec_validation_type {
+ MACSEC_VALIDATE_DISABLED = 0,
+ MACSEC_VALIDATE_CHECK = 1,
+ MACSEC_VALIDATE_STRICT = 2,
+ __MACSEC_VALIDATE_END,
+ MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1,
+};
+
+/* IPVLAN section */
+enum {
+ IFLA_IPVLAN_UNSPEC,
+ IFLA_IPVLAN_MODE,
+ IFLA_IPVLAN_FLAGS,
+ __IFLA_IPVLAN_MAX
+};
+
+#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1)
+
+enum ipvlan_mode {
+ IPVLAN_MODE_L2 = 0,
+ IPVLAN_MODE_L3,
+ IPVLAN_MODE_L3S,
+ IPVLAN_MODE_MAX
+};
+
+#define IPVLAN_F_PRIVATE 0x01
+#define IPVLAN_F_VEPA 0x02
+
+/* VXLAN section */
+enum {
+ IFLA_VXLAN_UNSPEC,
+ IFLA_VXLAN_ID,
+ IFLA_VXLAN_GROUP, /* group or remote address */
+ IFLA_VXLAN_LINK,
+ IFLA_VXLAN_LOCAL,
+ IFLA_VXLAN_TTL,
+ IFLA_VXLAN_TOS,
+ IFLA_VXLAN_LEARNING,
+ IFLA_VXLAN_AGEING,
+ IFLA_VXLAN_LIMIT,
+ IFLA_VXLAN_PORT_RANGE, /* source port */
+ IFLA_VXLAN_PROXY,
+ IFLA_VXLAN_RSC,
+ IFLA_VXLAN_L2MISS,
+ IFLA_VXLAN_L3MISS,
+ IFLA_VXLAN_PORT, /* destination port */
+ IFLA_VXLAN_GROUP6,
+ IFLA_VXLAN_LOCAL6,
+ IFLA_VXLAN_UDP_CSUM,
+ IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+ IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+ IFLA_VXLAN_REMCSUM_TX,
+ IFLA_VXLAN_REMCSUM_RX,
+ IFLA_VXLAN_GBP,
+ IFLA_VXLAN_REMCSUM_NOPARTIAL,
+ IFLA_VXLAN_COLLECT_METADATA,
+ IFLA_VXLAN_LABEL,
+ IFLA_VXLAN_GPE,
+ __IFLA_VXLAN_MAX
+};
+#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
+
+struct ifla_vxlan_port_range {
+ __be16 low;
+ __be16 high;
+};
+
+/* GENEVE section */
+enum {
+ IFLA_GENEVE_UNSPEC,
+ IFLA_GENEVE_ID,
+ IFLA_GENEVE_REMOTE,
+ IFLA_GENEVE_TTL,
+ IFLA_GENEVE_TOS,
+ IFLA_GENEVE_PORT, /* destination port */
+ IFLA_GENEVE_COLLECT_METADATA,
+ IFLA_GENEVE_REMOTE6,
+ IFLA_GENEVE_UDP_CSUM,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+ IFLA_GENEVE_LABEL,
+ __IFLA_GENEVE_MAX
+};
+#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
+
+/* PPP section */
+enum {
+ IFLA_PPP_UNSPEC,
+ IFLA_PPP_DEV_FD,
+ __IFLA_PPP_MAX
+};
+#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1)
+
+/* GTP section */
+
+enum ifla_gtp_role {
+ GTP_ROLE_GGSN = 0,
+ GTP_ROLE_SGSN,
+};
+
+enum {
+ IFLA_GTP_UNSPEC,
+ IFLA_GTP_FD0,
+ IFLA_GTP_FD1,
+ IFLA_GTP_PDP_HASHSIZE,
+ IFLA_GTP_ROLE,
+ __IFLA_GTP_MAX,
+};
+#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
+
+/* Bonding section */
+
+enum {
+ IFLA_BOND_UNSPEC,
+ IFLA_BOND_MODE,
+ IFLA_BOND_ACTIVE_SLAVE,
+ IFLA_BOND_MIIMON,
+ IFLA_BOND_UPDELAY,
+ IFLA_BOND_DOWNDELAY,
+ IFLA_BOND_USE_CARRIER,
+ IFLA_BOND_ARP_INTERVAL,
+ IFLA_BOND_ARP_IP_TARGET,
+ IFLA_BOND_ARP_VALIDATE,
+ IFLA_BOND_ARP_ALL_TARGETS,
+ IFLA_BOND_PRIMARY,
+ IFLA_BOND_PRIMARY_RESELECT,
+ IFLA_BOND_FAIL_OVER_MAC,
+ IFLA_BOND_XMIT_HASH_POLICY,
+ IFLA_BOND_RESEND_IGMP,
+ IFLA_BOND_NUM_PEER_NOTIF,
+ IFLA_BOND_ALL_SLAVES_ACTIVE,
+ IFLA_BOND_MIN_LINKS,
+ IFLA_BOND_LP_INTERVAL,
+ IFLA_BOND_PACKETS_PER_SLAVE,
+ IFLA_BOND_AD_LACP_RATE,
+ IFLA_BOND_AD_SELECT,
+ IFLA_BOND_AD_INFO,
+ IFLA_BOND_AD_ACTOR_SYS_PRIO,
+ IFLA_BOND_AD_USER_PORT_KEY,
+ IFLA_BOND_AD_ACTOR_SYSTEM,
+ IFLA_BOND_TLB_DYNAMIC_LB,
+ __IFLA_BOND_MAX,
+};
+
+#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1)
+
+enum {
+ IFLA_BOND_AD_INFO_UNSPEC,
+ IFLA_BOND_AD_INFO_AGGREGATOR,
+ IFLA_BOND_AD_INFO_NUM_PORTS,
+ IFLA_BOND_AD_INFO_ACTOR_KEY,
+ IFLA_BOND_AD_INFO_PARTNER_KEY,
+ IFLA_BOND_AD_INFO_PARTNER_MAC,
+ __IFLA_BOND_AD_INFO_MAX,
+};
+
+#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1)
+
+enum {
+ IFLA_BOND_SLAVE_UNSPEC,
+ IFLA_BOND_SLAVE_STATE,
+ IFLA_BOND_SLAVE_MII_STATUS,
+ IFLA_BOND_SLAVE_LINK_FAILURE_COUNT,
+ IFLA_BOND_SLAVE_PERM_HWADDR,
+ IFLA_BOND_SLAVE_QUEUE_ID,
+ IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
+ IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ __IFLA_BOND_SLAVE_MAX,
+};
+
+#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1)
+
+/* SR-IOV virtual function management section */
+
+enum {
+ IFLA_VF_INFO_UNSPEC,
+ IFLA_VF_INFO,
+ __IFLA_VF_INFO_MAX,
+};
+
+#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
+
+enum {
+ IFLA_VF_UNSPEC,
+ IFLA_VF_MAC, /* Hardware queue specific attributes */
+ IFLA_VF_VLAN, /* VLAN ID and QoS */
+ IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */
+ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */
+ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */
+ IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */
+ IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query
+ * on/off switch
+ */
+ IFLA_VF_STATS, /* network device statistics */
+ IFLA_VF_TRUST, /* Trust VF */
+ IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */
+ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */
+ IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */
+ __IFLA_VF_MAX,
+};
+
+#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
+
+struct ifla_vf_mac {
+ __u32 vf;
+ __u8 mac[32]; /* MAX_ADDR_LEN */
+};
+
+struct ifla_vf_vlan {
+ __u32 vf;
+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+ __u32 qos;
+};
+
+enum {
+ IFLA_VF_VLAN_INFO_UNSPEC,
+ IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */
+ __IFLA_VF_VLAN_INFO_MAX,
+};
+
+#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1)
+#define MAX_VLAN_LIST_LEN 1
+
+struct ifla_vf_vlan_info {
+ __u32 vf;
+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+ __u32 qos;
+ __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */
+};
+
+struct ifla_vf_tx_rate {
+ __u32 vf;
+ __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
+};
+
+struct ifla_vf_rate {
+ __u32 vf;
+ __u32 min_tx_rate; /* Min Bandwidth in Mbps */
+ __u32 max_tx_rate; /* Max Bandwidth in Mbps */
+};
+
+struct ifla_vf_spoofchk {
+ __u32 vf;
+ __u32 setting;
+};
+
+struct ifla_vf_guid {
+ __u32 vf;
+ __u64 guid;
+};
+
+enum {
+ IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */
+ IFLA_VF_LINK_STATE_ENABLE, /* link always up */
+ IFLA_VF_LINK_STATE_DISABLE, /* link always down */
+ __IFLA_VF_LINK_STATE_MAX,
+};
+
+struct ifla_vf_link_state {
+ __u32 vf;
+ __u32 link_state;
+};
+
+struct ifla_vf_rss_query_en {
+ __u32 vf;
+ __u32 setting;
+};
+
+enum {
+ IFLA_VF_STATS_RX_PACKETS,
+ IFLA_VF_STATS_TX_PACKETS,
+ IFLA_VF_STATS_RX_BYTES,
+ IFLA_VF_STATS_TX_BYTES,
+ IFLA_VF_STATS_BROADCAST,
+ IFLA_VF_STATS_MULTICAST,
+ IFLA_VF_STATS_PAD,
+ IFLA_VF_STATS_RX_DROPPED,
+ IFLA_VF_STATS_TX_DROPPED,
+ __IFLA_VF_STATS_MAX,
+};
+
+#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1)
+
+struct ifla_vf_trust {
+ __u32 vf;
+ __u32 setting;
+};
+
+/* VF ports management section
+ *
+ * Nested layout of set/get msg is:
+ *
+ * [IFLA_NUM_VF]
+ * [IFLA_VF_PORTS]
+ * [IFLA_VF_PORT]
+ * [IFLA_PORT_*], ...
+ * [IFLA_VF_PORT]
+ * [IFLA_PORT_*], ...
+ * ...
+ * [IFLA_PORT_SELF]
+ * [IFLA_PORT_*], ...
+ */
+
+enum {
+ IFLA_VF_PORT_UNSPEC,
+ IFLA_VF_PORT, /* nest */
+ __IFLA_VF_PORT_MAX,
+};
+
+#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1)
+
+enum {
+ IFLA_PORT_UNSPEC,
+ IFLA_PORT_VF, /* __u32 */
+ IFLA_PORT_PROFILE, /* string */
+ IFLA_PORT_VSI_TYPE, /* 802.1Qbg (pre-)standard VDP */
+ IFLA_PORT_INSTANCE_UUID, /* binary UUID */
+ IFLA_PORT_HOST_UUID, /* binary UUID */
+ IFLA_PORT_REQUEST, /* __u8 */
+ IFLA_PORT_RESPONSE, /* __u16, output only */
+ __IFLA_PORT_MAX,
+};
+
+#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1)
+
+#define PORT_PROFILE_MAX 40
+#define PORT_UUID_MAX 16
+#define PORT_SELF_VF -1
+
+enum {
+ PORT_REQUEST_PREASSOCIATE = 0,
+ PORT_REQUEST_PREASSOCIATE_RR,
+ PORT_REQUEST_ASSOCIATE,
+ PORT_REQUEST_DISASSOCIATE,
+};
+
+enum {
+ PORT_VDP_RESPONSE_SUCCESS = 0,
+ PORT_VDP_RESPONSE_INVALID_FORMAT,
+ PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES,
+ PORT_VDP_RESPONSE_UNUSED_VTID,
+ PORT_VDP_RESPONSE_VTID_VIOLATION,
+ PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION,
+ PORT_VDP_RESPONSE_OUT_OF_SYNC,
+ /* 0x08-0xFF reserved for future VDP use */
+ PORT_PROFILE_RESPONSE_SUCCESS = 0x100,
+ PORT_PROFILE_RESPONSE_INPROGRESS,
+ PORT_PROFILE_RESPONSE_INVALID,
+ PORT_PROFILE_RESPONSE_BADSTATE,
+ PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES,
+ PORT_PROFILE_RESPONSE_ERROR,
+};
+
+struct ifla_port_vsi {
+ __u8 vsi_mgr_id;
+ __u8 vsi_type_id[3];
+ __u8 vsi_type_version;
+ __u8 pad[3];
+};
+
+
+/* IPoIB section */
+
+enum {
+ IFLA_IPOIB_UNSPEC,
+ IFLA_IPOIB_PKEY,
+ IFLA_IPOIB_MODE,
+ IFLA_IPOIB_UMCAST,
+ __IFLA_IPOIB_MAX
+};
+
+enum {
+ IPOIB_MODE_DATAGRAM = 0, /* using unreliable datagram QPs */
+ IPOIB_MODE_CONNECTED = 1, /* using connected QPs */
+};
+
+#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
+
+
+/* HSR section */
+
+enum {
+ IFLA_HSR_UNSPEC,
+ IFLA_HSR_SLAVE1,
+ IFLA_HSR_SLAVE2,
+ IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */
+ IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */
+ IFLA_HSR_SEQ_NR,
+ IFLA_HSR_VERSION, /* HSR version */
+ __IFLA_HSR_MAX,
+};
+
+#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
+
+/* STATS section */
+
+struct if_stats_msg {
+ __u8 family;
+ __u8 pad1;
+ __u16 pad2;
+ __u32 ifindex;
+ __u32 filter_mask;
+};
+
+/* A stats attribute can be netdev specific or a global stat.
+ * For netdev stats, lets use the prefix IFLA_STATS_LINK_*
+ */
+enum {
+ IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */
+ IFLA_STATS_LINK_64,
+ IFLA_STATS_LINK_XSTATS,
+ IFLA_STATS_LINK_XSTATS_SLAVE,
+ IFLA_STATS_LINK_OFFLOAD_XSTATS,
+ IFLA_STATS_AF_SPEC,
+ __IFLA_STATS_MAX,
+};
+
+#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
+
+#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1))
+
+/* These are embedded into IFLA_STATS_LINK_XSTATS:
+ * [IFLA_STATS_LINK_XSTATS]
+ * -> [LINK_XSTATS_TYPE_xxx]
+ * -> [rtnl link type specific attributes]
+ */
+enum {
+ LINK_XSTATS_TYPE_UNSPEC,
+ LINK_XSTATS_TYPE_BRIDGE,
+ __LINK_XSTATS_TYPE_MAX
+};
+#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
+
+/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */
+enum {
+ IFLA_OFFLOAD_XSTATS_UNSPEC,
+ IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
+ __IFLA_OFFLOAD_XSTATS_MAX
+};
+#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
+
+/* XDP section */
+
+#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0)
+#define XDP_FLAGS_SKB_MODE (1U << 1)
+#define XDP_FLAGS_DRV_MODE (1U << 2)
+#define XDP_FLAGS_HW_MODE (1U << 3)
+#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \
+ XDP_FLAGS_DRV_MODE | \
+ XDP_FLAGS_HW_MODE)
+#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \
+ XDP_FLAGS_MODES)
+
+/* These are stored into IFLA_XDP_ATTACHED on dump. */
+enum {
+ XDP_ATTACHED_NONE = 0,
+ XDP_ATTACHED_DRV,
+ XDP_ATTACHED_SKB,
+ XDP_ATTACHED_HW,
+};
+
+enum {
+ IFLA_XDP_UNSPEC,
+ IFLA_XDP_FD,
+ IFLA_XDP_ATTACHED,
+ IFLA_XDP_FLAGS,
+ IFLA_XDP_PROG_ID,
+ __IFLA_XDP_MAX,
+};
+
+#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
+
+enum {
+ IFLA_EVENT_NONE,
+ IFLA_EVENT_REBOOT, /* internal reset / reboot */
+ IFLA_EVENT_FEATURES, /* change in offload features */
+ IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */
+ IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. arp/ndisc */
+ IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */
+ IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */
+};
+
+#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h
new file mode 100644
index 000000000000..776bc92e9118
--- /dev/null
+++ b/tools/include/uapi/linux/netlink.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NETLINK_H
+#define _UAPI__LINUX_NETLINK_H
+
+#include <linux/kernel.h>
+#include <linux/socket.h> /* for __kernel_sa_family_t */
+#include <linux/types.h>
+
+#define NETLINK_ROUTE 0 /* Routing/device hook */
+#define NETLINK_UNUSED 1 /* Unused number */
+#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */
+#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */
+#define NETLINK_SOCK_DIAG 4 /* socket monitoring */
+#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
+#define NETLINK_XFRM 6 /* ipsec */
+#define NETLINK_SELINUX 7 /* SELinux event notifications */
+#define NETLINK_ISCSI 8 /* Open-iSCSI */
+#define NETLINK_AUDIT 9 /* auditing */
+#define NETLINK_FIB_LOOKUP 10
+#define NETLINK_CONNECTOR 11
+#define NETLINK_NETFILTER 12 /* netfilter subsystem */
+#define NETLINK_IP6_FW 13
+#define NETLINK_DNRTMSG 14 /* DECnet routing messages */
+#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */
+#define NETLINK_GENERIC 16
+/* leave room for NETLINK_DM (DM Events) */
+#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */
+#define NETLINK_ECRYPTFS 19
+#define NETLINK_RDMA 20
+#define NETLINK_CRYPTO 21 /* Crypto layer */
+#define NETLINK_SMC 22 /* SMC monitoring */
+
+#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG
+
+#define MAX_LINKS 32
+
+struct sockaddr_nl {
+ __kernel_sa_family_t nl_family; /* AF_NETLINK */
+ unsigned short nl_pad; /* zero */
+ __u32 nl_pid; /* port ID */
+ __u32 nl_groups; /* multicast groups mask */
+};
+
+struct nlmsghdr {
+ __u32 nlmsg_len; /* Length of message including header */
+ __u16 nlmsg_type; /* Message content */
+ __u16 nlmsg_flags; /* Additional flags */
+ __u32 nlmsg_seq; /* Sequence number */
+ __u32 nlmsg_pid; /* Sending process port ID */
+};
+
+/* Flags values */
+
+#define NLM_F_REQUEST 0x01 /* It is request message. */
+#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */
+#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */
+#define NLM_F_ECHO 0x08 /* Echo this request */
+#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */
+#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */
+
+/* Modifiers to GET request */
+#define NLM_F_ROOT 0x100 /* specify tree root */
+#define NLM_F_MATCH 0x200 /* return all matching */
+#define NLM_F_ATOMIC 0x400 /* atomic GET */
+#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH)
+
+/* Modifiers to NEW request */
+#define NLM_F_REPLACE 0x100 /* Override existing */
+#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */
+#define NLM_F_CREATE 0x400 /* Create, if it does not exist */
+#define NLM_F_APPEND 0x800 /* Add to end of list */
+
+/* Modifiers to DELETE request */
+#define NLM_F_NONREC 0x100 /* Do not delete recursively */
+
+/* Flags for ACK message */
+#define NLM_F_CAPPED 0x100 /* request was capped */
+#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */
+
+/*
+ 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL
+ 4.4BSD CHANGE NLM_F_REPLACE
+
+ True CHANGE NLM_F_CREATE|NLM_F_REPLACE
+ Append NLM_F_CREATE
+ Check NLM_F_EXCL
+ */
+
+#define NLMSG_ALIGNTO 4U
+#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
+#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)
+#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
+#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
+#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
+ (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len)))
+#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len <= (len))
+#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len)))
+
+#define NLMSG_NOOP 0x1 /* Nothing. */
+#define NLMSG_ERROR 0x2 /* Error */
+#define NLMSG_DONE 0x3 /* End of a dump */
+#define NLMSG_OVERRUN 0x4 /* Data lost */
+
+#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */
+
+struct nlmsgerr {
+ int error;
+ struct nlmsghdr msg;
+ /*
+ * followed by the message contents unless NETLINK_CAP_ACK was set
+ * or the ACK indicates success (error == 0)
+ * message length is aligned with NLMSG_ALIGN()
+ */
+ /*
+ * followed by TLVs defined in enum nlmsgerr_attrs
+ * if NETLINK_EXT_ACK was set
+ */
+};
+
+/**
+ * enum nlmsgerr_attrs - nlmsgerr attributes
+ * @NLMSGERR_ATTR_UNUSED: unused
+ * @NLMSGERR_ATTR_MSG: error message string (string)
+ * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original
+ * message, counting from the beginning of the header (u32)
+ * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to
+ * be used - in the success case - to identify a created
+ * object or operation or similar (binary)
+ * @__NLMSGERR_ATTR_MAX: number of attributes
+ * @NLMSGERR_ATTR_MAX: highest attribute number
+ */
+enum nlmsgerr_attrs {
+ NLMSGERR_ATTR_UNUSED,
+ NLMSGERR_ATTR_MSG,
+ NLMSGERR_ATTR_OFFS,
+ NLMSGERR_ATTR_COOKIE,
+
+ __NLMSGERR_ATTR_MAX,
+ NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1
+};
+
+#define NETLINK_ADD_MEMBERSHIP 1
+#define NETLINK_DROP_MEMBERSHIP 2
+#define NETLINK_PKTINFO 3
+#define NETLINK_BROADCAST_ERROR 4
+#define NETLINK_NO_ENOBUFS 5
+#ifndef __KERNEL__
+#define NETLINK_RX_RING 6
+#define NETLINK_TX_RING 7
+#endif
+#define NETLINK_LISTEN_ALL_NSID 8
+#define NETLINK_LIST_MEMBERSHIPS 9
+#define NETLINK_CAP_ACK 10
+#define NETLINK_EXT_ACK 11
+
+struct nl_pktinfo {
+ __u32 group;
+};
+
+struct nl_mmap_req {
+ unsigned int nm_block_size;
+ unsigned int nm_block_nr;
+ unsigned int nm_frame_size;
+ unsigned int nm_frame_nr;
+};
+
+struct nl_mmap_hdr {
+ unsigned int nm_status;
+ unsigned int nm_len;
+ __u32 nm_group;
+ /* credentials */
+ __u32 nm_pid;
+ __u32 nm_uid;
+ __u32 nm_gid;
+};
+
+#ifndef __KERNEL__
+enum nl_mmap_status {
+ NL_MMAP_STATUS_UNUSED,
+ NL_MMAP_STATUS_RESERVED,
+ NL_MMAP_STATUS_VALID,
+ NL_MMAP_STATUS_COPY,
+ NL_MMAP_STATUS_SKIP,
+};
+
+#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO
+#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
+#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
+#endif
+
+#define NET_MAJOR 36 /* Major 36 is reserved for networking */
+
+enum {
+ NETLINK_UNCONNECTED = 0,
+ NETLINK_CONNECTED,
+};
+
+/*
+ * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * | Header | Pad | Payload | Pad |
+ * | (struct nlattr) | ing | | ing |
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * <-------------- nlattr->nla_len -------------->
+ */
+
+struct nlattr {
+ __u16 nla_len;
+ __u16 nla_type;
+};
+
+/*
+ * nla_type (16 bits)
+ * +---+---+-------------------------------+
+ * | N | O | Attribute Type |
+ * +---+---+-------------------------------+
+ * N := Carries nested attributes
+ * O := Payload stored in network byte order
+ *
+ * Note: The N and O flag are mutually exclusive.
+ */
+#define NLA_F_NESTED (1 << 15)
+#define NLA_F_NET_BYTEORDER (1 << 14)
+#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
+
+#define NLA_ALIGNTO 4
+#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
+#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr)))
+
+/* Generic 32 bitflags attribute content sent to the kernel.
+ *
+ * The value is a bitmap that defines the values being set
+ * The selector is a bitmask that defines which value is legit
+ *
+ * Examples:
+ * value = 0x0, and selector = 0x1
+ * implies we are selecting bit 1 and we want to set its value to 0.
+ *
+ * value = 0x2, and selector = 0x2
+ * implies we are selecting bit 2 and we want to set its value to 1.
+ *
+ */
+struct nla_bitfield32 {
+ __u32 value;
+ __u32 selector;
+};
+
+#endif /* _UAPI__LINUX_NETLINK_H */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index d8749756352d..64c679d67109 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o
+libbpf-y := libbpf.o bpf.o nlattr.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 83714ca1f22b..e6d5f8d1477f 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -160,6 +160,12 @@ $(BPF_IN): force elfdep bpfdep
@(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \
(diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true
+ @(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \
+ (diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \
+ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true
+ @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \
+ (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \
+ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true
$(Q)$(MAKE) $(build)=libbpf
$(OUTPUT)libbpf.so: $(BPF_IN)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 5128677e4117..592a58a2b681 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: LGPL-2.1
+
/*
* common eBPF ELF operations.
*
@@ -25,6 +27,16 @@
#include <asm/unistd.h>
#include <linux/bpf.h>
#include "bpf.h"
+#include "libbpf.h"
+#include "nlattr.h"
+#include <linux/rtnetlink.h>
+#include <linux/if_link.h>
+#include <sys/socket.h>
+#include <errno.h>
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
/*
* When building perf, unistd.h is overridden. __NR_bpf is
@@ -46,7 +58,9 @@
# endif
#endif
+#ifndef min
#define min(x, y) ((x) < (y) ? (x) : (y))
+#endif
static inline __u64 ptr_to_u64(const void *ptr)
{
@@ -413,3 +427,124 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
return err;
}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+ struct sockaddr_nl sa;
+ int sock, seq = 0, len, ret = -1;
+ char buf[4096];
+ struct nlattr *nla, *nla_xdp;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char attrbuf[64];
+ } req;
+ struct nlmsghdr *nh;
+ struct nlmsgerr *err;
+ socklen_t addrlen;
+ int one = 1;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0) {
+ return -errno;
+ }
+
+ if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one)) < 0) {
+ fprintf(stderr, "Netlink error reporting not supported\n");
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ addrlen = sizeof(sa);
+ if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ if (addrlen != sizeof(sa)) {
+ ret = -LIBBPF_ERRNO__INTERNAL;
+ goto cleanup;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_pid = 0;
+ req.nh.nlmsg_seq = ++seq;
+ req.ifinfo.ifi_family = AF_UNSPEC;
+ req.ifinfo.ifi_index = ifindex;
+
+ /* started nested attribute for XDP */
+ nla = (struct nlattr *)(((char *)&req)
+ + NLMSG_ALIGN(req.nh.nlmsg_len));
+ nla->nla_type = NLA_F_NESTED | IFLA_XDP;
+ nla->nla_len = NLA_HDRLEN;
+
+ /* add XDP fd */
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_FD;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+ nla->nla_len += nla_xdp->nla_len;
+
+ /* if user passed in any flags, add those too */
+ if (flags) {
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_FLAGS;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+ nla->nla_len += nla_xdp->nla_len;
+ }
+
+ req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != sa.nl_pid) {
+ ret = -LIBBPF_ERRNO__WRNGPID;
+ goto cleanup;
+ }
+ if (nh->nlmsg_seq != seq) {
+ ret = -LIBBPF_ERRNO__INVSEQ;
+ goto cleanup;
+ }
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ ret = err->error;
+ nla_dump_errormsg(nh);
+ goto cleanup;
+ case NLMSG_DONE:
+ break;
+ default:
+ break;
+ }
+ }
+
+ ret = 0;
+
+cleanup:
+ close(sock);
+ return ret;
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 9f44c196931e..8d18fb73d7fb 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
/*
* common eBPF ELF operations.
*
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 30c776375118..71ddc481f349 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: LGPL-2.1
+
/*
* Common eBPF ELF object loading operations.
*
@@ -106,6 +108,8 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(PROG2BIG)] = "Program too big",
[ERRCODE_OFFSET(KVER)] = "Incorrect kernel version",
[ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type",
+ [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message",
+ [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence",
};
int libbpf_strerror(int err, char *buf, size_t size)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6e20003109e0..f85906533cdd 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
/*
* Common eBPF ELF object loading operations.
*
@@ -42,6 +44,8 @@ enum libbpf_errno {
LIBBPF_ERRNO__PROG2BIG, /* Program too big */
LIBBPF_ERRNO__KVER, /* Incorrect kernel version */
LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */
+ LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */
+ LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */
__LIBBPF_ERRNO__END,
};
@@ -246,4 +250,6 @@ long libbpf_get_error(const void *ptr);
int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd);
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
#endif
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
new file mode 100644
index 000000000000..4719434278b2
--- /dev/null
+++ b/tools/lib/bpf/nlattr.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * NETLINK Netlink attributes
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation version 2.1
+ * of the License.
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <errno.h>
+#include "nlattr.h"
+#include <linux/rtnetlink.h>
+#include <string.h>
+#include <stdio.h>
+
+static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = {
+ [NLA_U8] = sizeof(uint8_t),
+ [NLA_U16] = sizeof(uint16_t),
+ [NLA_U32] = sizeof(uint32_t),
+ [NLA_U64] = sizeof(uint64_t),
+ [NLA_STRING] = 1,
+ [NLA_FLAG] = 0,
+};
+
+static int nla_len(const struct nlattr *nla)
+{
+ return nla->nla_len - NLA_HDRLEN;
+}
+
+static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
+{
+ int totlen = NLA_ALIGN(nla->nla_len);
+
+ *remaining -= totlen;
+ return (struct nlattr *) ((char *) nla + totlen);
+}
+
+static int nla_ok(const struct nlattr *nla, int remaining)
+{
+ return remaining >= sizeof(*nla) &&
+ nla->nla_len >= sizeof(*nla) &&
+ nla->nla_len <= remaining;
+}
+
+static void *nla_data(const struct nlattr *nla)
+{
+ return (char *) nla + NLA_HDRLEN;
+}
+
+static int nla_type(const struct nlattr *nla)
+{
+ return nla->nla_type & NLA_TYPE_MASK;
+}
+
+static int validate_nla(struct nlattr *nla, int maxtype,
+ struct nla_policy *policy)
+{
+ struct nla_policy *pt;
+ unsigned int minlen = 0;
+ int type = nla_type(nla);
+
+ if (type < 0 || type > maxtype)
+ return 0;
+
+ pt = &policy[type];
+
+ if (pt->type > NLA_TYPE_MAX)
+ return 0;
+
+ if (pt->minlen)
+ minlen = pt->minlen;
+ else if (pt->type != NLA_UNSPEC)
+ minlen = nla_attr_minlen[pt->type];
+
+ if (nla_len(nla) < minlen)
+ return -1;
+
+ if (pt->maxlen && nla_len(nla) > pt->maxlen)
+ return -1;
+
+ if (pt->type == NLA_STRING) {
+ char *data = nla_data(nla);
+ if (data[nla_len(nla) - 1] != '\0')
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int nlmsg_len(const struct nlmsghdr *nlh)
+{
+ return nlh->nlmsg_len - NLMSG_HDRLEN;
+}
+
+/**
+ * Create attribute index based on a stream of attributes.
+ * @arg tb Index array to be filled (maxtype+1 elements).
+ * @arg maxtype Maximum attribute type expected and accepted.
+ * @arg head Head of attribute stream.
+ * @arg len Length of attribute stream.
+ * @arg policy Attribute validation policy.
+ *
+ * Iterates over the stream of attributes and stores a pointer to each
+ * attribute in the index array using the attribute type as index to
+ * the array. Attribute with a type greater than the maximum type
+ * specified will be silently ignored in order to maintain backwards
+ * compatibility. If \a policy is not NULL, the attribute will be
+ * validated using the specified policy.
+ *
+ * @see nla_validate
+ * @return 0 on success or a negative error code.
+ */
+static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
+ struct nla_policy *policy)
+{
+ struct nlattr *nla;
+ int rem, err;
+
+ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+
+ nla_for_each_attr(nla, head, len, rem) {
+ int type = nla_type(nla);
+
+ if (type > maxtype)
+ continue;
+
+ if (policy) {
+ err = validate_nla(nla, maxtype, policy);
+ if (err < 0)
+ goto errout;
+ }
+
+ if (tb[type])
+ fprintf(stderr, "Attribute of type %#x found multiple times in message, "
+ "previous attribute is being ignored.\n", type);
+
+ tb[type] = nla;
+ }
+
+ err = 0;
+errout:
+ return err;
+}
+
+/* dump netlink extended ack error message */
+int nla_dump_errormsg(struct nlmsghdr *nlh)
+{
+ struct nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = {
+ [NLMSGERR_ATTR_MSG] = { .type = NLA_STRING },
+ [NLMSGERR_ATTR_OFFS] = { .type = NLA_U32 },
+ };
+ struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr;
+ struct nlmsgerr *err;
+ char *errmsg = NULL;
+ int hlen, alen;
+
+ /* no TLVs, nothing to do here */
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return 0;
+
+ err = (struct nlmsgerr *)NLMSG_DATA(nlh);
+ hlen = sizeof(*err);
+
+ /* if NLM_F_CAPPED is set then the inner err msg was capped */
+ if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ hlen += nlmsg_len(&err->msg);
+
+ attr = (struct nlattr *) ((void *) err + hlen);
+ alen = nlh->nlmsg_len - hlen;
+
+ if (nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) {
+ fprintf(stderr,
+ "Failed to parse extended error attributes\n");
+ return 0;
+ }
+
+ if (tb[NLMSGERR_ATTR_MSG])
+ errmsg = (char *) nla_data(tb[NLMSGERR_ATTR_MSG]);
+
+ fprintf(stderr, "Kernel error message: %s\n", errmsg);
+
+ return 0;
+}
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
new file mode 100644
index 000000000000..931a71f68f93
--- /dev/null
+++ b/tools/lib/bpf/nlattr.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+/*
+ * NETLINK Netlink attributes
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation version 2.1
+ * of the License.
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#ifndef __NLATTR_H
+#define __NLATTR_H
+
+#include <stdint.h>
+#include <linux/netlink.h>
+/* avoid multiple definition of netlink features */
+#define __LINUX_NETLINK_H
+
+/**
+ * Standard attribute types to specify validation policy
+ */
+enum {
+ NLA_UNSPEC, /**< Unspecified type, binary data chunk */
+ NLA_U8, /**< 8 bit integer */
+ NLA_U16, /**< 16 bit integer */
+ NLA_U32, /**< 32 bit integer */
+ NLA_U64, /**< 64 bit integer */
+ NLA_STRING, /**< NUL terminated character string */
+ NLA_FLAG, /**< Flag */
+ NLA_MSECS, /**< Micro seconds (64bit) */
+ NLA_NESTED, /**< Nested attributes */
+ __NLA_TYPE_MAX,
+};
+
+#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1)
+
+/**
+ * @ingroup attr
+ * Attribute validation policy.
+ *
+ * See section @core_doc{core_attr_parse,Attribute Parsing} for more details.
+ */
+struct nla_policy {
+ /** Type of attribute or NLA_UNSPEC */
+ uint16_t type;
+
+ /** Minimal length of payload required */
+ uint16_t minlen;
+
+ /** Maximal length of payload allowed */
+ uint16_t maxlen;
+};
+
+/**
+ * @ingroup attr
+ * Iterate over a stream of attributes
+ * @arg pos loop counter, set to current attribute
+ * @arg head head of attribute stream
+ * @arg len length of attribute stream
+ * @arg rem initialized to len, holds bytes currently remaining in stream
+ */
+#define nla_for_each_attr(pos, head, len, rem) \
+ for (pos = head, rem = len; \
+ nla_ok(pos, rem); \
+ pos = nla_next(pos, &(rem)))
+
+int nla_dump_errormsg(struct nlmsghdr *nlh);
+
+#endif /* __NLATTR_H */
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 1e09d77f1948..cc15af2e54fe 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -8,5 +8,6 @@ fixdep
test_align
test_dev_cgroup
test_progs
+test_tcpbpf_user
test_verifier_log
feature
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index bf05bc5e36e5..566d6adc172a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -27,7 +27,7 @@ TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
include ../lib.mk
-BPFOBJ := $(OUTPUT)/libbpf.a $(OUTPUT)/cgroup_helpers.c
+BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c
$(TEST_GEN_PROGS): $(BPFOBJ)
@@ -58,7 +58,7 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
-%.o: %.c
+$(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 697bd83de295..c0f16e93f9bd 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -7780,6 +7780,20 @@ static struct bpf_test tests[] = {
.result = REJECT,
},
{
+ "XDP, using ifindex from netdev",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, ingress_ifindex)),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .retval = 1,
+ },
+ {
"meta access, test1",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,