summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/eventpoll.c3
-rw-r--r--fs/io_uring.c16
-rw-r--r--include/linux/bpf.h1
-rw-r--r--include/linux/net.h2
-rw-r--r--include/trace/events/xdp.h12
-rw-r--r--include/uapi/linux/bpf.h9
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/syscall.c5
-rw-r--r--kernel/bpf/verifier.c3
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--net/core/bpf_sk_storage.c1
-rw-r--r--net/core/filter.c18
-rw-r--r--net/core/netclassid_cgroup.c3
-rw-r--r--net/core/netprio_cgroup.c3
-rw-r--r--net/core/sock.c8
-rw-r--r--net/socket.c27
-rw-r--r--net/xdp/xsk.c4
-rw-r--r--samples/bpf/xdpsock_user.c2
-rwxr-xr-xscripts/bpf_helpers_doc.py4
-rw-r--r--tools/include/uapi/linux/bpf.h9
-rw-r--r--tools/lib/bpf/libbpf.c64
-rw-r--r--tools/lib/bpf/libbpf.h1
-rw-r--r--tools/lib/bpf/libbpf.map1
-rw-r--r--tools/lib/bpf/ringbuf.c6
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile52
-rwxr-xr-xtools/testing/selftests/bpf/ima_setup.sh24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c118
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c11
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c65
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_module.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_module_attach.c11
-rw-r--r--tools/testing/selftests/bpf/test_progs.c10
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c31
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh259
-rw-r--r--tools/testing/selftests/bpf/verifier/unpriv.c5
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c1074
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.h160
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh135
40 files changed, 2063 insertions, 114 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 73c346e503d7..19499b7bb82c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -416,12 +416,11 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
unsigned int napi_id;
struct socket *sock;
struct sock *sk;
- int err;
if (!net_busy_loop_on())
return;
- sock = sock_from_file(epi->ffd.file, &err);
+ sock = sock_from_file(epi->ffd.file);
if (!sock)
return;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a2a7c65a77aa..a97488c7b2cf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4356,9 +4356,9 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
unsigned flags;
int ret;
- sock = sock_from_file(req->file, &ret);
+ sock = sock_from_file(req->file);
if (unlikely(!sock))
- return ret;
+ return -ENOTSOCK;
if (req->async_data) {
kmsg = req->async_data;
@@ -4405,9 +4405,9 @@ static int io_send(struct io_kiocb *req, bool force_nonblock,
unsigned flags;
int ret;
- sock = sock_from_file(req->file, &ret);
+ sock = sock_from_file(req->file);
if (unlikely(!sock))
- return ret;
+ return -ENOTSOCK;
ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
if (unlikely(ret))
@@ -4585,9 +4585,9 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
unsigned flags;
int ret, cflags = 0;
- sock = sock_from_file(req->file, &ret);
+ sock = sock_from_file(req->file);
if (unlikely(!sock))
- return ret;
+ return -ENOTSOCK;
if (req->async_data) {
kmsg = req->async_data;
@@ -4648,9 +4648,9 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock,
unsigned flags;
int ret, cflags = 0;
- sock = sock_from_file(req->file, &ret);
+ sock = sock_from_file(req->file);
if (unlikely(!sock))
- return ret;
+ return -ENOTSOCK;
if (req->flags & REQ_F_BUFFER_SELECT) {
kbuf = io_recv_buffer_select(req, !force_nonblock);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d05e75ed8c1b..07cb5d15e743 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1859,6 +1859,7 @@ extern const struct bpf_func_proto bpf_snprintf_btf_proto;
extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
+extern const struct bpf_func_proto bpf_sock_from_file_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/net.h b/include/linux/net.h
index 0dcd51feef02..9e2324efc26a 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -240,7 +240,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg);
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags);
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname);
struct socket *sockfd_lookup(int fd, int *err);
-struct socket *sock_from_file(struct file *file, int *err);
+struct socket *sock_from_file(struct file *file);
#define sockfd_put(sock) fput(sock->file)
int net_ratelimit(void);
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index cd24e8a59529..76a97176ab81 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -145,17 +145,17 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
TP_ARGS(dev, xdp, tgt, err, map, index)
);
-#define _trace_xdp_redirect(dev, xdp, to) \
- trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to);
+#define _trace_xdp_redirect(dev, xdp, to) \
+ trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to)
-#define _trace_xdp_redirect_err(dev, xdp, to, err) \
- trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to);
+#define _trace_xdp_redirect_err(dev, xdp, to, err) \
+ trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to)
#define _trace_xdp_redirect_map(dev, xdp, to, map, index) \
- trace_xdp_redirect(dev, xdp, to, 0, map, index);
+ trace_xdp_redirect(dev, xdp, to, 0, map, index)
#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \
- trace_xdp_redirect_err(dev, xdp, to, err, map, index);
+ trace_xdp_redirect_err(dev, xdp, to, err, map, index)
/* not used anymore, but kept around so as not to break old programs */
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7e9931b6c735..77d7c1bb2923 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3822,6 +3822,14 @@ union bpf_attr {
* The **hash_algo** is returned on success,
* **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
* invalid arguments are passed.
+ *
+ * struct socket *bpf_sock_from_file(struct file *file)
+ * Description
+ * If the given file represents a socket, returns the associated
+ * socket.
+ * Return
+ * A pointer to a struct socket on success or NULL if the file is
+ * not a socket.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3986,6 +3994,7 @@ union bpf_attr {
FN(bprm_opts_set), \
FN(ktime_get_coarse_ns), \
FN(ima_inode_hash), \
+ FN(sock_from_file), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fe7a0733a63a..7e848200cd26 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -224,7 +224,7 @@ static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
- return (struct htab_elem *) (htab->elems + i * htab->elem_size);
+ return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
}
static void htab_free_elems(struct bpf_htab *htab)
@@ -280,7 +280,7 @@ static int prealloc_init(struct bpf_htab *htab)
if (!htab_is_percpu(htab) && !htab_is_lru(htab))
num_entries += num_possible_cpus();
- htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries,
+ htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries,
htab->map.numa_node);
if (!htab->elems)
return -ENOMEM;
@@ -1412,7 +1412,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
void __user *uvalues = u64_to_user_ptr(attr->batch.values);
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
- void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
+ void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
u32 batch, max_count, size, bucket_size;
struct htab_elem *node_to_free = NULL;
u64 elem_map_flags, map_flags;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0cd3cc2af9c1..287be337d5f6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2121,8 +2121,11 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (IS_ERR(attach_btf))
return -EINVAL;
if (!btf_is_kernel(attach_btf)) {
+ /* attaching through specifying bpf_prog's BTF
+ * objects directly might be supported eventually
+ */
btf_put(attach_btf);
- return -EINVAL;
+ return -ENOTSUPP;
}
}
} else if (attr->attach_btf_id) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d43b30c92c7d..17270b8404f1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3767,7 +3767,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
goto mark;
if (state->stack[spi].slot_type[0] == STACK_SPILL &&
- state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
+ (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
+ env->allow_ptr_leaks)) {
__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
for (j = 0; j < BPF_REG_SIZE; j++)
state->stack[spi].slot_type[j] = STACK_MISC;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index fb2fbcbf6de6..4be771df5549 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1758,6 +1758,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_get_tracing_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_tracing_proto;
+ case BPF_FUNC_sock_from_file:
+ return &bpf_sock_from_file_proto;
#endif
case BPF_FUNC_seq_printf:
return prog->expected_attach_type == BPF_TRACE_ITER ?
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index a32037daa933..4edd033e899c 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -394,6 +394,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
* use the bpf_sk_storage_(get|delete) helper.
*/
switch (prog->expected_attach_type) {
+ case BPF_TRACE_ITER:
case BPF_TRACE_RAW_TP:
/* bpf_sk_storage has no trace point */
return true;
diff --git a/net/core/filter.c b/net/core/filter.c
index 77001a35768f..255aeee72402 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10413,6 +10413,24 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
};
+BPF_CALL_1(bpf_sock_from_file, struct file *, file)
+{
+ return (unsigned long)sock_from_file(file);
+}
+
+BTF_ID_LIST(bpf_sock_from_file_btf_ids)
+BTF_ID(struct, socket)
+BTF_ID(struct, file)
+
+const struct bpf_func_proto bpf_sock_from_file_proto = {
+ .func = bpf_sock_from_file,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .ret_btf_id = &bpf_sock_from_file_btf_ids[0],
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_sock_from_file_btf_ids[1],
+};
+
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id)
{
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 41b24cd31562..b49c57d35a88 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -68,9 +68,8 @@ struct update_classid_context {
static int update_classid_sock(const void *v, struct file *file, unsigned n)
{
- int err;
struct update_classid_context *ctx = (void *)v;
- struct socket *sock = sock_from_file(file, &err);
+ struct socket *sock = sock_from_file(file);
if (sock) {
spin_lock(&cgroup_sk_update_lock);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 9bd4cab7d510..99a431c56f23 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -220,8 +220,7 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
static int update_netprio(const void *v, struct file *file, unsigned n)
{
- int err;
- struct socket *sock = sock_from_file(file, &err);
+ struct socket *sock = sock_from_file(file);
if (sock) {
spin_lock(&cgroup_sk_update_lock);
sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
diff --git a/net/core/sock.c b/net/core/sock.c
index 4fd7e785f177..bbcd4b97eddd 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2827,14 +2827,8 @@ EXPORT_SYMBOL(sock_no_mmap);
void __receive_sock(struct file *file)
{
struct socket *sock;
- int error;
- /*
- * The resulting value of "error" is ignored here since we only
- * need to take action when the file is a socket and testing
- * "sock" for NULL is sufficient.
- */
- sock = sock_from_file(file, &error);
+ sock = sock_from_file(file);
if (sock) {
sock_update_netprioidx(&sock->sk->sk_cgrp_data);
sock_update_classid(&sock->sk->sk_cgrp_data);
diff --git a/net/socket.c b/net/socket.c
index bfef11ba35b8..9a240b45bdf3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -445,17 +445,15 @@ static int sock_map_fd(struct socket *sock, int flags)
/**
* sock_from_file - Return the &socket bounded to @file.
* @file: file
- * @err: pointer to an error code return
*
- * On failure returns %NULL and assigns -ENOTSOCK to @err.
+ * On failure returns %NULL.
*/
-struct socket *sock_from_file(struct file *file, int *err)
+struct socket *sock_from_file(struct file *file)
{
if (file->f_op == &socket_file_ops)
return file->private_data; /* set in sock_map_fd */
- *err = -ENOTSOCK;
return NULL;
}
EXPORT_SYMBOL(sock_from_file);
@@ -484,9 +482,11 @@ struct socket *sockfd_lookup(int fd, int *err)
return NULL;
}
- sock = sock_from_file(file, err);
- if (!sock)
+ sock = sock_from_file(file);
+ if (!sock) {
+ *err = -ENOTSOCK;
fput(file);
+ }
return sock;
}
EXPORT_SYMBOL(sockfd_lookup);
@@ -498,11 +498,12 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
*err = -EBADF;
if (f.file) {
- sock = sock_from_file(f.file, err);
+ sock = sock_from_file(f.file);
if (likely(sock)) {
*fput_needed = f.flags & FDPUT_FPUT;
return sock;
}
+ *err = -ENOTSOCK;
fdput(f);
}
return NULL;
@@ -1693,9 +1694,11 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
- sock = sock_from_file(file, &err);
- if (!sock)
+ sock = sock_from_file(file);
+ if (!sock) {
+ err = -ENOTSOCK;
goto out;
+ }
err = -ENFILE;
newsock = sock_alloc();
@@ -1818,9 +1821,11 @@ int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
struct socket *sock;
int err;
- sock = sock_from_file(file, &err);
- if (!sock)
+ sock = sock_from_file(file);
+ if (!sock) {
+ err = -ENOTSOCK;
goto out;
+ }
err =
security_socket_connect(sock, (struct sockaddr *)address, addrlen);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 310cfc68875a..ac4a317038f1 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -564,12 +564,12 @@ static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int fl
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
+ if (unlikely(!xsk_is_bound(xs)))
+ return -ENXIO;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
if (unlikely(!xs->rx))
return -ENOBUFS;
- if (unlikely(!xsk_is_bound(xs)))
- return -ENXIO;
if (unlikely(need_wait))
return -EOPNOTSUPP;
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 568f9815bb1b..db0cb73513a5 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -1275,6 +1275,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
batch_size) {
complete_tx_only(xsk, batch_size);
+ if (benchmark_done)
+ return;
}
for (i = 0; i < batch_size; i++) {
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 8b829748d488..867ada23281c 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -437,6 +437,8 @@ class PrinterHelpers(Printer):
'struct path',
'struct btf_ptr',
'struct inode',
+ 'struct socket',
+ 'struct file',
]
known_types = {
'...',
@@ -482,6 +484,8 @@ class PrinterHelpers(Printer):
'struct path',
'struct btf_ptr',
'struct inode',
+ 'struct socket',
+ 'struct file',
}
mapped_types = {
'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7e9931b6c735..77d7c1bb2923 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3822,6 +3822,14 @@ union bpf_attr {
* The **hash_algo** is returned on success,
* **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
* invalid arguments are passed.
+ *
+ * struct socket *bpf_sock_from_file(struct file *file)
+ * Description
+ * If the given file represents a socket, returns the associated
+ * socket.
+ * Return
+ * A pointer to a struct socket on success or NULL if the file is
+ * not a socket.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3986,6 +3994,7 @@ union bpf_attr {
FN(bprm_opts_set), \
FN(ktime_get_coarse_ns), \
FN(ima_inode_hash), \
+ FN(sock_from_file), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 9be88a90a4aa..6ae748f6ea11 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2518,7 +2518,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
return 0;
}
-static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
+static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
{
if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
prog->type == BPF_PROG_TYPE_LSM)
@@ -2533,37 +2533,43 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
return false;
}
-static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
+static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
{
- bool need_vmlinux_btf = false;
struct bpf_program *prog;
- int i, err;
+ int i;
/* CO-RE relocations need kernel BTF */
if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
- need_vmlinux_btf = true;
+ return true;
/* Support for typed ksyms needs kernel BTF */
for (i = 0; i < obj->nr_extern; i++) {
const struct extern_desc *ext;
ext = &obj->externs[i];
- if (ext->type == EXT_KSYM && ext->ksym.type_id) {
- need_vmlinux_btf = true;
- break;
- }
+ if (ext->type == EXT_KSYM && ext->ksym.type_id)
+ return true;
}
bpf_object__for_each_program(prog, obj) {
if (!prog->load)
continue;
- if (libbpf_prog_needs_vmlinux_btf(prog)) {
- need_vmlinux_btf = true;
- break;
- }
+ if (prog_needs_vmlinux_btf(prog))
+ return true;
}
- if (!need_vmlinux_btf)
+ return false;
+}
+
+static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
+{
+ int err;
+
+ /* btf_vmlinux could be loaded earlier */
+ if (obj->btf_vmlinux)
+ return 0;
+
+ if (!force && !obj_needs_vmlinux_btf(obj))
return 0;
obj->btf_vmlinux = libbpf_find_kernel_btf();
@@ -7475,7 +7481,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
}
err = bpf_object__probe_loading(obj);
- err = err ? : bpf_object__load_vmlinux_btf(obj);
+ err = err ? : bpf_object__load_vmlinux_btf(obj, false);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__sanitize_maps(obj);
@@ -10870,23 +10876,33 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
int attach_prog_fd,
const char *attach_func_name)
{
- int btf_id;
+ int btf_obj_fd = 0, btf_id = 0, err;
if (!prog || attach_prog_fd < 0 || !attach_func_name)
return -EINVAL;
- if (attach_prog_fd)
+ if (prog->obj->loaded)
+ return -EINVAL;
+
+ if (attach_prog_fd) {
btf_id = libbpf_find_prog_btf_id(attach_func_name,
attach_prog_fd);
- else
- btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
- prog->expected_attach_type);
-
- if (btf_id < 0)
- return btf_id;
+ if (btf_id < 0)
+ return btf_id;
+ } else {
+ /* load btf_vmlinux, if not yet */
+ err = bpf_object__load_vmlinux_btf(prog->obj, true);
+ if (err)
+ return err;
+ err = find_kernel_btf_id(prog->obj, attach_func_name,
+ prog->expected_attach_type,
+ &btf_obj_fd, &btf_id);
+ if (err)
+ return err;
+ }
prog->attach_btf_id = btf_id;
- prog->attach_btf_obj_fd = 0;
+ prog->attach_btf_obj_fd = btf_obj_fd;
prog->attach_prog_fd = attach_prog_fd;
return 0;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6909ee81113a..3c35eb401931 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -536,6 +536,7 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
ring_buffer_sample_fn sample_cb, void *ctx);
LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
+LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
/* Perf buffer APIs */
struct perf_buffer;
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 7c4126542e2b..1c0fd2dd233a 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -346,6 +346,7 @@ LIBBPF_0.3.0 {
btf__parse_split;
btf__new_empty_split;
btf__new_split;
+ ring_buffer__epoll_fd;
xsk_setup_xdp_prog;
xsk_socket__update_xskmap;
} LIBBPF_0.2.0;
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 98537ff2679e..8caaafe7e312 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -282,3 +282,9 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
}
return cnt < 0 ? -errno : res;
}
+
+/* Get an fd that can be used to sleep until data is available in the ring(s) */
+int ring_buffer__epoll_fd(const struct ring_buffer *rb)
+{
+ return rb->epoll_fd;
+}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 752d8edddc66..f5b7ef93618c 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -36,3 +36,4 @@ test_cpp
/runqslower
/bench
*.ko
+xdpxceiver
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index ac25ba5d0d6c..8c33e999319a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -19,7 +19,6 @@ ifneq ($(wildcard $(GENHDR)),)
endif
CLANG ?= clang
-LLC ?= llc
LLVM_OBJCOPY ?= llvm-objcopy
BPF_GCC ?= $(shell command -v bpf-gcc;)
SAN_CFLAGS ?=
@@ -46,7 +45,8 @@ endif
TEST_GEN_FILES =
TEST_FILES = test_lwt_ip_encap.o \
- test_tc_edt.o
+ test_tc_edt.o \
+ xsk_prereqs.sh
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -70,17 +70,17 @@ TEST_PROGS := test_kmod.sh \
test_bpftool_build.sh \
test_bpftool.sh \
test_bpftool_metadata.sh \
+ test_xsk.sh
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
- tcp_client.py \
- tcp_server.py \
test_xdp_vlan.sh
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
- test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko
+ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
+ xdpxceiver
TEST_CUSTOM_PROGS = urandom_read
@@ -115,6 +115,13 @@ INCLUDE_DIR := $(SCRATCH_DIR)/include
BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
RESOLVE_BTFIDS := $(BUILD_DIR)/resolve_btfids/resolve_btfids
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../../vmlinux \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
# to build individual tests.
# NOTE: Semicolon at the end is critical to override lib.mk's default static
@@ -139,6 +146,7 @@ $(OUTPUT)/urandom_read: urandom_read.c
$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
$(call msg,MOD,,$@)
+ $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation
$(Q)$(MAKE) $(submake_extras) -C bpf_testmod
$(Q)cp bpf_testmod/bpf_testmod.ko $@
@@ -146,13 +154,6 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
$(call msg,CC,,$@)
$(Q)$(CC) -c $(CFLAGS) -o $@ $<
-VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
- $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
- ../../../../vmlinux \
- /sys/kernel/btf/vmlinux \
- /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
-
DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
@@ -251,31 +252,19 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
# $1 - input .c file
# $2 - output .o file
# $3 - CFLAGS
-# $4 - LDFLAGS
define CLANG_BPF_BUILD_RULE
- $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
- $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
- -c $1 -o - || echo "BPF obj compilation failed") | \
- $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
+ $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
+ $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v3
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
- $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
- $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
- -c $1 -o - || echo "BPF obj compilation failed") | \
- $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
-endef
-# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
-define CLANG_NATIVE_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- $(Q)($(CLANG) $3 -O2 -emit-llvm \
- -c $1 -o - || echo "BPF obj compilation failed") | \
- $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
+ $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
$(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
+ $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2
endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
@@ -330,8 +319,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$$(INCLUDE_DIR)/vmlinux.h \
$(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
- $(TRUNNER_BPF_CFLAGS), \
- $(TRUNNER_BPF_LDFLAGS))
+ $(TRUNNER_BPF_CFLAGS))
$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \
$(TRUNNER_OUTPUT)/%.o \
@@ -399,19 +387,16 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
$(wildcard progs/btf_dump_test_case_*.c)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
-TRUNNER_BPF_LDFLAGS := -mattr=+alu32
$(eval $(call DEFINE_TEST_RUNNER,test_progs))
# Define test_progs-no_alu32 test runner.
TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
-TRUNNER_BPF_LDFLAGS :=
$(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
# Define test_progs BPF-GCC-flavored test runner.
ifneq ($(BPF_GCC),)
TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE
TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc)
-TRUNNER_BPF_LDFLAGS :=
$(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc))
endif
@@ -422,7 +407,6 @@ TRUNNER_EXTRA_SOURCES := test_maps.c
TRUNNER_EXTRA_FILES :=
TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
TRUNNER_BPF_CFLAGS :=
-TRUNNER_BPF_LDFLAGS :=
$(eval $(call DEFINE_TEST_RUNNER,test_maps))
# Define test_verifier test runner.
diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index 2bfc646bc230..8e62581113a3 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -7,6 +7,8 @@ set -o pipefail
IMA_POLICY_FILE="/sys/kernel/security/ima/policy"
TEST_BINARY="/bin/true"
+VERBOSE="${SELFTESTS_VERBOSE:=0}"
+LOG_FILE="$(mktemp /tmp/ima_setup.XXXX.log)"
usage()
{
@@ -75,6 +77,19 @@ run()
exec "${copied_bin_path}"
}
+catch()
+{
+ local exit_code="$1"
+ local log_file="$2"
+
+ if [[ "${exit_code}" -ne 0 ]]; then
+ cat "${log_file}" >&3
+ fi
+
+ rm -f "${log_file}"
+ exit ${exit_code}
+}
+
main()
{
[[ $# -ne 2 ]] && usage
@@ -96,4 +111,13 @@ main()
fi
}
+trap 'catch "$?" "${LOG_FILE}"' EXIT
+
+if [[ "${VERBOSE}" -eq 0 ]]; then
+ # Save the stderr to 3 so that we can output back to
+ # it incase of an error.
+ exec 3>&2 1>"${LOG_FILE}" 2>&1
+fi
+
main "$@"
+rm -f "${LOG_FILE}"
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 448885b95eed..0e586368948d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -20,6 +20,7 @@
#include "bpf_iter_bpf_percpu_hash_map.skel.h"
#include "bpf_iter_bpf_array_map.skel.h"
#include "bpf_iter_bpf_percpu_array_map.skel.h"
+#include "bpf_iter_bpf_sk_storage_helpers.skel.h"
#include "bpf_iter_bpf_sk_storage_map.skel.h"
#include "bpf_iter_test_kern5.skel.h"
#include "bpf_iter_test_kern6.skel.h"
@@ -913,6 +914,119 @@ out:
bpf_iter_bpf_percpu_array_map__destroy(skel);
}
+/* An iterator program deletes all local storage in a map. */
+static void test_bpf_sk_storage_delete(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_bpf_sk_storage_helpers *skel;
+ union bpf_iter_link_info linfo;
+ int err, len, map_fd, iter_fd;
+ struct bpf_link *link;
+ int sock_fd = -1;
+ __u32 val = 42;
+ char buf[64];
+
+ skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
+ if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+
+ sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+ goto out;
+ err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
+ if (CHECK(err, "map_update", "map_update failed\n"))
+ goto out;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map,
+ &opts);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ /* do some tests */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ goto close_iter;
+
+ /* test results */
+ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+ if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
+ "map value wasn't deleted (err=%d, errno=%d)\n", err, errno))
+ goto close_iter;
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ if (sock_fd >= 0)
+ close(sock_fd);
+ bpf_iter_bpf_sk_storage_helpers__destroy(skel);
+}
+
+/* This creates a socket and its local storage. It then runs a task_iter BPF
+ * program that replaces the existing socket local storage with the tgid of the
+ * only task owning a file descriptor to this socket, this process, prog_tests.
+ * It then runs a tcp socket iterator that negates the value in the existing
+ * socket local storage, the test verifies that the resulting value is -pid.
+ */
+static void test_bpf_sk_storage_get(void)
+{
+ struct bpf_iter_bpf_sk_storage_helpers *skel;
+ int err, map_fd, val = -1;
+ int sock_fd = -1;
+
+ skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
+ if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+ goto out;
+
+ err = listen(sock_fd, 1);
+ if (CHECK(err != 0, "listen", "errno: %d\n", errno))
+ goto close_socket;
+
+ map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+
+ err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem", "map_update_failed\n"))
+ goto close_socket;
+
+ do_dummy_read(skel->progs.fill_socket_owner);
+
+ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+ if (CHECK(err || val != getpid(), "bpf_map_lookup_elem",
+ "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
+ getpid(), val, err))
+ goto close_socket;
+
+ do_dummy_read(skel->progs.negate_socket_local_storage);
+
+ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+ CHECK(err || val != -getpid(), "bpf_map_lookup_elem",
+ "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
+ -getpid(), val, err);
+
+close_socket:
+ close(sock_fd);
+out:
+ bpf_iter_bpf_sk_storage_helpers__destroy(skel);
+}
+
static void test_bpf_sk_storage_map(void)
{
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
@@ -1067,6 +1181,10 @@ void test_bpf_iter(void)
test_bpf_percpu_array_map();
if (test__start_subtest("bpf_sk_storage_map"))
test_bpf_sk_storage_map();
+ if (test__start_subtest("bpf_sk_storage_delete"))
+ test_bpf_sk_storage_delete();
+ if (test__start_subtest("bpf_sk_storage_get"))
+ test_bpf_sk_storage_get();
if (test__start_subtest("rdonly-buf-out-of-bound"))
test_rdonly_buf_out_of_bound();
if (test__start_subtest("buf-neg-offset"))
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 4b65e9918764..50796b651f72 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -28,10 +28,18 @@ void test_module_attach(void)
struct test_module_attach__bss *bss;
int err;
- skel = test_module_attach__open_and_load();
+ skel = test_module_attach__open();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
return;
+ err = bpf_program__set_attach_target(skel->progs.handle_fentry_manual,
+ 0, "bpf_testmod_test_read");
+ ASSERT_OK(err, "set_attach_target");
+
+ err = test_module_attach__load(skel);
+ if (CHECK(err, "skel_load", "failed to load skeleton\n"))
+ return;
+
bss = skel->bss;
err = test_module_attach__attach(skel);
@@ -44,6 +52,7 @@ void test_module_attach(void)
ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp");
ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf");
ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry");
+ ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual");
ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit");
ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
new file mode 100644
index 000000000000..6cecab2b32ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google LLC. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_stg_map SEC(".maps");
+
+SEC("iter/bpf_sk_storage_map")
+int delete_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+{
+ if (ctx->sk)
+ bpf_sk_storage_delete(&sk_stg_map, ctx->sk);
+
+ return 0;
+}
+
+SEC("iter/task_file")
+int fill_socket_owner(struct bpf_iter__task_file *ctx)
+{
+ struct task_struct *task = ctx->task;
+ struct file *file = ctx->file;
+ struct socket *sock;
+ int *sock_tgid;
+
+ if (!task || !file)
+ return 0;
+
+ sock = bpf_sock_from_file(file);
+ if (!sock)
+ return 0;
+
+ sock_tgid = bpf_sk_storage_get(&sk_stg_map, sock->sk, 0, 0);
+ if (!sock_tgid)
+ return 0;
+
+ *sock_tgid = task->tgid;
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int negate_socket_local_storage(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ int *sock_tgid;
+
+ if (!sk_common)
+ return 0;
+
+ sock_tgid = bpf_sk_storage_get(&sk_stg_map, sk_common, 0, 0);
+ if (!sock_tgid)
+ return 0;
+
+ *sock_tgid = -*sock_tgid;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index 4983087852a0..b7f32c160f4e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -11,9 +11,10 @@ int dump_task(struct bpf_iter__task *ctx)
{
struct seq_file *seq = ctx->meta->seq;
struct task_struct *task = ctx->task;
+ static char info[] = " === END ===";
if (task == (void *)0) {
- BPF_SEQ_PRINTF(seq, " === END ===\n");
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
index 56363959f7b0..f59f175c7baf 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
@@ -40,6 +40,7 @@ int BPF_PROG(test_core_module_probed,
struct task_struct *task,
struct bpf_testmod_test_read_ctx *read_ctx)
{
+#if __has_builtin(__builtin_preserve_enum_value)
struct core_reloc_module_output *out = (void *)&data.out;
__u64 pid_tgid = bpf_get_current_pid_tgid();
__u32 real_tgid = (__u32)(pid_tgid >> 32);
@@ -61,6 +62,9 @@ int BPF_PROG(test_core_module_probed,
out->len_exists = bpf_core_field_exists(read_ctx->len);
out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+#else
+ data.skip = true;
+#endif
return 0;
}
@@ -70,6 +74,7 @@ int BPF_PROG(test_core_module_direct,
struct task_struct *task,
struct bpf_testmod_test_read_ctx *read_ctx)
{
+#if __has_builtin(__builtin_preserve_enum_value)
struct core_reloc_module_output *out = (void *)&data.out;
__u64 pid_tgid = bpf_get_current_pid_tgid();
__u32 real_tgid = (__u32)(pid_tgid >> 32);
@@ -91,6 +96,9 @@ int BPF_PROG(test_core_module_direct,
out->len_exists = bpf_core_field_exists(read_ctx->len);
out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+#else
+ data.skip = true;
+#endif
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index b563563df172..efd1e287ac17 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -38,6 +38,17 @@ int BPF_PROG(handle_fentry,
return 0;
}
+__u32 fentry_manual_read_sz = 0;
+
+SEC("fentry/placeholder")
+int BPF_PROG(handle_fentry_manual,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ fentry_manual_read_sz = len;
+ return 0;
+}
+
__u32 fexit_read_sz = 0;
int fexit_ret = 0;
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 5ef081bdae4e..7d077d48cadd 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -587,6 +587,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
return -EINVAL;
}
}
+
+ if (env->verbosity > VERBOSE_NONE) {
+ if (setenv("SELFTESTS_VERBOSE", "1", 1) == -1) {
+ fprintf(stderr,
+ "Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)",
+ errno);
+ return -1;
+ }
+ }
+
break;
case ARG_GET_TEST_CNT:
env->get_test_cnt = true;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 4bfe3aa2cfc4..777a81404fdb 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -875,19 +875,36 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
__u8 tmp[TEST_DATA_LEN << 2];
__u32 size_tmp = sizeof(tmp);
uint32_t retval;
- int err;
+ int err, saved_errno;
if (unpriv)
set_admin(true);
err = bpf_prog_test_run(fd_prog, 1, data, size_data,
tmp, &size_tmp, &retval, NULL);
+ saved_errno = errno;
+
if (unpriv)
set_admin(false);
- if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
- printf("Unexpected bpf_prog_test_run error ");
- return err;
+
+ if (err) {
+ switch (saved_errno) {
+ case 524/*ENOTSUPP*/:
+ printf("Did not run the program (not supported) ");
+ return 0;
+ case EPERM:
+ if (unpriv) {
+ printf("Did not run the program (no permission) ");
+ return 0;
+ }
+ /* fallthrough; */
+ default:
+ printf("FAIL: Unexpected bpf_prog_test_run error (%s) ",
+ strerror(saved_errno));
+ return err;
+ }
}
- if (!err && retval != expected_val &&
+
+ if (retval != expected_val &&
expected_val != POINTER_VALUE) {
printf("FAIL retval %d != %d ", retval, expected_val);
return 1;
@@ -936,6 +953,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
int run_errs, run_successes;
int map_fds[MAX_NR_MAPS];
const char *expected_err;
+ int saved_errno;
int fixup_skips;
__u32 pflags;
int i, err;
@@ -997,6 +1015,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
}
fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
+ saved_errno = errno;
/* BPF_PROG_TYPE_TRACING requires more setup and
* bpf_probe_prog_type won't give correct answer
@@ -1013,7 +1032,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) {
if (fd_prog < 0) {
printf("FAIL\nFailed to load prog '%s'!\n",
- strerror(errno));
+ strerror(saved_errno));
goto fail_log;
}
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
new file mode 100755
index 000000000000..88a7483eaae4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -0,0 +1,259 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2020 Intel Corporation, Weqaar Janjua <weqaar.a.janjua@intel.com>
+
+# AF_XDP selftests based on veth
+#
+# End-to-end AF_XDP over Veth test
+#
+# Topology:
+# ---------
+# -----------
+# _ | Process | _
+# / ----------- \
+# / | \
+# / | \
+# ----------- | -----------
+# | Thread1 | | | Thread2 |
+# ----------- | -----------
+# | | |
+# ----------- | -----------
+# | xskX | | | xskY |
+# ----------- | -----------
+# | | |
+# ----------- | ----------
+# | vethX | --------- | vethY |
+# ----------- peer ----------
+# | | |
+# namespaceX | namespaceY
+#
+# AF_XDP is an address family optimized for high performance packet processing,
+# it is XDP’s user-space interface.
+#
+# An AF_XDP socket is linked to a single UMEM which is a region of virtual
+# contiguous memory, divided into equal-sized frames.
+#
+# Refer to AF_XDP Kernel Documentation for detailed information:
+# https://www.kernel.org/doc/html/latest/networking/af_xdp.html
+#
+# Prerequisites setup by script:
+#
+# Set up veth interfaces as per the topology shown ^^:
+# * setup two veth interfaces and one namespace
+# ** veth<xxxx> in root namespace
+# ** veth<yyyy> in af_xdp<xxxx> namespace
+# ** namespace af_xdp<xxxx>
+# * create a spec file veth.spec that includes this run-time configuration
+# *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid
+# conflict with any existing interface
+# * tests the veth and xsk layers of the topology
+#
+# See the source xdpxceiver.c for information on each test
+#
+# Kernel configuration:
+# ---------------------
+# See "config" file for recommended kernel config options.
+#
+# Turn on XDP sockets and veth support when compiling i.e.
+# Networking support -->
+# Networking options -->
+# [ * ] XDP sockets
+#
+# Executing Tests:
+# ----------------
+# Must run with CAP_NET_ADMIN capability.
+#
+# Run (full color-coded output):
+# sudo ./test_xsk.sh -c
+#
+# If running from kselftests:
+# sudo make colorconsole=1 run_tests
+#
+# Run (full output without color-coding):
+# sudo ./test_xsk.sh
+
+. xsk_prereqs.sh
+
+while getopts c flag
+do
+ case "${flag}" in
+ c) colorconsole=1;;
+ esac
+done
+
+TEST_NAME="PREREQUISITES"
+
+URANDOM=/dev/urandom
+[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit 1 1; }
+
+VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
+VETH0=ve${VETH0_POSTFIX}
+VETH1_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
+VETH1=ve${VETH1_POSTFIX}
+NS0=root
+NS1=af_xdp${VETH1_POSTFIX}
+MTU=1500
+
+setup_vethPairs() {
+ echo "setting up ${VETH0}: namespace: ${NS0}"
+ ip netns add ${NS1}
+ ip link add ${VETH0} type veth peer name ${VETH1}
+ if [ -f /proc/net/if_inet6 ]; then
+ echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
+ fi
+ echo "setting up ${VETH1}: namespace: ${NS1}"
+ ip link set ${VETH1} netns ${NS1}
+ ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
+ ip link set ${VETH0} mtu ${MTU}
+ ip netns exec ${NS1} ip link set ${VETH1} up
+ ip link set ${VETH0} up
+}
+
+validate_root_exec
+validate_veth_support ${VETH0}
+validate_ip_utility
+setup_vethPairs
+
+retval=$?
+if [ $retval -ne 0 ]; then
+ test_status $retval "${TEST_NAME}"
+ cleanup_exit ${VETH0} ${VETH1} ${NS1}
+ exit $retval
+fi
+
+echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE}
+
+validate_veth_spec_file
+
+echo "Spec file created: ${SPECFILE}"
+
+test_status $retval "${TEST_NAME}"
+
+## START TESTS
+
+statusList=()
+
+### TEST 1
+TEST_NAME="XSK KSELFTEST FRAMEWORK"
+
+echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode"
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+retval=$?
+if [ $retval -eq 0 ]; then
+ echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode"
+ vethXDPnative ${VETH0} ${VETH1} ${NS1}
+fi
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 2
+TEST_NAME="SKB NOPOLL"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 3
+TEST_NAME="SKB POLL"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S" "-p")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 4
+TEST_NAME="DRV NOPOLL"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 5
+TEST_NAME="DRV POLL"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N" "-p")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 6
+TEST_NAME="SKB SOCKET TEARDOWN"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S" "-T")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 7
+TEST_NAME="DRV SOCKET TEARDOWN"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N" "-T")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 8
+TEST_NAME="SKB BIDIRECTIONAL SOCKETS"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S" "-B")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 9
+TEST_NAME="DRV BIDIRECTIONAL SOCKETS"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N" "-B")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+## END TESTS
+
+cleanup_exit ${VETH0} ${VETH1} ${NS1}
+
+for _status in "${statusList[@]}"
+do
+ if [ $_status -ne 0 ]; then
+ test_exit $ksft_fail 0
+ fi
+done
+
+test_exit $ksft_pass 0
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index 91bb77c24a2e..a3fe0fbaed41 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -108,8 +108,9 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "invalid indirect read from stack off -8+0 size 8",
- .result = REJECT,
+ .errstr_unpriv = "invalid indirect read from stack off -8+0 size 8",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
},
{
"unpriv: mangle pointer on stack 1",
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
new file mode 100644
index 000000000000..014dedaa4dd2
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -0,0 +1,1074 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. */
+
+/*
+ * Some functions in this program are taken from
+ * Linux kernel samples/bpf/xdpsock* and modified
+ * for use.
+ *
+ * See test_xsk.sh for detailed information on test topology
+ * and prerequisite network setup.
+ *
+ * This test program contains two threads, each thread is single socket with
+ * a unique UMEM. It validates in-order packet delivery and packet content
+ * by sending packets to each other.
+ *
+ * Tests Information:
+ * ------------------
+ * These selftests test AF_XDP SKB and Native/DRV modes using veth
+ * Virtual Ethernet interfaces.
+ *
+ * The following tests are run:
+ *
+ * 1. AF_XDP SKB mode
+ * Generic mode XDP is driver independent, used when the driver does
+ * not have support for XDP. Works on any netdevice using sockets and
+ * generic XDP path. XDP hook from netif_receive_skb().
+ * a. nopoll - soft-irq processing
+ * b. poll - using poll() syscall
+ * c. Socket Teardown
+ * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
+ * both sockets, then repeat multiple times. Only nopoll mode is used
+ * d. Bi-directional sockets
+ * Configure sockets as bi-directional tx/rx sockets, sets up fill and
+ * completion rings on each socket, tx/rx in both directions. Only nopoll
+ * mode is used
+ *
+ * 2. AF_XDP DRV/Native mode
+ * Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
+ * packets before SKB allocation. Provides better performance than SKB. Driver
+ * hook available just after DMA of buffer descriptor.
+ * a. nopoll
+ * b. poll
+ * c. Socket Teardown
+ * d. Bi-directional sockets
+ * - Only copy mode is supported because veth does not currently support
+ * zero-copy mode
+ *
+ * Total tests: 8
+ *
+ * Flow:
+ * -----
+ * - Single process spawns two threads: Tx and Rx
+ * - Each of these two threads attach to a veth interface within their assigned
+ * namespaces
+ * - Each thread Creates one AF_XDP socket connected to a unique umem for each
+ * veth interface
+ * - Tx thread Transmits 10k packets from veth<xxxx> to veth<yyyy>
+ * - Rx thread verifies if all 10k packets were received and delivered in-order,
+ * and have the right content
+ *
+ * Enable/disable debug mode:
+ * --------------------------
+ * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
+ * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <errno.h>
+#include <getopt.h>
+#include <asm/barrier.h>
+typedef __u16 __sum16;
+#include <linux/if_link.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <locale.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdatomic.h>
+#include <bpf/xsk.h>
+#include "xdpxceiver.h"
+#include "../kselftest.h"
+
+static void __exit_with_error(int error, const char *file, const char *func, int line)
+{
+ ksft_test_result_fail
+ ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
+ ksft_exit_xfail();
+}
+
+#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
+
+#define print_ksft_result(void)\
+ (ksft_test_result_pass("PASS: %s %s %s%s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
+ "NOPOLL", opt_teardown ? "Socket Teardown" : "",\
+ opt_bidi ? "Bi-directional Sockets" : ""))
+
+static void pthread_init_mutex(void)
+{
+ pthread_mutex_init(&sync_mutex, NULL);
+ pthread_mutex_init(&sync_mutex_tx, NULL);
+ pthread_cond_init(&signal_rx_condition, NULL);
+ pthread_cond_init(&signal_tx_condition, NULL);
+}
+
+static void pthread_destroy_mutex(void)
+{
+ pthread_mutex_destroy(&sync_mutex);
+ pthread_mutex_destroy(&sync_mutex_tx);
+ pthread_cond_destroy(&signal_rx_condition);
+ pthread_cond_destroy(&signal_tx_condition);
+}
+
+static void *memset32_htonl(void *dest, u32 val, u32 size)
+{
+ u32 *ptr = (u32 *)dest;
+ int i;
+
+ val = htonl(val);
+
+ for (i = 0; i < (size & (~0x3)); i += 4)
+ ptr[i >> 2] = val;
+
+ for (; i < size; i++)
+ ((char *)dest)[i] = ((char *)&val)[i & 3];
+
+ return dest;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline unsigned short from32to16(unsigned int x)
+{
+ /* add up 16-bit and 16-bit for 16+c bit */
+ x = (x & 0xffff) + (x >> 16);
+ /* add up carry.. */
+ x = (x & 0xffff) + (x >> 16);
+ return x;
+}
+
+/*
+ * Fold a partial checksum
+ * This function code has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __u16 csum_fold(__u32 csum)
+{
+ u32 sum = (__force u32)csum;
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (__force __u16)~sum;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline u32 from64to32(u64 x)
+{
+ /* add up 32-bit and 32-bit for 32+c bit */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up carry.. */
+ x = (x & 0xffffffff) + (x >> 32);
+ return (u32)x;
+}
+
+__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum);
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+{
+ unsigned long long s = (__force u32)sum;
+
+ s += (__force u32)saddr;
+ s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
+ s += proto + len;
+#else
+ s += (proto + len) << 8;
+#endif
+ return (__force __u32)from64to32(s);
+}
+
+/*
+ * This function has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __u16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
+{
+ u32 csum = 0;
+ u32 cnt = 0;
+
+ /* udp hdr and data */
+ for (; cnt < len; cnt += 2)
+ csum += udp_pkt[cnt >> 1];
+
+ return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
+}
+
+static void gen_eth_hdr(void *data, struct ethhdr *eth_hdr)
+{
+ memcpy(eth_hdr->h_dest, ((struct ifobject *)data)->dst_mac, ETH_ALEN);
+ memcpy(eth_hdr->h_source, ((struct ifobject *)data)->src_mac, ETH_ALEN);
+ eth_hdr->h_proto = htons(ETH_P_IP);
+}
+
+static void gen_ip_hdr(void *data, struct iphdr *ip_hdr)
+{
+ ip_hdr->version = IP_PKT_VER;
+ ip_hdr->ihl = 0x5;
+ ip_hdr->tos = IP_PKT_TOS;
+ ip_hdr->tot_len = htons(IP_PKT_SIZE);
+ ip_hdr->id = 0;
+ ip_hdr->frag_off = 0;
+ ip_hdr->ttl = IPDEFTTL;
+ ip_hdr->protocol = IPPROTO_UDP;
+ ip_hdr->saddr = ((struct ifobject *)data)->src_ip;
+ ip_hdr->daddr = ((struct ifobject *)data)->dst_ip;
+ ip_hdr->check = 0;
+}
+
+static void gen_udp_hdr(void *data, void *arg, struct udphdr *udp_hdr)
+{
+ udp_hdr->source = htons(((struct ifobject *)arg)->src_port);
+ udp_hdr->dest = htons(((struct ifobject *)arg)->dst_port);
+ udp_hdr->len = htons(UDP_PKT_SIZE);
+ memset32_htonl(pkt_data + PKT_HDR_SIZE,
+ htonl(((struct generic_data *)data)->seqnum), UDP_PKT_DATA_SIZE);
+}
+
+static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
+{
+ udp_hdr->check = 0;
+ udp_hdr->check =
+ udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
+}
+
+static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
+{
+ memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
+}
+
+static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
+{
+ int ret;
+
+ data->umem = calloc(1, sizeof(struct xsk_umem_info));
+ if (!data->umem)
+ exit_with_error(errno);
+
+ ret = xsk_umem__create(&data->umem->umem, buffer, size,
+ &data->umem->fq, &data->umem->cq, NULL);
+ if (ret)
+ exit_with_error(ret);
+
+ data->umem->buffer = buffer;
+}
+
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
+{
+ int ret, i;
+ u32 idx;
+
+ ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
+ if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
+ exit_with_error(ret);
+ for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
+ *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
+}
+
+static int xsk_configure_socket(struct ifobject *ifobject)
+{
+ struct xsk_socket_config cfg;
+ struct xsk_ring_cons *rxr;
+ struct xsk_ring_prod *txr;
+ int ret;
+
+ ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info));
+ if (!ifobject->xsk)
+ exit_with_error(errno);
+
+ ifobject->xsk->umem = ifobject->umem;
+ cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg.libbpf_flags = 0;
+ cfg.xdp_flags = opt_xdp_flags;
+ cfg.bind_flags = opt_xdp_bind_flags;
+
+ if (!opt_bidi) {
+ rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
+ txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
+ } else {
+ rxr = &ifobject->xsk->rx;
+ txr = &ifobject->xsk->tx;
+ }
+
+ ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname,
+ opt_queue, ifobject->umem->umem, rxr, txr, &cfg);
+
+ if (ret)
+ return 1;
+
+ return 0;
+}
+
+static struct option long_options[] = {
+ {"interface", required_argument, 0, 'i'},
+ {"queue", optional_argument, 0, 'q'},
+ {"poll", no_argument, 0, 'p'},
+ {"xdp-skb", no_argument, 0, 'S'},
+ {"xdp-native", no_argument, 0, 'N'},
+ {"copy", no_argument, 0, 'c'},
+ {"tear-down", no_argument, 0, 'T'},
+ {"bidi", optional_argument, 0, 'B'},
+ {"debug", optional_argument, 0, 'D'},
+ {"tx-pkt-count", optional_argument, 0, 'C'},
+ {0, 0, 0, 0}
+};
+
+static void usage(const char *prog)
+{
+ const char *str =
+ " Usage: %s [OPTIONS]\n"
+ " Options:\n"
+ " -i, --interface Use interface\n"
+ " -q, --queue=n Use queue n (default 0)\n"
+ " -p, --poll Use poll syscall\n"
+ " -S, --xdp-skb=n Use XDP SKB mode\n"
+ " -N, --xdp-native=n Enforce XDP DRV (native) mode\n"
+ " -c, --copy Force copy mode\n"
+ " -T, --tear-down Tear down sockets by repeatedly recreating them\n"
+ " -B, --bidi Bi-directional sockets test\n"
+ " -D, --debug Debug mode - dump packets L2 - L5\n"
+ " -C, --tx-pkt-count=n Number of packets to send\n";
+ ksft_print_msg(str, prog);
+}
+
+static bool switch_namespace(int idx)
+{
+ char fqns[26] = "/var/run/netns/";
+ int nsfd;
+
+ strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1);
+ nsfd = open(fqns, O_RDONLY);
+
+ if (nsfd == -1)
+ exit_with_error(errno);
+
+ if (setns(nsfd, 0) == -1)
+ exit_with_error(errno);
+
+ return true;
+}
+
+static void *nsswitchthread(void *args)
+{
+ if (switch_namespace(((struct targs *)args)->idx)) {
+ ifdict[((struct targs *)args)->idx]->ifindex =
+ if_nametoindex(ifdict[((struct targs *)args)->idx]->ifname);
+ if (!ifdict[((struct targs *)args)->idx]->ifindex) {
+ ksft_test_result_fail
+ ("ERROR: [%s] interface \"%s\" does not exist\n",
+ __func__, ifdict[((struct targs *)args)->idx]->ifname);
+ ((struct targs *)args)->retptr = false;
+ } else {
+ ksft_print_msg("Interface found: %s\n",
+ ifdict[((struct targs *)args)->idx]->ifname);
+ ((struct targs *)args)->retptr = true;
+ }
+ } else {
+ ((struct targs *)args)->retptr = false;
+ }
+ pthread_exit(NULL);
+}
+
+static int validate_interfaces(void)
+{
+ bool ret = true;
+
+ for (int i = 0; i < MAX_INTERFACES; i++) {
+ if (!strcmp(ifdict[i]->ifname, "")) {
+ ret = false;
+ ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
+ }
+ if (strcmp(ifdict[i]->nsname, "")) {
+ struct targs *targs;
+
+ targs = (struct targs *)malloc(sizeof(struct targs));
+ if (!targs)
+ exit_with_error(errno);
+
+ targs->idx = i;
+ if (pthread_create(&ns_thread, NULL, nsswitchthread, (void *)targs))
+ exit_with_error(errno);
+
+ pthread_join(ns_thread, NULL);
+
+ if (targs->retptr)
+ ksft_print_msg("NS switched: %s\n", ifdict[i]->nsname);
+
+ free(targs);
+ } else {
+ ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname);
+ if (!ifdict[i]->ifindex) {
+ ksft_test_result_fail
+ ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname);
+ ret = false;
+ } else {
+ ksft_print_msg("Interface found: %s\n", ifdict[i]->ifname);
+ }
+ }
+ }
+ return ret;
+}
+
+static void parse_command_line(int argc, char **argv)
+{
+ int option_index, interface_index = 0, c;
+
+ opterr = 0;
+
+ for (;;) {
+ c = getopt_long(argc, argv, "i:q:pSNcTBDC:", long_options, &option_index);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'i':
+ if (interface_index == MAX_INTERFACES)
+ break;
+ char *sptr, *token;
+
+ sptr = strndupa(optarg, strlen(optarg));
+ memcpy(ifdict[interface_index]->ifname,
+ strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
+ token = strsep(&sptr, ",");
+ if (token)
+ memcpy(ifdict[interface_index]->nsname, token,
+ MAX_INTERFACES_NAMESPACE_CHARS);
+ interface_index++;
+ break;
+ case 'q':
+ opt_queue = atoi(optarg);
+ break;
+ case 'p':
+ opt_poll = 1;
+ break;
+ case 'S':
+ opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
+ opt_xdp_bind_flags |= XDP_COPY;
+ uut = ORDER_CONTENT_VALIDATE_XDP_SKB;
+ break;
+ case 'N':
+ opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
+ opt_xdp_bind_flags |= XDP_COPY;
+ uut = ORDER_CONTENT_VALIDATE_XDP_DRV;
+ break;
+ case 'c':
+ opt_xdp_bind_flags |= XDP_COPY;
+ break;
+ case 'T':
+ opt_teardown = 1;
+ break;
+ case 'B':
+ opt_bidi = 1;
+ break;
+ case 'D':
+ debug_pkt_dump = 1;
+ break;
+ case 'C':
+ opt_pkt_count = atoi(optarg);
+ break;
+ default:
+ usage(basename(argv[0]));
+ ksft_exit_xfail();
+ }
+ }
+
+ if (!validate_interfaces()) {
+ usage(basename(argv[0]));
+ ksft_exit_xfail();
+ }
+}
+
+static void kick_tx(struct xsk_socket_info *xsk)
+{
+ int ret;
+
+ ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN)
+ return;
+ exit_with_error(errno);
+}
+
+static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+{
+ unsigned int rcvd;
+ u32 idx;
+
+ if (!xsk->outstanding_tx)
+ return;
+
+ if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
+ kick_tx(xsk);
+
+ rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
+ if (rcvd) {
+ xsk_ring_cons__release(&xsk->umem->cq, rcvd);
+ xsk->outstanding_tx -= rcvd;
+ xsk->tx_npkts += rcvd;
+ }
+}
+
+static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
+{
+ unsigned int rcvd, i;
+ u32 idx_rx = 0, idx_fq = 0;
+ int ret;
+
+ rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ if (!rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ exit_with_error(ret);
+ }
+ return;
+ }
+
+ ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+ while (ret != rcvd) {
+ if (ret < 0)
+ exit_with_error(ret);
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ exit_with_error(ret);
+ }
+ ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+ }
+
+ for (i = 0; i < rcvd; i++) {
+ u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
+ (void)xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
+ u64 orig = xsk_umem__extract_addr(addr);
+
+ addr = xsk_umem__add_offset_to_addr(addr);
+ pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE);
+ if (!pkt_node_rx)
+ exit_with_error(errno);
+
+ pkt_node_rx->pkt_frame = (char *)malloc(PKT_SIZE);
+ if (!pkt_node_rx->pkt_frame)
+ exit_with_error(errno);
+
+ memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr),
+ PKT_SIZE);
+
+ TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes);
+
+ *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+ }
+
+ xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+ xsk_ring_cons__release(&xsk->rx, rcvd);
+ xsk->rx_npkts += rcvd;
+}
+
+static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
+{
+ u32 idx;
+ unsigned int i;
+
+ while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
+ complete_tx_only(xsk, batch_size);
+
+ for (i = 0; i < batch_size; i++) {
+ struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+
+ tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
+ tx_desc->len = PKT_SIZE;
+ }
+
+ xsk_ring_prod__submit(&xsk->tx, batch_size);
+ xsk->outstanding_tx += batch_size;
+ *frameptr += batch_size;
+ *frameptr %= num_frames;
+ complete_tx_only(xsk, batch_size);
+}
+
+static inline int get_batch_size(int pkt_cnt)
+{
+ if (!opt_pkt_count)
+ return BATCH_SIZE;
+
+ if (pkt_cnt + BATCH_SIZE <= opt_pkt_count)
+ return BATCH_SIZE;
+
+ return opt_pkt_count - pkt_cnt;
+}
+
+static void complete_tx_only_all(void *arg)
+{
+ bool pending;
+
+ do {
+ pending = false;
+ if (((struct ifobject *)arg)->xsk->outstanding_tx) {
+ complete_tx_only(((struct ifobject *)
+ arg)->xsk, BATCH_SIZE);
+ pending = !!((struct ifobject *)arg)->xsk->outstanding_tx;
+ }
+ } while (pending);
+}
+
+static void tx_only_all(void *arg)
+{
+ struct pollfd fds[MAX_SOCKS] = { };
+ u32 frame_nb = 0;
+ int pkt_cnt = 0;
+ int ret;
+
+ fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk);
+ fds[0].events = POLLOUT;
+
+ while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
+ int batch_size = get_batch_size(pkt_cnt);
+
+ if (opt_poll) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret <= 0)
+ continue;
+
+ if (!(fds[0].revents & POLLOUT))
+ continue;
+ }
+
+ tx_only(((struct ifobject *)arg)->xsk, &frame_nb, batch_size);
+ pkt_cnt += batch_size;
+ }
+
+ if (opt_pkt_count)
+ complete_tx_only_all(arg);
+}
+
+static void worker_pkt_dump(void)
+{
+ struct in_addr ipaddr;
+
+ fprintf(stdout, "---------------------------------------\n");
+ for (int iter = 0; iter < num_frames - 1; iter++) {
+ /*extract L2 frame */
+ fprintf(stdout, "DEBUG>> L2: dst mac: ");
+ for (int i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ((struct ethhdr *)
+ pkt_buf[iter]->payload)->h_dest[i]);
+
+ fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+ for (int i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ((struct ethhdr *)
+ pkt_buf[iter]->payload)->h_source[i]);
+
+ /*extract L3 frame */
+ fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n",
+ ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl);
+
+ ipaddr.s_addr =
+ ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr;
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr));
+
+ ipaddr.s_addr =
+ ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr;
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr));
+
+ /*extract L4 frame */
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n",
+ ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
+ sizeof(struct ethhdr) +
+ sizeof(struct iphdr)))->source));
+
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n",
+ ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
+ sizeof(struct ethhdr) +
+ sizeof(struct iphdr)))->dest));
+ /*extract L5 frame */
+ int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
+
+ if (payload == EOT) {
+ ksft_print_msg("End-of-tranmission frame received\n");
+ fprintf(stdout, "---------------------------------------\n");
+ break;
+ }
+ fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
+ fprintf(stdout, "---------------------------------------\n");
+ }
+}
+
+static void worker_pkt_validate(void)
+{
+ u32 payloadseqnum = -2;
+
+ while (1) {
+ pkt_node_rx_q = malloc(sizeof(struct pkt));
+ pkt_node_rx_q = TAILQ_LAST(&head, head_s);
+ if (!pkt_node_rx_q)
+ break;
+ /*do not increment pktcounter if !(tos=0x9 and ipv4) */
+ if ((((struct iphdr *)(pkt_node_rx_q->pkt_frame +
+ sizeof(struct ethhdr)))->version == IP_PKT_VER)
+ && (((struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)))->tos ==
+ IP_PKT_TOS)) {
+ payloadseqnum = *((uint32_t *) (pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE));
+ if (debug_pkt_dump && payloadseqnum != EOT) {
+ pkt_obj = (struct pkt_frame *)malloc(sizeof(struct pkt_frame));
+ pkt_obj->payload = (char *)malloc(PKT_SIZE);
+ memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE);
+ pkt_buf[payloadseqnum] = pkt_obj;
+ }
+
+ if (payloadseqnum == EOT) {
+ ksft_print_msg("End-of-tranmission frame received: PASS\n");
+ sigvar = 1;
+ break;
+ }
+
+ if (prev_pkt + 1 != payloadseqnum) {
+ ksft_test_result_fail
+ ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n",
+ __func__, prev_pkt, payloadseqnum);
+ ksft_exit_xfail();
+ }
+
+ TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
+ free(pkt_node_rx_q->pkt_frame);
+ free(pkt_node_rx_q);
+ pkt_node_rx_q = NULL;
+ prev_pkt = payloadseqnum;
+ pkt_counter++;
+ } else {
+ ksft_print_msg("Invalid frame received: ");
+ ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n",
+ ((struct iphdr *)(pkt_node_rx_q->pkt_frame +
+ sizeof(struct ethhdr)))->version,
+ ((struct iphdr *)(pkt_node_rx_q->pkt_frame +
+ sizeof(struct ethhdr)))->tos);
+ TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
+ free(pkt_node_rx_q->pkt_frame);
+ free(pkt_node_rx_q);
+ pkt_node_rx_q = NULL;
+ }
+ }
+}
+
+static void thread_common_ops(void *arg, void *bufs, pthread_mutex_t *mutexptr,
+ atomic_int *spinningptr)
+{
+ int ctr = 0;
+ int ret;
+
+ xsk_configure_umem((struct ifobject *)arg, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
+ ret = xsk_configure_socket((struct ifobject *)arg);
+
+ /* Retry Create Socket if it fails as xsk_socket__create()
+ * is asynchronous
+ *
+ * Essential to lock Mutex here to prevent Tx thread from
+ * entering before Rx and causing a deadlock
+ */
+ pthread_mutex_lock(mutexptr);
+ while (ret && ctr < SOCK_RECONF_CTR) {
+ atomic_store(spinningptr, 1);
+ xsk_configure_umem((struct ifobject *)arg,
+ bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
+ ret = xsk_configure_socket((struct ifobject *)arg);
+ usleep(USLEEP_MAX);
+ ctr++;
+ }
+ atomic_store(spinningptr, 0);
+ pthread_mutex_unlock(mutexptr);
+
+ if (ctr >= SOCK_RECONF_CTR)
+ exit_with_error(ret);
+}
+
+static void *worker_testapp_validate(void *arg)
+{
+ struct udphdr *udp_hdr =
+ (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
+ struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data));
+ struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
+ struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
+ void *bufs = NULL;
+
+ pthread_attr_setstacksize(&attr, THREAD_STACK);
+
+ if (!bidi_pass) {
+ bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (bufs == MAP_FAILED)
+ exit_with_error(errno);
+
+ if (strcmp(((struct ifobject *)arg)->nsname, ""))
+ switch_namespace(((struct ifobject *)arg)->ifdict_index);
+ }
+
+ if (((struct ifobject *)arg)->fv.vector == tx) {
+ int spinningrxctr = 0;
+
+ if (!bidi_pass)
+ thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx);
+
+ while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
+ spinningrxctr++;
+ usleep(USLEEP_MAX);
+ }
+
+ ksft_print_msg("Interface [%s] vector [Tx]\n", ((struct ifobject *)arg)->ifname);
+ for (int i = 0; i < num_frames; i++) {
+ /*send EOT frame */
+ if (i == (num_frames - 1))
+ data->seqnum = -1;
+ else
+ data->seqnum = i;
+ gen_udp_hdr((void *)data, (void *)arg, udp_hdr);
+ gen_ip_hdr((void *)arg, ip_hdr);
+ gen_udp_csum(udp_hdr, ip_hdr);
+ gen_eth_hdr((void *)arg, eth_hdr);
+ gen_eth_frame(((struct ifobject *)arg)->umem,
+ i * XSK_UMEM__DEFAULT_FRAME_SIZE);
+ }
+
+ free(data);
+ ksft_print_msg("Sending %d packets on interface %s\n",
+ (opt_pkt_count - 1), ((struct ifobject *)arg)->ifname);
+ tx_only_all(arg);
+ } else if (((struct ifobject *)arg)->fv.vector == rx) {
+ struct pollfd fds[MAX_SOCKS] = { };
+ int ret;
+
+ if (!bidi_pass)
+ thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx);
+
+ ksft_print_msg("Interface [%s] vector [Rx]\n", ((struct ifobject *)arg)->ifname);
+ xsk_populate_fill_ring(((struct ifobject *)arg)->umem);
+
+ TAILQ_INIT(&head);
+ if (debug_pkt_dump) {
+ pkt_buf = malloc(sizeof(struct pkt_frame **) * num_frames);
+ if (!pkt_buf)
+ exit_with_error(errno);
+ }
+
+ fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk);
+ fds[0].events = POLLIN;
+
+ pthread_mutex_lock(&sync_mutex);
+ pthread_cond_signal(&signal_rx_condition);
+ pthread_mutex_unlock(&sync_mutex);
+
+ while (1) {
+ if (opt_poll) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret <= 0)
+ continue;
+ }
+ rx_pkt(((struct ifobject *)arg)->xsk, fds);
+ worker_pkt_validate();
+
+ if (sigvar)
+ break;
+ }
+
+ ksft_print_msg("Received %d packets on interface %s\n",
+ pkt_counter, ((struct ifobject *)arg)->ifname);
+
+ if (opt_teardown)
+ ksft_print_msg("Destroying socket\n");
+ }
+
+ if (!opt_bidi || (opt_bidi && bidi_pass)) {
+ xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk);
+ (void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem);
+ }
+ pthread_exit(NULL);
+}
+
+static void testapp_validate(void)
+{
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, THREAD_STACK);
+
+ if (opt_bidi && bidi_pass) {
+ pthread_init_mutex();
+ if (!switching_notify) {
+ ksft_print_msg("Switching Tx/Rx vectors\n");
+ switching_notify++;
+ }
+ }
+
+ pthread_mutex_lock(&sync_mutex);
+
+ /*Spawn RX thread */
+ if (!opt_bidi || (opt_bidi && !bidi_pass)) {
+ if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1]))
+ exit_with_error(errno);
+ } else if (opt_bidi && bidi_pass) {
+ /*switch Tx/Rx vectors */
+ ifdict[0]->fv.vector = rx;
+ if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[0]))
+ exit_with_error(errno);
+ }
+
+ struct timespec max_wait = { 0, 0 };
+
+ if (clock_gettime(CLOCK_REALTIME, &max_wait))
+ exit_with_error(errno);
+ max_wait.tv_sec += TMOUT_SEC;
+
+ if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT)
+ exit_with_error(errno);
+
+ pthread_mutex_unlock(&sync_mutex);
+
+ /*Spawn TX thread */
+ if (!opt_bidi || (opt_bidi && !bidi_pass)) {
+ if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0]))
+ exit_with_error(errno);
+ } else if (opt_bidi && bidi_pass) {
+ /*switch Tx/Rx vectors */
+ ifdict[1]->fv.vector = tx;
+ if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[1]))
+ exit_with_error(errno);
+ }
+
+ pthread_join(t1, NULL);
+ pthread_join(t0, NULL);
+
+ if (debug_pkt_dump) {
+ worker_pkt_dump();
+ for (int iter = 0; iter < num_frames - 1; iter++) {
+ free(pkt_buf[iter]->payload);
+ free(pkt_buf[iter]);
+ }
+ free(pkt_buf);
+ }
+
+ if (!opt_teardown && !opt_bidi)
+ print_ksft_result();
+}
+
+static void testapp_sockets(void)
+{
+ for (int i = 0; i < (opt_teardown ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) {
+ pkt_counter = 0;
+ prev_pkt = -1;
+ sigvar = 0;
+ ksft_print_msg("Creating socket\n");
+ testapp_validate();
+ opt_bidi ? bidi_pass++ : bidi_pass;
+ }
+
+ print_ksft_result();
+}
+
+static void init_iface_config(void *ifaceconfig)
+{
+ /*Init interface0 */
+ ifdict[0]->fv.vector = tx;
+ memcpy(ifdict[0]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN);
+ memcpy(ifdict[0]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN);
+ ifdict[0]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr;
+ ifdict[0]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr;
+ ifdict[0]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port;
+ ifdict[0]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port;
+
+ /*Init interface1 */
+ ifdict[1]->fv.vector = rx;
+ memcpy(ifdict[1]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN);
+ memcpy(ifdict[1]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN);
+ ifdict[1]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr;
+ ifdict[1]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr;
+ ifdict[1]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port;
+ ifdict[1]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port;
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
+
+ if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
+ exit_with_error(errno);
+
+ const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
+ const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
+ const char *IP1 = "192.168.100.162";
+ const char *IP2 = "192.168.100.161";
+ u16 UDP_DST_PORT = 2020;
+ u16 UDP_SRC_PORT = 2121;
+
+ ifaceconfig = (struct ifaceconfigobj *)malloc(sizeof(struct ifaceconfigobj));
+ memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN);
+ memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN);
+ inet_aton(IP1, &ifaceconfig->dst_ip);
+ inet_aton(IP2, &ifaceconfig->src_ip);
+ ifaceconfig->dst_port = UDP_DST_PORT;
+ ifaceconfig->src_port = UDP_SRC_PORT;
+
+ for (int i = 0; i < MAX_INTERFACES; i++) {
+ ifdict[i] = (struct ifobject *)malloc(sizeof(struct ifobject));
+ if (!ifdict[i])
+ exit_with_error(errno);
+
+ ifdict[i]->ifdict_index = i;
+ }
+
+ setlocale(LC_ALL, "");
+
+ parse_command_line(argc, argv);
+
+ num_frames = ++opt_pkt_count;
+
+ init_iface_config((void *)ifaceconfig);
+
+ pthread_init_mutex();
+
+ ksft_set_plan(1);
+
+ if (!opt_teardown && !opt_bidi) {
+ testapp_validate();
+ } else if (opt_teardown && opt_bidi) {
+ ksft_test_result_fail("ERROR: parameters -T and -B cannot be used together\n");
+ ksft_exit_xfail();
+ } else {
+ testapp_sockets();
+ }
+
+ for (int i = 0; i < MAX_INTERFACES; i++)
+ free(ifdict[i]);
+
+ pthread_destroy_mutex();
+
+ ksft_exit_pass();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
new file mode 100644
index 000000000000..61f595b6f200
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2020 Intel Corporation.
+ */
+
+#ifndef XDPXCEIVER_H_
+#define XDPXCEIVER_H_
+
+#ifndef SOL_XDP
+#define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+#define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+#define PF_XDP AF_XDP
+#endif
+
+#define MAX_INTERFACES 2
+#define MAX_INTERFACE_NAME_CHARS 7
+#define MAX_INTERFACES_NAMESPACE_CHARS 10
+#define MAX_SOCKS 1
+#define MAX_TEARDOWN_ITER 10
+#define MAX_BIDI_ITER 2
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+ sizeof(struct udphdr))
+#define MIN_PKT_SIZE 64
+#define ETH_FCS_SIZE 4
+#define PKT_SIZE (MIN_PKT_SIZE - ETH_FCS_SIZE)
+#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
+#define IP_PKT_VER 0x4
+#define IP_PKT_TOS 0x9
+#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
+#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
+#define TMOUT_SEC (3)
+#define EOT (-1)
+#define USLEEP_MAX 200000
+#define THREAD_STACK 60000000
+#define SOCK_RECONF_CTR 10
+#define BATCH_SIZE 64
+#define POLL_TMOUT 1000
+#define NEED_WAKEUP true
+
+typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8 u8;
+
+enum TESTS {
+ ORDER_CONTENT_VALIDATE_XDP_SKB = 0,
+ ORDER_CONTENT_VALIDATE_XDP_DRV = 1,
+};
+
+u8 uut;
+u8 debug_pkt_dump;
+u32 num_frames;
+u8 switching_notify;
+u8 bidi_pass;
+
+static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int opt_queue;
+static int opt_pkt_count;
+static int opt_poll;
+static int opt_teardown;
+static int opt_bidi;
+static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
+static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
+static u32 pkt_counter;
+static u32 prev_pkt = -1;
+static int sigvar;
+
+struct xsk_umem_info {
+ struct xsk_ring_prod fq;
+ struct xsk_ring_cons cq;
+ struct xsk_umem *umem;
+ void *buffer;
+};
+
+struct xsk_socket_info {
+ struct xsk_ring_cons rx;
+ struct xsk_ring_prod tx;
+ struct xsk_umem_info *umem;
+ struct xsk_socket *xsk;
+ unsigned long rx_npkts;
+ unsigned long tx_npkts;
+ unsigned long prev_rx_npkts;
+ unsigned long prev_tx_npkts;
+ u32 outstanding_tx;
+};
+
+struct flow_vector {
+ enum fvector {
+ tx,
+ rx,
+ bidi,
+ undef,
+ } vector;
+};
+
+struct generic_data {
+ u32 seqnum;
+};
+
+struct ifaceconfigobj {
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
+ struct in_addr dst_ip;
+ struct in_addr src_ip;
+ u16 src_port;
+ u16 dst_port;
+} *ifaceconfig;
+
+struct ifobject {
+ int ifindex;
+ int ifdict_index;
+ char ifname[MAX_INTERFACE_NAME_CHARS];
+ char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
+ struct flow_vector fv;
+ struct xsk_socket_info *xsk;
+ struct xsk_umem_info *umem;
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
+ u32 dst_ip;
+ u32 src_ip;
+ u16 src_port;
+ u16 dst_port;
+};
+
+static struct ifobject *ifdict[MAX_INTERFACES];
+
+/*threads*/
+atomic_int spinning_tx;
+atomic_int spinning_rx;
+pthread_mutex_t sync_mutex;
+pthread_mutex_t sync_mutex_tx;
+pthread_cond_t signal_rx_condition;
+pthread_cond_t signal_tx_condition;
+pthread_t t0, t1, ns_thread;
+pthread_attr_t attr;
+
+struct targs {
+ bool retptr;
+ int idx;
+};
+
+TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
+struct head_s *head_p;
+struct pkt {
+ char *pkt_frame;
+
+ TAILQ_ENTRY(pkt) pkt_nodes;
+} *pkt_node_rx, *pkt_node_rx_q;
+
+struct pkt_frame {
+ char *payload;
+} *pkt_obj;
+
+struct pkt_frame **pkt_buf;
+
+#endif /* XDPXCEIVER_H */
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
new file mode 100755
index 000000000000..9d54c4645127
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2020 Intel Corporation.
+
+ksft_pass=0
+ksft_fail=1
+ksft_xfail=2
+ksft_xpass=3
+ksft_skip=4
+
+GREEN='\033[0;92m'
+YELLOW='\033[0;93m'
+RED='\033[0;31m'
+NC='\033[0m'
+STACK_LIM=131072
+SPECFILE=veth.spec
+XSKOBJ=xdpxceiver
+NUMPKTS=10000
+
+validate_root_exec()
+{
+ msg="skip all tests:"
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ test_exit $ksft_fail 2
+ else
+ return $ksft_pass
+ fi
+}
+
+validate_veth_support()
+{
+ msg="skip all tests:"
+ if [ $(ip link add $1 type veth 2>/dev/null; echo $?;) != 0 ]; then
+ echo $msg veth kernel support not available >&2
+ test_exit $ksft_skip 1
+ else
+ ip link del $1
+ return $ksft_pass
+ fi
+}
+
+validate_veth_spec_file()
+{
+ if [ ! -f ${SPECFILE} ]; then
+ test_exit $ksft_skip 1
+ fi
+}
+
+test_status()
+{
+ statusval=$1
+ if [ -n "${colorconsole+set}" ]; then
+ if [ $statusval -eq 2 ]; then
+ echo -e "${YELLOW}$2${NC}: [ ${RED}FAIL${NC} ]"
+ elif [ $statusval -eq 1 ]; then
+ echo -e "${YELLOW}$2${NC}: [ ${RED}SKIPPED${NC} ]"
+ elif [ $statusval -eq 0 ]; then
+ echo -e "${YELLOW}$2${NC}: [ ${GREEN}PASS${NC} ]"
+ fi
+ else
+ if [ $statusval -eq 2 ]; then
+ echo -e "$2: [ FAIL ]"
+ elif [ $statusval -eq 1 ]; then
+ echo -e "$2: [ SKIPPED ]"
+ elif [ $statusval -eq 0 ]; then
+ echo -e "$2: [ PASS ]"
+ fi
+ fi
+}
+
+test_exit()
+{
+ retval=$1
+ if [ $2 -ne 0 ]; then
+ test_status $2 $(basename $0)
+ fi
+ exit $retval
+}
+
+clear_configs()
+{
+ if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then
+ [ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] &&
+ { echo "removing link $1:$2"; ip netns exec $3 ip link del $2; }
+ echo "removing ns $3"
+ ip netns del $3
+ fi
+ #Once we delete a veth pair node, the entire veth pair is removed,
+ #this is just to be cautious just incase the NS does not exist then
+ #veth node inside NS won't get removed so we explicitly remove it
+ [ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
+ { echo "removing link $1"; ip link del $1; }
+ if [ -f ${SPECFILE} ]; then
+ echo "removing spec file:" ${SPECFILE}
+ rm -f ${SPECFILE}
+ fi
+}
+
+cleanup_exit()
+{
+ echo "cleaning up..."
+ clear_configs $1 $2 $3
+}
+
+validate_ip_utility()
+{
+ [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; }
+}
+
+vethXDPgeneric()
+{
+ ip link set dev $1 xdpdrv off
+ ip netns exec $3 ip link set dev $2 xdpdrv off
+}
+
+vethXDPnative()
+{
+ ip link set dev $1 xdpgeneric off
+ ip netns exec $3 ip link set dev $2 xdpgeneric off
+}
+
+execxdpxceiver()
+{
+ local -a 'paramkeys=("${!'"$1"'[@]}")' copy
+ paramkeysstr=${paramkeys[*]}
+
+ for index in $paramkeysstr;
+ do
+ current=$1"[$index]"
+ copy[$index]=${!current}
+ done
+
+ ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS}
+}