summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-05 20:13:21 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-05 20:13:21 -0700
commit47ec5303d73ea344e84f46660fff693c57641386 (patch)
treea2252debab749de29620c43285295d60c4741119 /net/core
parent8186749621ed6b8fc42644c399e8c755a2b6f630 (diff)
parentc1055b76ad00aed0e8b79417080f212d736246b6 (diff)
downloadlinux-47ec5303d73ea344e84f46660fff693c57641386.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller: 1) Support 6Ghz band in ath11k driver, from Rajkumar Manoharan. 2) Support UDP segmentation in code TSO code, from Eric Dumazet. 3) Allow flashing different flash images in cxgb4 driver, from Vishal Kulkarni. 4) Add drop frames counter and flow status to tc flower offloading, from Po Liu. 5) Support n-tuple filters in cxgb4, from Vishal Kulkarni. 6) Various new indirect call avoidance, from Eric Dumazet and Brian Vazquez. 7) Fix BPF verifier failures on 32-bit pointer arithmetic, from Yonghong Song. 8) Support querying and setting hardware address of a port function via devlink, use this in mlx5, from Parav Pandit. 9) Support hw ipsec offload on bonding slaves, from Jarod Wilson. 10) Switch qca8k driver over to phylink, from Jonathan McDowell. 11) In bpftool, show list of processes holding BPF FD references to maps, programs, links, and btf objects. From Andrii Nakryiko. 12) Several conversions over to generic power management, from Vaibhav Gupta. 13) Add support for SO_KEEPALIVE et al. to bpf_setsockopt(), from Dmitry Yakunin. 14) Various https url conversions, from Alexander A. Klimov. 15) Timestamping and PHC support for mscc PHY driver, from Antoine Tenart. 16) Support bpf iterating over tcp and udp sockets, from Yonghong Song. 17) Support 5GBASE-T i40e NICs, from Aleksandr Loktionov. 18) Add kTLS RX HW offload support to mlx5e, from Tariq Toukan. 19) Fix the ->ndo_start_xmit() return type to be netdev_tx_t in several drivers. From Luc Van Oostenryck. 20) XDP support for xen-netfront, from Denis Kirjanov. 21) Support receive buffer autotuning in MPTCP, from Florian Westphal. 22) Support EF100 chip in sfc driver, from Edward Cree. 23) Add XDP support to mvpp2 driver, from Matteo Croce. 24) Support MPTCP in sock_diag, from Paolo Abeni. 25) Commonize UDP tunnel offloading code by creating udp_tunnel_nic infrastructure, from Jakub Kicinski. 26) Several pci_ --> dma_ API conversions, from Christophe JAILLET. 27) Add FLOW_ACTION_POLICE support to mlxsw, from Ido Schimmel. 28) Add SK_LOOKUP bpf program type, from Jakub Sitnicki. 29) Refactor a lot of networking socket option handling code in order to avoid set_fs() calls, from Christoph Hellwig. 30) Add rfc4884 support to icmp code, from Willem de Bruijn. 31) Support TBF offload in dpaa2-eth driver, from Ioana Ciornei. 32) Support XDP_REDIRECT in qede driver, from Alexander Lobakin. 33) Support PCI relaxed ordering in mlx5 driver, from Aya Levin. 34) Support TCP syncookies in MPTCP, from Flowian Westphal. 35) Fix several tricky cases of PMTU handling wrt. briding, from Stefano Brivio. * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2056 commits) net: thunderx: initialize VF's mailbox mutex before first usage usb: hso: remove bogus check for EINPROGRESS usb: hso: no complaint about kmalloc failure hso: fix bailout in error case of probe ip_tunnel_core: Fix build for archs without _HAVE_ARCH_IPV6_CSUM selftests/net: relax cpu affinity requirement in msg_zerocopy test mptcp: be careful on subflow creation selftests: rtnetlink: make kci_test_encap() return sub-test result selftests: rtnetlink: correct the final return value for the test net: dsa: sja1105: use detected device id instead of DT one on mismatch tipc: set ub->ifindex for local ipv6 address ipv6: add ipv6_dev_find() net: openvswitch: silence suspicious RCU usage warning Revert "vxlan: fix tos value before xmit" ptp: only allow phase values lower than 1 period farsync: switch from 'pci_' to 'dma_' API wan: wanxl: switch from 'pci_' to 'dma_' API hv_netvsc: do not use VF device if link is down dpaa2-eth: Fix passing zero to 'PTR_ERR' warning net: macb: Properly handle phylink on at91sam9x ...
Diffstat (limited to 'net/core')
-rw-r--r--net/core/bpf_sk_storage.c260
-rw-r--r--net/core/dev.c582
-rw-r--r--net/core/dev_ioctl.c29
-rw-r--r--net/core/devlink.c653
-rw-r--r--net/core/fib_rules.c31
-rw-r--r--net/core/filter.c386
-rw-r--r--net/core/flow_dissector.c17
-rw-r--r--net/core/flow_offload.c12
-rw-r--r--net/core/neighbour.c1
-rw-r--r--net/core/rtnetlink.c118
-rw-r--r--net/core/skbuff.c7
-rw-r--r--net/core/sock.c119
-rw-r--r--net/core/sock_map.c88
-rw-r--r--net/core/tso.c44
-rw-r--r--net/core/xdp.c9
15 files changed, 1831 insertions, 525 deletions
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d2c4d16dadba..d3377c90a291 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -6,13 +6,12 @@
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
+#include <linux/btf_ids.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
-static atomic_t cache_idx;
-
#define SK_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_CLONE)
@@ -81,6 +80,9 @@ struct bpf_sk_storage_elem {
#define SDATA(_SELEM) (&(_SELEM)->sdata)
#define BPF_SK_STORAGE_CACHE_SIZE 16
+static DEFINE_SPINLOCK(cache_idx_lock);
+static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE];
+
struct bpf_sk_storage {
struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE];
struct hlist_head list; /* List of bpf_sk_storage_elem */
@@ -512,6 +514,37 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
return 0;
}
+static u16 cache_idx_get(void)
+{
+ u64 min_usage = U64_MAX;
+ u16 i, res = 0;
+
+ spin_lock(&cache_idx_lock);
+
+ for (i = 0; i < BPF_SK_STORAGE_CACHE_SIZE; i++) {
+ if (cache_idx_usage_counts[i] < min_usage) {
+ min_usage = cache_idx_usage_counts[i];
+ res = i;
+
+ /* Found a free cache_idx */
+ if (!min_usage)
+ break;
+ }
+ }
+ cache_idx_usage_counts[res]++;
+
+ spin_unlock(&cache_idx_lock);
+
+ return res;
+}
+
+static void cache_idx_free(u16 idx)
+{
+ spin_lock(&cache_idx_lock);
+ cache_idx_usage_counts[idx]--;
+ spin_unlock(&cache_idx_lock);
+}
+
/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
@@ -560,6 +593,8 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
smap = (struct bpf_sk_storage_map *)map;
+ cache_idx_free(smap->cache_idx);
+
/* Note that this map might be concurrently cloned from
* bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
* RCU read section to finish before proceeding. New RCU
@@ -673,8 +708,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
}
smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
- smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) %
- BPF_SK_STORAGE_CACHE_SIZE;
+ smap->cache_idx = cache_idx_get();
return &smap->map;
}
@@ -886,6 +920,7 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
return -ENOENT;
}
+static int sk_storage_map_btf_id;
const struct bpf_map_ops sk_storage_map_ops = {
.map_alloc_check = bpf_sk_storage_map_alloc_check,
.map_alloc = bpf_sk_storage_map_alloc,
@@ -895,6 +930,8 @@ const struct bpf_map_ops sk_storage_map_ops = {
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
.map_check_btf = bpf_sk_storage_map_check_btf,
+ .map_btf_name = "bpf_sk_storage_map",
+ .map_btf_id = &sk_storage_map_btf_id,
};
const struct bpf_func_proto bpf_sk_storage_get_proto = {
@@ -907,6 +944,16 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = {
.arg4_type = ARG_ANYTHING,
};
+const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
+ .func = bpf_sk_storage_get,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */
+ .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg4_type = ARG_ANYTHING,
+};
+
const struct bpf_func_proto bpf_sk_storage_delete_proto = {
.func = bpf_sk_storage_delete,
.gpl_only = false,
@@ -1181,3 +1228,208 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);
+
+struct bpf_iter_seq_sk_storage_map_info {
+ struct bpf_map *map;
+ unsigned int bucket_id;
+ unsigned skip_elems;
+};
+
+static struct bpf_sk_storage_elem *
+bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
+ struct bpf_sk_storage_elem *prev_selem)
+{
+ struct bpf_sk_storage *sk_storage;
+ struct bpf_sk_storage_elem *selem;
+ u32 skip_elems = info->skip_elems;
+ struct bpf_sk_storage_map *smap;
+ u32 bucket_id = info->bucket_id;
+ u32 i, count, n_buckets;
+ struct bucket *b;
+
+ smap = (struct bpf_sk_storage_map *)info->map;
+ n_buckets = 1U << smap->bucket_log;
+ if (bucket_id >= n_buckets)
+ return NULL;
+
+ /* try to find next selem in the same bucket */
+ selem = prev_selem;
+ count = 0;
+ while (selem) {
+ selem = hlist_entry_safe(selem->map_node.next,
+ struct bpf_sk_storage_elem, map_node);
+ if (!selem) {
+ /* not found, unlock and go to the next bucket */
+ b = &smap->buckets[bucket_id++];
+ raw_spin_unlock_bh(&b->lock);
+ skip_elems = 0;
+ break;
+ }
+ sk_storage = rcu_dereference_raw(selem->sk_storage);
+ if (sk_storage) {
+ info->skip_elems = skip_elems + count;
+ return selem;
+ }
+ count++;
+ }
+
+ for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
+ b = &smap->buckets[i];
+ raw_spin_lock_bh(&b->lock);
+ count = 0;
+ hlist_for_each_entry(selem, &b->list, map_node) {
+ sk_storage = rcu_dereference_raw(selem->sk_storage);
+ if (sk_storage && count >= skip_elems) {
+ info->bucket_id = i;
+ info->skip_elems = count;
+ return selem;
+ }
+ count++;
+ }
+ raw_spin_unlock_bh(&b->lock);
+ skip_elems = 0;
+ }
+
+ info->bucket_id = i;
+ info->skip_elems = 0;
+ return NULL;
+}
+
+static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_sk_storage_elem *selem;
+
+ selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
+ if (!selem)
+ return NULL;
+
+ if (*pos == 0)
+ ++*pos;
+ return selem;
+}
+
+static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
+
+ ++*pos;
+ ++info->skip_elems;
+ return bpf_sk_storage_map_seq_find_next(seq->private, v);
+}
+
+struct bpf_iter__bpf_sk_storage_map {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_map *, map);
+ __bpf_md_ptr(struct sock *, sk);
+ __bpf_md_ptr(void *, value);
+};
+
+DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
+ struct bpf_map *map, struct sock *sk,
+ void *value)
+
+static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
+ struct bpf_sk_storage_elem *selem)
+{
+ struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
+ struct bpf_iter__bpf_sk_storage_map ctx = {};
+ struct bpf_sk_storage *sk_storage;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ int ret = 0;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, selem == NULL);
+ if (prog) {
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (selem) {
+ sk_storage = rcu_dereference_raw(selem->sk_storage);
+ ctx.sk = sk_storage->sk;
+ ctx.value = SDATA(selem)->data;
+ }
+ ret = bpf_iter_run_prog(prog, &ctx);
+ }
+
+ return ret;
+}
+
+static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_sk_storage_map_seq_show(seq, v);
+}
+
+static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
+ struct bpf_sk_storage_map *smap;
+ struct bucket *b;
+
+ if (!v) {
+ (void)__bpf_sk_storage_map_seq_show(seq, v);
+ } else {
+ smap = (struct bpf_sk_storage_map *)info->map;
+ b = &smap->buckets[info->bucket_id];
+ raw_spin_unlock_bh(&b->lock);
+ }
+}
+
+static int bpf_iter_init_sk_storage_map(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
+
+ seq_info->map = aux->map;
+ return 0;
+}
+
+static int bpf_iter_check_map(struct bpf_prog *prog,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_map *map = aux->map;
+
+ if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
+ return -EINVAL;
+
+ if (prog->aux->max_rdonly_access > map->value_size)
+ return -EACCES;
+
+ return 0;
+}
+
+static const struct seq_operations bpf_sk_storage_map_seq_ops = {
+ .start = bpf_sk_storage_map_seq_start,
+ .next = bpf_sk_storage_map_seq_next,
+ .stop = bpf_sk_storage_map_seq_stop,
+ .show = bpf_sk_storage_map_seq_show,
+};
+
+static const struct bpf_iter_seq_info iter_seq_info = {
+ .seq_ops = &bpf_sk_storage_map_seq_ops,
+ .init_seq_private = bpf_iter_init_sk_storage_map,
+ .fini_seq_private = NULL,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
+};
+
+static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
+ .target = "bpf_sk_storage_map",
+ .check_target = bpf_iter_check_map,
+ .req_linfo = BPF_ITER_LINK_MAP_FD,
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ { offsetof(struct bpf_iter__bpf_sk_storage_map, value),
+ PTR_TO_RDWR_BUF_OR_NULL },
+ },
+ .seq_info = &iter_seq_info,
+};
+
+static int __init bpf_sk_storage_map_iter_init(void)
+{
+ bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
+ btf_sock_ids[BTF_SOCK_TYPE_SOCK];
+ return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
+}
+late_initcall(bpf_sk_storage_map_iter_init);
diff --git a/net/core/dev.c b/net/core/dev.c
index ba4de97b676b..7df6c9617321 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -143,6 +143,7 @@
#include <linux/net_namespace.h>
#include <linux/indirect_call_wrapper.h>
#include <net/devlink.h>
+#include <linux/pm_runtime.h>
#include "net-sysfs.h"
@@ -1492,8 +1493,13 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
ASSERT_RTNL();
- if (!netif_device_present(dev))
- return -ENODEV;
+ if (!netif_device_present(dev)) {
+ /* may be detached because parent is runtime-suspended */
+ if (dev->dev.parent)
+ pm_runtime_resume(dev->dev.parent);
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ }
/* Block netpoll from trying to do any rx path servicing.
* If we don't do this there is a chance ndo_poll_controller
@@ -3448,10 +3454,9 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb,
static netdev_features_t harmonize_features(struct sk_buff *skb,
netdev_features_t features)
{
- int tmp;
__be16 type;
- type = skb_network_protocol(skb, &tmp);
+ type = skb_network_protocol(skb, NULL);
features = net_mpls_features(skb, features, type);
if (skb->ip_summed != CHECKSUM_NONE &&
@@ -5442,6 +5447,8 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
for (i = 0; i < new->aux->used_map_cnt; i++) {
if (dev_map_can_have_prog(new->aux->used_maps[i]))
return -EINVAL;
+ if (cpu_map_prog_allowed(new->aux->used_maps[i]))
+ return -EINVAL;
}
}
@@ -5460,10 +5467,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
}
break;
- case XDP_QUERY_PROG:
- xdp->prog_id = old ? old->aux->id : 0;
- break;
-
default:
ret = -EINVAL;
break;
@@ -5585,7 +5588,7 @@ void netif_receive_skb_list(struct list_head *head)
}
EXPORT_SYMBOL(netif_receive_skb_list);
-DEFINE_PER_CPU(struct work_struct, flush_works);
+static DEFINE_PER_CPU(struct work_struct, flush_works);
/* Network device is going away, flush any packets still pending */
static void flush_backlog(struct work_struct *work)
@@ -6685,7 +6688,9 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
trace_napi_poll(n, work, weight);
}
- WARN_ON_ONCE(work > weight);
+ if (unlikely(work > weight))
+ pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
+ n->poll, work, weight);
if (likely(work < weight))
goto out_unlock;
@@ -8706,182 +8711,489 @@ int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down_generic);
-u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
- enum bpf_netdev_command cmd)
+/**
+ * dev_change_proto_down_reason - proto down reason
+ *
+ * @dev: device
+ * @mask: proto down mask
+ * @value: proto down value
+ */
+void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
+ u32 value)
{
- struct netdev_bpf xdp;
+ int b;
- if (!bpf_op)
- return 0;
+ if (!mask) {
+ dev->proto_down_reason = value;
+ } else {
+ for_each_set_bit(b, &mask, 32) {
+ if (value & (1 << b))
+ dev->proto_down_reason |= BIT(b);
+ else
+ dev->proto_down_reason &= ~BIT(b);
+ }
+ }
+}
+EXPORT_SYMBOL(dev_change_proto_down_reason);
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = cmd;
+struct bpf_xdp_link {
+ struct bpf_link link;
+ struct net_device *dev; /* protected by rtnl_lock, no refcnt held */
+ int flags;
+};
- /* Query must always succeed. */
- WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
+static enum bpf_xdp_mode dev_xdp_mode(u32 flags)
+{
+ if (flags & XDP_FLAGS_HW_MODE)
+ return XDP_MODE_HW;
+ if (flags & XDP_FLAGS_DRV_MODE)
+ return XDP_MODE_DRV;
+ return XDP_MODE_SKB;
+}
- return xdp.prog_id;
+static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode)
+{
+ switch (mode) {
+ case XDP_MODE_SKB:
+ return generic_xdp_install;
+ case XDP_MODE_DRV:
+ case XDP_MODE_HW:
+ return dev->netdev_ops->ndo_bpf;
+ default:
+ return NULL;
+ };
+}
+
+static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev,
+ enum bpf_xdp_mode mode)
+{
+ return dev->xdp_state[mode].link;
}
-static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
- struct netlink_ext_ack *extack, u32 flags,
- struct bpf_prog *prog)
+static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
+ enum bpf_xdp_mode mode)
+{
+ struct bpf_xdp_link *link = dev_xdp_link(dev, mode);
+
+ if (link)
+ return link->link.prog;
+ return dev->xdp_state[mode].prog;
+}
+
+u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
+{
+ struct bpf_prog *prog = dev_xdp_prog(dev, mode);
+
+ return prog ? prog->aux->id : 0;
+}
+
+static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode,
+ struct bpf_xdp_link *link)
+{
+ dev->xdp_state[mode].link = link;
+ dev->xdp_state[mode].prog = NULL;
+}
+
+static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode,
+ struct bpf_prog *prog)
+{
+ dev->xdp_state[mode].link = NULL;
+ dev->xdp_state[mode].prog = prog;
+}
+
+static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
+ bpf_op_t bpf_op, struct netlink_ext_ack *extack,
+ u32 flags, struct bpf_prog *prog)
{
- bool non_hw = !(flags & XDP_FLAGS_HW_MODE);
- struct bpf_prog *prev_prog = NULL;
struct netdev_bpf xdp;
int err;
- if (non_hw) {
- prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op,
- XDP_QUERY_PROG));
- if (IS_ERR(prev_prog))
- prev_prog = NULL;
- }
-
memset(&xdp, 0, sizeof(xdp));
- if (flags & XDP_FLAGS_HW_MODE)
- xdp.command = XDP_SETUP_PROG_HW;
- else
- xdp.command = XDP_SETUP_PROG;
+ xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
xdp.extack = extack;
xdp.flags = flags;
xdp.prog = prog;
+ /* Drivers assume refcnt is already incremented (i.e, prog pointer is
+ * "moved" into driver), so they don't increment it on their own, but
+ * they do decrement refcnt when program is detached or replaced.
+ * Given net_device also owns link/prog, we need to bump refcnt here
+ * to prevent drivers from underflowing it.
+ */
+ if (prog)
+ bpf_prog_inc(prog);
err = bpf_op(dev, &xdp);
- if (!err && non_hw)
- bpf_prog_change_xdp(prev_prog, prog);
+ if (err) {
+ if (prog)
+ bpf_prog_put(prog);
+ return err;
+ }
- if (prev_prog)
- bpf_prog_put(prev_prog);
+ if (mode != XDP_MODE_HW)
+ bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog);
- return err;
+ return 0;
}
static void dev_xdp_uninstall(struct net_device *dev)
{
- struct netdev_bpf xdp;
- bpf_op_t ndo_bpf;
+ struct bpf_xdp_link *link;
+ struct bpf_prog *prog;
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
- /* Remove generic XDP */
- WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
+ ASSERT_RTNL();
- /* Remove from the driver */
- ndo_bpf = dev->netdev_ops->ndo_bpf;
- if (!ndo_bpf)
- return;
+ for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) {
+ prog = dev_xdp_prog(dev, mode);
+ if (!prog)
+ continue;
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
- WARN_ON(ndo_bpf(dev, &xdp));
- if (xdp.prog_id)
- WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
- NULL));
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ if (!bpf_op)
+ continue;
- /* Remove HW offload */
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG_HW;
- if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
- WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
- NULL));
+ WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
+
+ /* auto-detach link from net device */
+ link = dev_xdp_link(dev, mode);
+ if (link)
+ link->dev = NULL;
+ else
+ bpf_prog_put(prog);
+
+ dev_xdp_set_link(dev, mode, NULL);
+ }
}
-/**
- * dev_change_xdp_fd - set or clear a bpf program for a device rx path
- * @dev: device
- * @extack: netlink extended ack
- * @fd: new program fd or negative value to clear
- * @expected_fd: old program fd that userspace expects to replace or clear
- * @flags: xdp-related flags
- *
- * Set or clear a bpf program for a device
- */
-int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
- int fd, int expected_fd, u32 flags)
+static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link, struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog, u32 flags)
{
- const struct net_device_ops *ops = dev->netdev_ops;
- enum bpf_netdev_command query;
- u32 prog_id, expected_id = 0;
- bpf_op_t bpf_op, bpf_chk;
- struct bpf_prog *prog;
- bool offload;
+ struct bpf_prog *cur_prog;
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
int err;
ASSERT_RTNL();
- offload = flags & XDP_FLAGS_HW_MODE;
- query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+ /* either link or prog attachment, never both */
+ if (link && (new_prog || old_prog))
+ return -EINVAL;
+ /* link supports only XDP mode flags */
+ if (link && (flags & ~XDP_FLAGS_MODES)) {
+ NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment");
+ return -EINVAL;
+ }
+ /* just one XDP mode bit should be set, zero defaults to SKB mode */
+ if (hweight32(flags & XDP_FLAGS_MODES) > 1) {
+ NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set");
+ return -EINVAL;
+ }
+ /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */
+ if (old_prog && !(flags & XDP_FLAGS_REPLACE)) {
+ NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified");
+ return -EINVAL;
+ }
- bpf_op = bpf_chk = ops->ndo_bpf;
- if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) {
- NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode");
- return -EOPNOTSUPP;
+ mode = dev_xdp_mode(flags);
+ /* can't replace attached link */
+ if (dev_xdp_link(dev, mode)) {
+ NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link");
+ return -EBUSY;
}
- if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
- bpf_op = generic_xdp_install;
- if (bpf_op == bpf_chk)
- bpf_chk = generic_xdp_install;
-
- prog_id = __dev_xdp_query(dev, bpf_op, query);
- if (flags & XDP_FLAGS_REPLACE) {
- if (expected_fd >= 0) {
- prog = bpf_prog_get_type_dev(expected_fd,
- BPF_PROG_TYPE_XDP,
- bpf_op == ops->ndo_bpf);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
- expected_id = prog->aux->id;
- bpf_prog_put(prog);
- }
- if (prog_id != expected_id) {
- NL_SET_ERR_MSG(extack, "Active program does not match expected");
- return -EEXIST;
- }
+ cur_prog = dev_xdp_prog(dev, mode);
+ /* can't replace attached prog with link */
+ if (link && cur_prog) {
+ NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link");
+ return -EBUSY;
+ }
+ if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) {
+ NL_SET_ERR_MSG(extack, "Active program does not match expected");
+ return -EEXIST;
+ }
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) {
+ NL_SET_ERR_MSG(extack, "XDP program already attached");
+ return -EBUSY;
}
- if (fd >= 0) {
- if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
- NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
- return -EEXIST;
- }
- if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
- NL_SET_ERR_MSG(extack, "XDP program already attached");
- return -EBUSY;
- }
+ /* put effective new program into new_prog */
+ if (link)
+ new_prog = link->link.prog;
- prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
- bpf_op == ops->ndo_bpf);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
+ if (new_prog) {
+ bool offload = mode == XDP_MODE_HW;
+ enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB
+ ? XDP_MODE_DRV : XDP_MODE_SKB;
- if (!offload && bpf_prog_is_dev_bound(prog->aux)) {
- NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported");
- bpf_prog_put(prog);
+ if (!offload && dev_xdp_prog(dev, other_mode)) {
+ NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
+ return -EEXIST;
+ }
+ if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
+ NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported");
return -EINVAL;
}
-
- if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
+ if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
- bpf_prog_put(prog);
return -EINVAL;
}
+ if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) {
+ NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device");
+ return -EINVAL;
+ }
+ }
- /* prog->aux->id may be 0 for orphaned device-bound progs */
- if (prog->aux->id && prog->aux->id == prog_id) {
- bpf_prog_put(prog);
- return 0;
+ /* don't call drivers if the effective program didn't change */
+ if (new_prog != cur_prog) {
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ if (!bpf_op) {
+ NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode");
+ return -EOPNOTSUPP;
+ }
+
+ err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog);
+ if (err)
+ return err;
+ }
+
+ if (link)
+ dev_xdp_set_link(dev, mode, link);
+ else
+ dev_xdp_set_prog(dev, mode, new_prog);
+ if (cur_prog)
+ bpf_prog_put(cur_prog);
+
+ return 0;
+}
+
+static int dev_xdp_attach_link(struct net_device *dev,
+ struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link)
+{
+ return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags);
+}
+
+static int dev_xdp_detach_link(struct net_device *dev,
+ struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link)
+{
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
+
+ ASSERT_RTNL();
+
+ mode = dev_xdp_mode(link->flags);
+ if (dev_xdp_link(dev, mode) != link)
+ return -EINVAL;
+
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
+ dev_xdp_set_link(dev, mode, NULL);
+ return 0;
+}
+
+static void bpf_xdp_link_release(struct bpf_link *link)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+
+ rtnl_lock();
+
+ /* if racing with net_device's tear down, xdp_link->dev might be
+ * already NULL, in which case link was already auto-detached
+ */
+ if (xdp_link->dev) {
+ WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link));
+ xdp_link->dev = NULL;
+ }
+
+ rtnl_unlock();
+}
+
+static int bpf_xdp_link_detach(struct bpf_link *link)
+{
+ bpf_xdp_link_release(link);
+ return 0;
+}
+
+static void bpf_xdp_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+
+ kfree(xdp_link);
+}
+
+static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ u32 ifindex = 0;
+
+ rtnl_lock();
+ if (xdp_link->dev)
+ ifindex = xdp_link->dev->ifindex;
+ rtnl_unlock();
+
+ seq_printf(seq, "ifindex:\t%u\n", ifindex);
+}
+
+static int bpf_xdp_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ u32 ifindex = 0;
+
+ rtnl_lock();
+ if (xdp_link->dev)
+ ifindex = xdp_link->dev->ifindex;
+ rtnl_unlock();
+
+ info->xdp.ifindex = ifindex;
+ return 0;
+}
+
+static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
+ int err = 0;
+
+ rtnl_lock();
+
+ /* link might have been auto-released already, so fail */
+ if (!xdp_link->dev) {
+ err = -ENOLINK;
+ goto out_unlock;
+ }
+
+ if (old_prog && link->prog != old_prog) {
+ err = -EPERM;
+ goto out_unlock;
+ }
+ old_prog = link->prog;
+ if (old_prog == new_prog) {
+ /* no-op, don't disturb drivers */
+ bpf_prog_put(new_prog);
+ goto out_unlock;
+ }
+
+ mode = dev_xdp_mode(xdp_link->flags);
+ bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode);
+ err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL,
+ xdp_link->flags, new_prog);
+ if (err)
+ goto out_unlock;
+
+ old_prog = xchg(&link->prog, new_prog);
+ bpf_prog_put(old_prog);
+
+out_unlock:
+ rtnl_unlock();
+ return err;
+}
+
+static const struct bpf_link_ops bpf_xdp_link_lops = {
+ .release = bpf_xdp_link_release,
+ .dealloc = bpf_xdp_link_dealloc,
+ .detach = bpf_xdp_link_detach,
+ .show_fdinfo = bpf_xdp_link_show_fdinfo,
+ .fill_link_info = bpf_xdp_link_fill_link_info,
+ .update_prog = bpf_xdp_link_update,
+};
+
+int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_link_primer link_primer;
+ struct bpf_xdp_link *link;
+ struct net_device *dev;
+ int err, fd;
+
+ dev = dev_get_by_index(net, attr->link_create.target_ifindex);
+ if (!dev)
+ return -EINVAL;
+
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto out_put_dev;
+ }
+
+ bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
+ link->dev = dev;
+ link->flags = attr->link_create.flags;
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err) {
+ kfree(link);
+ goto out_put_dev;
+ }
+
+ rtnl_lock();
+ err = dev_xdp_attach_link(dev, NULL, link);
+ rtnl_unlock();
+
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ goto out_put_dev;
+ }
+
+ fd = bpf_link_settle(&link_primer);
+ /* link itself doesn't hold dev's refcnt to not complicate shutdown */
+ dev_put(dev);
+ return fd;
+
+out_put_dev:
+ dev_put(dev);
+ return err;
+}
+
+/**
+ * dev_change_xdp_fd - set or clear a bpf program for a device rx path
+ * @dev: device
+ * @extack: netlink extended ack
+ * @fd: new program fd or negative value to clear
+ * @expected_fd: old program fd that userspace expects to replace or clear
+ * @flags: xdp-related flags
+ *
+ * Set or clear a bpf program for a device
+ */
+int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+ int fd, int expected_fd, u32 flags)
+{
+ enum bpf_xdp_mode mode = dev_xdp_mode(flags);
+ struct bpf_prog *new_prog = NULL, *old_prog = NULL;
+ int err;
+
+ ASSERT_RTNL();
+
+ if (fd >= 0) {
+ new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+ mode != XDP_MODE_SKB);
+ if (IS_ERR(new_prog))
+ return PTR_ERR(new_prog);
+ }
+
+ if (expected_fd >= 0) {
+ old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP,
+ mode != XDP_MODE_SKB);
+ if (IS_ERR(old_prog)) {
+ err = PTR_ERR(old_prog);
+ old_prog = NULL;
+ goto err_out;
}
- } else {
- if (!prog_id)
- return 0;
- prog = NULL;
}
- err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
- if (err < 0 && prog)
- bpf_prog_put(prog);
+ err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags);
+err_out:
+ if (err && new_prog)
+ bpf_prog_put(new_prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
return err;
}
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 547b587c1950..b2cf9b7bb7b8 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -5,6 +5,7 @@
#include <linux/rtnetlink.h>
#include <linux/net_tstamp.h>
#include <linux/wireless.h>
+#include <net/dsa.h>
#include <net/wext.h>
/*
@@ -225,6 +226,26 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
return 0;
}
+static int dev_do_ioctl(struct net_device *dev,
+ struct ifreq *ifr, unsigned int cmd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ int err = -EOPNOTSUPP;
+
+ err = dsa_ndo_do_ioctl(dev, ifr, cmd);
+ if (err == 0 || err != -EOPNOTSUPP)
+ return err;
+
+ if (ops->ndo_do_ioctl) {
+ if (netif_device_present(dev))
+ err = ops->ndo_do_ioctl(dev, ifr, cmd);
+ else
+ err = -ENODEV;
+ }
+
+ return err;
+}
+
/*
* Perform the SIOCxIFxxx calls, inside rtnl_lock()
*/
@@ -323,13 +344,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
cmd == SIOCSHWTSTAMP ||
cmd == SIOCGHWTSTAMP ||
cmd == SIOCWANDEV) {
- err = -EOPNOTSUPP;
- if (ops->ndo_do_ioctl) {
- if (netif_device_present(dev))
- err = ops->ndo_do_ioctl(dev, ifr, cmd);
- else
- err = -ENODEV;
- }
+ err = dev_do_ioctl(dev, ifr, cmd);
} else
err = -EINVAL;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 47f14a2f25fb..e674f0f46dc2 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -85,6 +85,10 @@ EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
+static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
+ [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
+};
+
static LIST_HEAD(devlink_list);
/* devlink_mutex
@@ -382,19 +386,19 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
return NULL;
}
-#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0)
-#define DEVLINK_NL_FLAG_NEED_PORT BIT(1)
-#define DEVLINK_NL_FLAG_NEED_SB BIT(2)
+#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
+#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
/* The per devlink instance lock is taken by default in the pre-doit
* operation, yet several commands do not require this. The global
* devlink lock is taken and protects from disruption by user-calls.
*/
-#define DEVLINK_NL_FLAG_NO_LOCK BIT(3)
+#define DEVLINK_NL_FLAG_NO_LOCK BIT(2)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
+ struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
@@ -406,27 +410,18 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
}
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_lock(&devlink->lock);
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
- info->user_ptr[0] = devlink;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
- struct devlink_port *devlink_port;
-
+ info->user_ptr[0] = devlink;
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
devlink_port = devlink_port_get_from_info(devlink, info);
if (IS_ERR(devlink_port)) {
err = PTR_ERR(devlink_port);
goto unlock;
}
- info->user_ptr[0] = devlink_port;
- }
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) {
- struct devlink_sb *devlink_sb;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb)) {
- err = PTR_ERR(devlink_sb);
- goto unlock;
- }
- info->user_ptr[1] = devlink_sb;
+ info->user_ptr[1] = devlink_port;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
+ devlink_port = devlink_port_get_from_info(devlink, info);
+ if (!IS_ERR(devlink_port))
+ info->user_ptr[1] = devlink_port;
}
return 0;
@@ -442,16 +437,8 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
{
struct devlink *devlink;
- /* When devlink changes netns, it would not be found
- * by devlink_get_from_info(). So try if it is stored first.
- */
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
- devlink = info->user_ptr[0];
- } else {
- devlink = devlink_get_from_info(info);
- WARN_ON(IS_ERR(devlink));
- }
- if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ devlink = info->user_ptr[0];
+ if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
mutex_unlock(&devlink_mutex);
}
@@ -524,8 +511,14 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
- if (!attrs->set)
+ if (!devlink_port->attrs_set)
return 0;
+ if (attrs->lanes) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes))
+ return -EMSGSIZE;
+ }
+ if (nla_put_u8(msg, DEVLINK_ATTR_PORT_SPLITTABLE, attrs->splittable))
+ return -EMSGSIZE;
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
return -EMSGSIZE;
switch (devlink_port->attrs.flavour) {
@@ -563,10 +556,54 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
return 0;
}
+static int
+devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = port->devlink;
+ const struct devlink_ops *ops;
+ struct nlattr *function_attr;
+ bool empty_nest = true;
+ int err = 0;
+
+ function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION);
+ if (!function_attr)
+ return -EMSGSIZE;
+
+ ops = devlink->ops;
+ if (ops->port_function_hw_addr_get) {
+ int hw_addr_len;
+ u8 hw_addr[MAX_ADDR_LEN];
+
+ err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack);
+ if (err == -EOPNOTSUPP) {
+ /* Port function attributes are optional for a port. If port doesn't
+ * support function attribute, returning -EOPNOTSUPP is not an error.
+ */
+ err = 0;
+ goto out;
+ } else if (err) {
+ goto out;
+ }
+ err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr);
+ if (err)
+ goto out;
+ empty_nest = false;
+ }
+
+out:
+ if (err || empty_nest)
+ nla_nest_cancel(msg, function_attr);
+ else
+ nla_nest_end(msg, function_attr);
+ return err;
+}
+
static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_port *devlink_port,
enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
+ u32 seq, int flags,
+ struct netlink_ext_ack *extack)
{
void *hdr;
@@ -607,6 +644,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
spin_unlock_bh(&devlink_port->type_lock);
if (devlink_nl_port_attrs_put(msg, devlink_port))
goto nla_put_failure;
+ if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
+ goto nla_put_failure;
genlmsg_end(msg, hdr);
return 0;
@@ -634,7 +673,8 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
if (!msg)
return;
- err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0);
+ err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0,
+ NULL);
if (err) {
nlmsg_free(msg);
return;
@@ -697,7 +737,7 @@ out:
static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
struct sk_buff *msg;
int err;
@@ -708,7 +748,8 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
err = devlink_nl_port_fill(msg, devlink, devlink_port,
DEVLINK_CMD_PORT_NEW,
- info->snd_portid, info->snd_seq, 0);
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
if (err) {
nlmsg_free(msg);
return err;
@@ -740,7 +781,8 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ NLM_F_MULTI,
+ cb->extack);
if (err) {
mutex_unlock(&devlink->lock);
goto out;
@@ -778,10 +820,71 @@ static int devlink_port_type_set(struct devlink *devlink,
return -EOPNOTSUPP;
}
+static int
+devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port,
+ const struct nlattr *attr, struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops;
+ const u8 *hw_addr;
+ int hw_addr_len;
+ int err;
+
+ hw_addr = nla_data(attr);
+ hw_addr_len = nla_len(attr);
+ if (hw_addr_len > MAX_ADDR_LEN) {
+ NL_SET_ERR_MSG_MOD(extack, "Port function hardware address too long");
+ return -EINVAL;
+ }
+ if (port->type == DEVLINK_PORT_TYPE_ETH) {
+ if (hw_addr_len != ETH_ALEN) {
+ NL_SET_ERR_MSG_MOD(extack, "Address must be 6 bytes for Ethernet device");
+ return -EINVAL;
+ }
+ if (!is_unicast_ether_addr(hw_addr)) {
+ NL_SET_ERR_MSG_MOD(extack, "Non-unicast hardware address unsupported");
+ return -EINVAL;
+ }
+ }
+
+ ops = devlink->ops;
+ if (!ops->port_function_hw_addr_set) {
+ NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
+ return -EOPNOTSUPP;
+ }
+
+ err = ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack);
+ if (err)
+ return err;
+
+ devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
+ return 0;
+}
+
+static int
+devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
+ const struct nlattr *attr, struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr,
+ devlink_function_nl_policy, extack);
+ if (err < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Fail to parse port function attributes");
+ return err;
+ }
+
+ attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
+ if (attr)
+ err = devlink_port_function_hw_addr_set(devlink, port, attr, extack);
+
+ return err;
+}
+
static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
int err;
@@ -793,6 +896,16 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
if (err)
return err;
}
+
+ if (info->attrs[DEVLINK_ATTR_PORT_FUNCTION]) {
+ struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
+ struct netlink_ext_ack *extack = info->extack;
+
+ err = devlink_port_function_set(devlink, devlink_port, attr, extack);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -810,6 +923,7 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
+ struct devlink_port *devlink_port;
u32 port_index;
u32 count;
@@ -817,8 +931,27 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
!info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT])
return -EINVAL;
+ devlink_port = devlink_port_get_from_info(devlink, info);
port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
+
+ if (IS_ERR(devlink_port))
+ return -EINVAL;
+
+ if (!devlink_port->attrs.splittable) {
+ /* Split ports cannot be split. */
+ if (devlink_port->attrs.split)
+ NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split further");
+ else
+ NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split");
+ return -EINVAL;
+ }
+
+ if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid split count");
+ return -EINVAL;
+ }
+
return devlink_port_split(devlink, port_index, count, info->extack);
}
@@ -886,10 +1019,14 @@ static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -991,11 +1128,15 @@ static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
u16 pool_index;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1102,12 +1243,16 @@ static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
enum devlink_sb_threshold_type threshold_type;
+ struct devlink_sb *devlink_sb;
u16 pool_index;
u32 size;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1186,13 +1331,17 @@ nla_put_failure:
static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
u16 pool_index;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1304,12 +1453,17 @@ static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb;
u16 pool_index;
u32 threshold;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1391,14 +1545,18 @@ nla_put_failure:
static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
enum devlink_sb_pool_type pool_type;
u16 tc_index;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_type_get_from_info(info, &pool_type);
if (err)
return err;
@@ -1540,14 +1698,19 @@ static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
enum devlink_sb_pool_type pool_type;
+ struct devlink_sb *devlink_sb;
u16 tc_index;
u16 pool_index;
u32 threshold;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_type_get_from_info(info, &pool_type);
if (err)
return err;
@@ -1575,8 +1738,12 @@ static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
const struct devlink_ops *ops = devlink->ops;
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
if (ops->sb_occ_snapshot)
return ops->sb_occ_snapshot(devlink, devlink_sb->index);
@@ -1587,8 +1754,12 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
const struct devlink_ops *ops = devlink->ops;
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
if (ops->sb_occ_max_clear)
return ops->sb_occ_max_clear(devlink, devlink_sb->index);
@@ -2772,7 +2943,7 @@ static void devlink_reload_netns_change(struct devlink *devlink,
DEVLINK_CMD_PARAM_NEW);
}
-static bool devlink_reload_supported(struct devlink *devlink)
+static bool devlink_reload_supported(const struct devlink *devlink)
{
return devlink->ops->reload_down && devlink->ops->reload_up;
}
@@ -2818,7 +2989,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
struct net *dest_net = NULL;
int err;
- if (!devlink_reload_supported(devlink) || !devlink->reload_enabled)
+ if (!devlink_reload_supported(devlink))
return -EOPNOTSUPP;
err = devlink_resources_validate(devlink, NULL, info);
@@ -4388,6 +4559,14 @@ int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
}
EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
+int devlink_info_board_serial_number_put(struct devlink_info_req *req,
+ const char *bsn)
+{
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
+ bsn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
+
static int devlink_info_version_put(struct devlink_info_req *req, int attr,
const char *version_name,
const char *version_value)
@@ -5141,6 +5320,7 @@ struct devlink_health_reporter {
void *priv;
const struct devlink_health_reporter_ops *ops;
struct devlink *devlink;
+ struct devlink_port *devlink_port;
struct devlink_fmsg *dump_fmsg;
struct mutex dump_lock; /* lock parallel read/write from dump buffers */
u64 graceful_period;
@@ -5163,18 +5343,98 @@ devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
static struct devlink_health_reporter *
-devlink_health_reporter_find_by_name(struct devlink *devlink,
- const char *reporter_name)
+__devlink_health_reporter_find_by_name(struct list_head *reporter_list,
+ struct mutex *list_lock,
+ const char *reporter_name)
{
struct devlink_health_reporter *reporter;
- lockdep_assert_held(&devlink->reporters_lock);
- list_for_each_entry(reporter, &devlink->reporter_list, list)
+ lockdep_assert_held(list_lock);
+ list_for_each_entry(reporter, reporter_list, list)
if (!strcmp(reporter->ops->name, reporter_name))
return reporter;
return NULL;
}
+static struct devlink_health_reporter *
+devlink_health_reporter_find_by_name(struct devlink *devlink,
+ const char *reporter_name)
+{
+ return __devlink_health_reporter_find_by_name(&devlink->reporter_list,
+ &devlink->reporters_lock,
+ reporter_name);
+}
+
+static struct devlink_health_reporter *
+devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port,
+ const char *reporter_name)
+{
+ return __devlink_health_reporter_find_by_name(&devlink_port->reporter_list,
+ &devlink_port->reporters_lock,
+ reporter_name);
+}
+
+static struct devlink_health_reporter *
+__devlink_health_reporter_create(struct devlink *devlink,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
+{
+ struct devlink_health_reporter *reporter;
+
+ if (WARN_ON(graceful_period && !ops->recover))
+ return ERR_PTR(-EINVAL);
+
+ reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
+ if (!reporter)
+ return ERR_PTR(-ENOMEM);
+
+ reporter->priv = priv;
+ reporter->ops = ops;
+ reporter->devlink = devlink;
+ reporter->graceful_period = graceful_period;
+ reporter->auto_recover = !!ops->recover;
+ reporter->auto_dump = !!ops->dump;
+ mutex_init(&reporter->dump_lock);
+ refcount_set(&reporter->refcount, 1);
+ return reporter;
+}
+
+/**
+ * devlink_port_health_reporter_create - create devlink health reporter for
+ * specified port instance
+ *
+ * @port: devlink_port which should contain the new reporter
+ * @ops: ops
+ * @graceful_period: to avoid recovery loops, in msecs
+ * @priv: priv
+ */
+struct devlink_health_reporter *
+devlink_port_health_reporter_create(struct devlink_port *port,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
+{
+ struct devlink_health_reporter *reporter;
+
+ mutex_lock(&port->reporters_lock);
+ if (__devlink_health_reporter_find_by_name(&port->reporter_list,
+ &port->reporters_lock, ops->name)) {
+ reporter = ERR_PTR(-EEXIST);
+ goto unlock;
+ }
+
+ reporter = __devlink_health_reporter_create(port->devlink, ops,
+ graceful_period, priv);
+ if (IS_ERR(reporter))
+ goto unlock;
+
+ reporter->devlink_port = port;
+ list_add_tail(&reporter->list, &port->reporter_list);
+unlock:
+ mutex_unlock(&port->reporters_lock);
+ return reporter;
+}
+EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create);
+
/**
* devlink_health_reporter_create - create devlink health reporter
*
@@ -5196,25 +5456,11 @@ devlink_health_reporter_create(struct devlink *devlink,
goto unlock;
}
- if (WARN_ON(graceful_period && !ops->recover)) {
- reporter = ERR_PTR(-EINVAL);
- goto unlock;
- }
-
- reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
- if (!reporter) {
- reporter = ERR_PTR(-ENOMEM);
+ reporter = __devlink_health_reporter_create(devlink, ops,
+ graceful_period, priv);
+ if (IS_ERR(reporter))
goto unlock;
- }
- reporter->priv = priv;
- reporter->ops = ops;
- reporter->devlink = devlink;
- reporter->graceful_period = graceful_period;
- reporter->auto_recover = !!ops->recover;
- reporter->auto_dump = !!ops->dump;
- mutex_init(&reporter->dump_lock);
- refcount_set(&reporter->refcount, 1);
list_add_tail(&reporter->list, &devlink->reporter_list);
unlock:
mutex_unlock(&devlink->reporters_lock);
@@ -5222,6 +5468,29 @@ unlock:
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
+static void
+devlink_health_reporter_free(struct devlink_health_reporter *reporter)
+{
+ mutex_destroy(&reporter->dump_lock);
+ if (reporter->dump_fmsg)
+ devlink_fmsg_free(reporter->dump_fmsg);
+ kfree(reporter);
+}
+
+static void
+devlink_health_reporter_put(struct devlink_health_reporter *reporter)
+{
+ if (refcount_dec_and_test(&reporter->refcount))
+ devlink_health_reporter_free(reporter);
+}
+
+static void
+__devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+ list_del(&reporter->list);
+ devlink_health_reporter_put(reporter);
+}
+
/**
* devlink_health_reporter_destroy - destroy devlink health reporter
*
@@ -5230,18 +5499,30 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
void
devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
{
- mutex_lock(&reporter->devlink->reporters_lock);
- list_del(&reporter->list);
- mutex_unlock(&reporter->devlink->reporters_lock);
- while (refcount_read(&reporter->refcount) > 1)
- msleep(100);
- mutex_destroy(&reporter->dump_lock);
- if (reporter->dump_fmsg)
- devlink_fmsg_free(reporter->dump_fmsg);
- kfree(reporter);
+ struct mutex *lock = &reporter->devlink->reporters_lock;
+
+ mutex_lock(lock);
+ __devlink_health_reporter_destroy(reporter);
+ mutex_unlock(lock);
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
+/**
+ * devlink_port_health_reporter_destroy - destroy devlink port health reporter
+ *
+ * @reporter: devlink health reporter to destroy
+ */
+void
+devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+ struct mutex *lock = &reporter->devlink_port->reporters_lock;
+
+ mutex_lock(lock);
+ __devlink_health_reporter_destroy(reporter);
+ mutex_unlock(lock);
+}
+EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy);
+
static int
devlink_nl_health_reporter_fill(struct sk_buff *msg,
struct devlink *devlink,
@@ -5259,6 +5540,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
if (devlink_nl_put_handle(msg, devlink))
goto genlmsg_cancel;
+ if (reporter->devlink_port) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, reporter->devlink_port->index))
+ goto genlmsg_cancel;
+ }
reporter_attr = nla_nest_start_noflag(msg,
DEVLINK_ATTR_HEALTH_REPORTER);
if (!reporter_attr)
@@ -5466,17 +5751,28 @@ devlink_health_reporter_get_from_attrs(struct devlink *devlink,
struct nlattr **attrs)
{
struct devlink_health_reporter *reporter;
+ struct devlink_port *devlink_port;
char *reporter_name;
if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
return NULL;
reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
- mutex_lock(&devlink->reporters_lock);
- reporter = devlink_health_reporter_find_by_name(devlink, reporter_name);
- if (reporter)
- refcount_inc(&reporter->refcount);
- mutex_unlock(&devlink->reporters_lock);
+ devlink_port = devlink_port_get_from_attrs(devlink, attrs);
+ if (IS_ERR(devlink_port)) {
+ mutex_lock(&devlink->reporters_lock);
+ reporter = devlink_health_reporter_find_by_name(devlink, reporter_name);
+ if (reporter)
+ refcount_inc(&reporter->refcount);
+ mutex_unlock(&devlink->reporters_lock);
+ } else {
+ mutex_lock(&devlink_port->reporters_lock);
+ reporter = devlink_port_health_reporter_find_by_name(devlink_port, reporter_name);
+ if (reporter)
+ refcount_inc(&reporter->refcount);
+ mutex_unlock(&devlink_port->reporters_lock);
+ }
+
return reporter;
}
@@ -5508,12 +5804,6 @@ unlock:
return NULL;
}
-static void
-devlink_health_reporter_put(struct devlink_health_reporter *reporter)
-{
- refcount_dec(&reporter->refcount);
-}
-
void
devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
enum devlink_health_reporter_state state)
@@ -5570,6 +5860,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
struct netlink_callback *cb)
{
struct devlink_health_reporter *reporter;
+ struct devlink_port *port;
struct devlink *devlink;
int start = cb->args[0];
int idx = 0;
@@ -5600,6 +5891,31 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
}
mutex_unlock(&devlink->reporters_lock);
}
+
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ list_for_each_entry(port, &devlink->port_list, list) {
+ mutex_lock(&port->reporters_lock);
+ list_for_each_entry(reporter, &port->reporter_list, list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ mutex_unlock(&port->reporters_lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&port->reporters_lock);
+ }
+ }
out:
mutex_unlock(&devlink_mutex);
@@ -6107,7 +6423,7 @@ static int __devlink_trap_action_set(struct devlink *devlink,
}
err = devlink->ops->trap_action_set(devlink, trap_item->trap,
- trap_action);
+ trap_action, extack);
if (err)
return err;
@@ -6397,7 +6713,8 @@ static int devlink_trap_group_set(struct devlink *devlink,
}
policer = policer_item ? policer_item->policer : NULL;
- err = devlink->ops->trap_group_set(devlink, group_item->group, policer);
+ err = devlink->ops->trap_group_set(devlink, group_item->group, policer,
+ extack);
if (err)
return err;
@@ -6721,6 +7038,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 },
[DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 },
[DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -6729,7 +7047,6 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_get_doit,
.dumpit = devlink_nl_cmd_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -6752,24 +7069,20 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_split_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PORT_UNSPLIT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_unsplit_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_SB_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_get_doit,
.dumpit = devlink_nl_cmd_sb_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
/* can be retrieved by unprivileged users */
},
{
@@ -6777,8 +7090,6 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_pool_get_doit,
.dumpit = devlink_nl_cmd_sb_pool_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
/* can be retrieved by unprivileged users */
},
{
@@ -6786,16 +7097,13 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_pool_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_port_pool_get_doit,
.dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
{
@@ -6803,16 +7111,14 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_port_pool_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
.dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
{
@@ -6820,60 +7126,50 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_occ_snapshot_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_occ_max_clear_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_ESWITCH_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_get_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_ESWITCH_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_table_get,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_entries_get,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_headers_get,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -6881,20 +7177,17 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_table_counters_set,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_RESOURCE_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_resource_set,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_RESOURCE_DUMP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_resource_dump,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -6902,15 +7195,13 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_reload,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PARAM_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_param_get_doit,
.dumpit = devlink_nl_cmd_param_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -6918,7 +7209,6 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_param_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_PORT_PARAM_GET,
@@ -6941,21 +7231,18 @@ static const struct genl_ops devlink_nl_ops[] = {
.doit = devlink_nl_cmd_region_get_doit,
.dumpit = devlink_nl_cmd_region_get_dumpit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_REGION_NEW,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_region_new,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_REGION_DEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_region_del,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_REGION_READ,
@@ -6963,14 +7250,12 @@ static const struct genl_ops devlink_nl_ops[] = {
GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = devlink_nl_cmd_region_read_dumpit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_INFO_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_info_get_doit,
.dumpit = devlink_nl_cmd_info_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -6978,7 +7263,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_get_doit,
.dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
/* can be retrieved by unprivileged users */
},
@@ -6987,7 +7272,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
},
{
@@ -6995,7 +7280,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_recover_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
},
{
@@ -7003,7 +7288,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_diagnose_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
},
{
@@ -7012,7 +7297,7 @@ static const struct genl_ops devlink_nl_ops[] = {
GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
},
{
@@ -7020,7 +7305,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
},
{
@@ -7028,46 +7313,39 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_flash_update,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_TRAP_GET,
.doit = devlink_nl_cmd_trap_get_doit,
.dumpit = devlink_nl_cmd_trap_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_TRAP_SET,
.doit = devlink_nl_cmd_trap_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_TRAP_GROUP_GET,
.doit = devlink_nl_cmd_trap_group_get_doit,
.dumpit = devlink_nl_cmd_trap_group_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_TRAP_GROUP_SET,
.doit = devlink_nl_cmd_trap_group_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_TRAP_POLICER_GET,
.doit = devlink_nl_cmd_trap_policer_get_doit,
.dumpit = devlink_nl_cmd_trap_policer_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_TRAP_POLICER_SET,
.doit = devlink_nl_cmd_trap_policer_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
};
@@ -7132,9 +7410,9 @@ EXPORT_SYMBOL_GPL(devlink_alloc);
*/
int devlink_register(struct devlink *devlink, struct device *dev)
{
- mutex_lock(&devlink_mutex);
devlink->dev = dev;
devlink->registered = true;
+ mutex_lock(&devlink_mutex);
list_add_tail(&devlink->list, &devlink_list);
devlink_notify(devlink, DEVLINK_CMD_NEW);
mutex_unlock(&devlink_mutex);
@@ -7280,6 +7558,8 @@ int devlink_port_register(struct devlink *devlink,
list_add_tail(&devlink_port->list, &devlink->port_list);
INIT_LIST_HEAD(&devlink_port->param_list);
mutex_unlock(&devlink->lock);
+ INIT_LIST_HEAD(&devlink_port->reporter_list);
+ mutex_init(&devlink_port->reporters_lock);
INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
devlink_port_type_warn_schedule(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
@@ -7296,6 +7576,8 @@ void devlink_port_unregister(struct devlink_port *devlink_port)
{
struct devlink *devlink = devlink_port->devlink;
+ WARN_ON(!list_empty(&devlink_port->reporter_list));
+ mutex_destroy(&devlink_port->reporters_lock);
devlink_port_type_warn_cancel(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
mutex_lock(&devlink->lock);
@@ -7389,24 +7671,20 @@ void devlink_port_type_clear(struct devlink_port *devlink_port)
EXPORT_SYMBOL_GPL(devlink_port_type_clear);
static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
- enum devlink_port_flavour flavour,
- const unsigned char *switch_id,
- unsigned char switch_id_len)
+ enum devlink_port_flavour flavour)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
if (WARN_ON(devlink_port->registered))
return -EEXIST;
- attrs->set = true;
+ devlink_port->attrs_set = true;
attrs->flavour = flavour;
- if (switch_id) {
- attrs->switch_port = true;
- if (WARN_ON(switch_id_len > MAX_PHYS_ITEM_ID_LEN))
- switch_id_len = MAX_PHYS_ITEM_ID_LEN;
- memcpy(attrs->switch_id.id, switch_id, switch_id_len);
- attrs->switch_id.id_len = switch_id_len;
+ if (attrs->switch_id.id_len) {
+ devlink_port->switch_port = true;
+ if (WARN_ON(attrs->switch_id.id_len > MAX_PHYS_ITEM_ID_LEN))
+ attrs->switch_id.id_len = MAX_PHYS_ITEM_ID_LEN;
} else {
- attrs->switch_port = false;
+ devlink_port->switch_port = false;
}
return 0;
}
@@ -7415,33 +7693,18 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
* devlink_port_attrs_set - Set port attributes
*
* @devlink_port: devlink port
- * @flavour: flavour of the port
- * @port_number: number of the port that is facing user, for example
- * the front panel port number
- * @split: indicates if this is split port
- * @split_subport_number: if the port is split, this is the number
- * of subport.
- * @switch_id: if the port is part of switch, this is buffer with ID,
- * otwerwise this is NULL
- * @switch_id_len: length of the switch_id buffer
+ * @attrs: devlink port attrs
*/
void devlink_port_attrs_set(struct devlink_port *devlink_port,
- enum devlink_port_flavour flavour,
- u32 port_number, bool split,
- u32 split_subport_number,
- const unsigned char *switch_id,
- unsigned char switch_id_len)
+ struct devlink_port_attrs *attrs)
{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
- ret = __devlink_port_attrs_set(devlink_port, flavour,
- switch_id, switch_id_len);
+ devlink_port->attrs = *attrs;
+ ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
if (ret)
return;
- attrs->split = split;
- attrs->phys.port_number = port_number;
- attrs->phys.split_subport_number = split_subport_number;
+ WARN_ON(attrs->splittable && attrs->split);
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
@@ -7450,20 +7713,14 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
*
* @devlink_port: devlink port
* @pf: associated PF for the devlink port instance
- * @switch_id: if the port is part of switch, this is buffer with ID,
- * otherwise this is NULL
- * @switch_id_len: length of the switch_id buffer
*/
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port,
- const unsigned char *switch_id,
- unsigned char switch_id_len, u16 pf)
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_PF,
- switch_id, switch_id_len);
+ DEVLINK_PORT_FLAVOUR_PCI_PF);
if (ret)
return;
@@ -7477,21 +7734,15 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
* @devlink_port: devlink port
* @pf: associated PF for the devlink port instance
* @vf: associated VF of a PF for the devlink port instance
- * @switch_id: if the port is part of switch, this is buffer with ID,
- * otherwise this is NULL
- * @switch_id_len: length of the switch_id buffer
*/
void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
- const unsigned char *switch_id,
- unsigned char switch_id_len,
u16 pf, u16 vf)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_VF,
- switch_id, switch_id_len);
+ DEVLINK_PORT_FLAVOUR_PCI_VF);
if (ret)
return;
attrs->pci_vf.pf = pf;
@@ -7505,7 +7756,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int n = 0;
- if (!attrs->set)
+ if (!devlink_port->attrs_set)
return -EOPNOTSUPP;
switch (attrs->flavour) {
@@ -8551,6 +8802,7 @@ static const struct devlink_trap devlink_trap_generic[] = {
DEVLINK_TRAP(PTP_GENERAL, CONTROL),
DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL),
DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL),
+ DEVLINK_TRAP(EARLY_DROP, DROP),
};
#define DEVLINK_TRAP_GROUP(_id) \
@@ -8800,7 +9052,8 @@ static void devlink_trap_disable(struct devlink *devlink,
if (WARN_ON_ONCE(!trap_item))
return;
- devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP);
+ devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP,
+ NULL);
trap_item->action = DEVLINK_TRAP_ACTION_DROP;
}
@@ -9341,7 +9594,7 @@ int devlink_compat_switch_id_get(struct net_device *dev,
* any devlink lock as only permanent values are accessed.
*/
devlink_port = netdev_to_devlink_port(dev);
- if (!devlink_port || !devlink_port->attrs.switch_port)
+ if (!devlink_port || !devlink_port->switch_port)
return -EOPNOTSUPP;
memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid));
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index bd7eba9066f8..51678a528f85 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -14,6 +14,20 @@
#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
+#include <linux/indirect_call_wrapper.h>
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+#define INDIRECT_CALL_MT(f, f2, f1, ...) \
+ INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
+#endif
+#elif defined(CONFIG_IP_MULTIPLE_TABLES)
+#define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_MT(f, f2, f1, ...) f(__VA_ARGS__)
+#endif
static const struct fib_kuid_range fib_kuid_range_unset = {
KUIDT_INIT(0),
@@ -267,7 +281,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
uid_gt(fl->flowi_uid, rule->uid_range.end))
goto out;
- ret = ops->match(rule, fl, flags);
+ ret = INDIRECT_CALL_MT(ops->match,
+ fib6_rule_match,
+ fib4_rule_match,
+ rule, fl, flags);
out:
return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}
@@ -298,9 +315,15 @@ jumped:
} else if (rule->action == FR_ACT_NOP)
continue;
else
- err = ops->action(rule, fl, flags, arg);
-
- if (!err && ops->suppress && ops->suppress(rule, arg))
+ err = INDIRECT_CALL_MT(ops->action,
+ fib6_rule_action,
+ fib4_rule_action,
+ rule, fl, flags, arg);
+
+ if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress,
+ fib6_rule_suppress,
+ fib4_rule_suppress,
+ rule, arg))
continue;
if (err != -EAGAIN) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 82e1b5b06167..7124f0fe6974 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -47,6 +47,7 @@
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
+#include <linux/btf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
@@ -73,6 +74,31 @@
#include <net/lwtunnel.h>
#include <net/ipv6_stubs.h>
#include <net/bpf_sk_storage.h>
+#include <net/transp_v6.h>
+#include <linux/btf_ids.h>
+
+int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
+{
+ if (in_compat_syscall()) {
+ struct compat_sock_fprog f32;
+
+ if (len != sizeof(f32))
+ return -EINVAL;
+ if (copy_from_sockptr(&f32, src, sizeof(f32)))
+ return -EFAULT;
+ memset(dst, 0, sizeof(*dst));
+ dst->len = f32.len;
+ dst->filter = compat_ptr(f32.filter);
+ } else {
+ if (len != sizeof(*dst))
+ return -EINVAL;
+ if (copy_from_sockptr(dst, src, sizeof(*dst)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -3777,7 +3803,9 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-static int bpf_skb_output_btf_ids[5];
+BTF_ID_LIST(bpf_skb_output_btf_ids)
+BTF_ID(struct, sk_buff)
+
const struct bpf_func_proto bpf_skb_output_proto = {
.func = bpf_skb_event_output,
.gpl_only = true,
@@ -4171,7 +4199,9 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-static int bpf_xdp_output_btf_ids[5];
+BTF_ID_LIST(bpf_xdp_output_btf_ids)
+BTF_ID(struct, xdp_buff)
+
const struct bpf_func_proto bpf_xdp_output_proto = {
.func = bpf_xdp_event_output,
.gpl_only = true,
@@ -4289,10 +4319,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen, u32 flags)
{
char devname[IFNAMSIZ];
+ int val, valbool;
struct net *net;
int ifindex;
int ret = 0;
- int val;
if (!sk_fullsock(sk))
return -EINVAL;
@@ -4303,6 +4333,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
return -EINVAL;
val = *((int *)optval);
+ valbool = val ? 1 : 0;
/* Only some socketops are supported */
switch (optname) {
@@ -4361,6 +4392,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
}
ret = sock_bindtoindex(sk, ifindex, false);
break;
+ case SO_KEEPALIVE:
+ if (sk->sk_prot->keepalive)
+ sk->sk_prot->keepalive(sk, valbool);
+ sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+ break;
default:
ret = -EINVAL;
}
@@ -4421,6 +4457,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
ret = tcp_set_congestion_control(sk, name, false,
reinit, true);
} else {
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
if (optlen != sizeof(int))
@@ -4449,6 +4486,33 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
else
tp->save_syn = val;
break;
+ case TCP_KEEPIDLE:
+ ret = tcp_sock_set_keepidle_locked(sk, val);
+ break;
+ case TCP_KEEPINTVL:
+ if (val < 1 || val > MAX_TCP_KEEPINTVL)
+ ret = -EINVAL;
+ else
+ tp->keepalive_intvl = val * HZ;
+ break;
+ case TCP_KEEPCNT:
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+ ret = -EINVAL;
+ else
+ tp->keepalive_probes = val;
+ break;
+ case TCP_SYNCNT:
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+ ret = -EINVAL;
+ else
+ icsk->icsk_syn_retries = val;
+ break;
+ case TCP_USER_TIMEOUT:
+ if (val < 0)
+ ret = -EINVAL;
+ else
+ icsk->icsk_user_timeout = val;
+ break;
default:
ret = -EINVAL;
}
@@ -6123,6 +6187,7 @@ bool bpf_helper_changes_pkt_data(void *func)
}
const struct bpf_func_proto bpf_event_output_data_proto __weak;
+const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
static const struct bpf_func_proto *
sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
@@ -6155,6 +6220,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
#endif
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_cg_sock_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -6858,6 +6925,7 @@ static bool __sock_filter_check_attach_type(int off,
case offsetof(struct bpf_sock, priority):
switch (attach_type) {
case BPF_CGROUP_INET_SOCK_CREATE:
+ case BPF_CGROUP_INET_SOCK_RELEASE:
goto full_access;
default:
return false;
@@ -9187,6 +9255,189 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
+
+DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
+EXPORT_SYMBOL(bpf_sk_lookup_enabled);
+
+BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
+ struct sock *, sk, u64, flags)
+{
+ if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
+ BPF_SK_LOOKUP_F_NO_REUSEPORT)))
+ return -EINVAL;
+ if (unlikely(sk && sk_is_refcounted(sk)))
+ return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
+ if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
+ return -ESOCKTNOSUPPORT; /* reject connected sockets */
+
+ /* Check if socket is suitable for packet L3/L4 protocol */
+ if (sk && sk->sk_protocol != ctx->protocol)
+ return -EPROTOTYPE;
+ if (sk && sk->sk_family != ctx->family &&
+ (sk->sk_family == AF_INET || ipv6_only_sock(sk)))
+ return -EAFNOSUPPORT;
+
+ if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
+ return -EEXIST;
+
+ /* Select socket as lookup result */
+ ctx->selected_sk = sk;
+ ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
+ .func = bpf_sk_lookup_assign,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL,
+ .arg3_type = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *
+sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_perf_event_output:
+ return &bpf_event_output_data_proto;
+ case BPF_FUNC_sk_assign:
+ return &bpf_sk_lookup_assign_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static bool sk_lookup_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (type != BPF_READ)
+ return false;
+
+ switch (off) {
+ case offsetof(struct bpf_sk_lookup, sk):
+ info->reg_type = PTR_TO_SOCKET_OR_NULL;
+ return size == sizeof(__u64);
+
+ case bpf_ctx_range(struct bpf_sk_lookup, family):
+ case bpf_ctx_range(struct bpf_sk_lookup, protocol):
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
+ case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
+ case bpf_ctx_range(struct bpf_sk_lookup, local_port):
+ bpf_ctx_record_field_size(info, sizeof(__u32));
+ return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
+
+ default:
+ return false;
+ }
+}
+
+static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct bpf_sk_lookup, sk):
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, selected_sk));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, family):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ family, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, protocol):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ protocol, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, remote_ip4):
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ v4.saddr, 4, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, local_ip4):
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ v4.daddr, 4, target_size));
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sk_lookup,
+ remote_ip6[0], remote_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+ int off = si->off;
+
+ off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
+ off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, v6.saddr));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+ }
+ case bpf_ctx_range_till(struct bpf_sk_lookup,
+ local_ip6[0], local_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+ int off = si->off;
+
+ off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
+ off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, v6.daddr));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+ }
+ case offsetof(struct bpf_sk_lookup, remote_port):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ sport, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, local_port):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ dport, 2, target_size));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
+const struct bpf_prog_ops sk_lookup_prog_ops = {
+};
+
+const struct bpf_verifier_ops sk_lookup_verifier_ops = {
+ .get_func_proto = sk_lookup_func_proto,
+ .is_valid_access = sk_lookup_is_valid_access,
+ .convert_ctx_access = sk_lookup_convert_ctx_access,
+};
+
#endif /* CONFIG_INET */
DEFINE_BPF_DISPATCHER(xdp)
@@ -9195,3 +9446,132 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
{
bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
}
+
+#ifdef CONFIG_DEBUG_INFO_BTF
+BTF_ID_LIST_GLOBAL(btf_sock_ids)
+#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+#else
+u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
+#endif
+
+static bool check_arg_btf_id(u32 btf_id, u32 arg)
+{
+ int i;
+
+ /* only one argument, no need to check arg */
+ for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
+ if (btf_sock_ids[i] == btf_id)
+ return true;
+ return false;
+}
+
+BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
+{
+ /* tcp6_sock type is not generated in dwarf and hence btf,
+ * trigger an explicit type generation here.
+ */
+ BTF_TYPE_EMIT(struct tcp6_sock);
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_family == AF_INET6)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
+ .func = bpf_skc_to_tcp6_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
+{
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
+ .func = bpf_skc_to_tcp_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
+{
+#ifdef CONFIG_INET
+ if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
+ return (unsigned long)sk;
+#endif
+
+#if IS_BUILTIN(CONFIG_IPV6)
+ if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
+ return (unsigned long)sk;
+#endif
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
+ .func = bpf_skc_to_tcp_timewait_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
+{
+#ifdef CONFIG_INET
+ if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
+ return (unsigned long)sk;
+#endif
+
+#if IS_BUILTIN(CONFIG_IPV6)
+ if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
+ return (unsigned long)sk;
+#endif
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
+ .func = bpf_skc_to_tcp_request_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
+};
+
+BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
+{
+ /* udp6_sock type is not generated in dwarf and hence btf,
+ * trigger an explicit type generation here.
+ */
+ BTF_TYPE_EMIT(struct udp6_sock);
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
+ sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
+ .func = bpf_skc_to_udp6_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
+};
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 142a8824f0a8..29806eb765cf 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -383,6 +383,23 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
}
EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
+void skb_flow_dissect_hash(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct flow_dissector_key_hash *key;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_HASH))
+ return;
+
+ key = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_HASH,
+ target_container);
+
+ key->hash = skb_get_hash_raw(skb);
+}
+EXPORT_SYMBOL(skb_flow_dissect_hash);
+
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 2076219b8ba5..d4474c812b64 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -430,7 +430,7 @@ EXPORT_SYMBOL(flow_indr_dev_unregister);
static void flow_block_indr_init(struct flow_block_cb *flow_block,
struct flow_block_offload *bo,
- struct net_device *dev, void *data,
+ struct net_device *dev, struct Qdisc *sch, void *data,
void *cb_priv,
void (*cleanup)(struct flow_block_cb *block_cb))
{
@@ -438,6 +438,7 @@ static void flow_block_indr_init(struct flow_block_cb *flow_block,
flow_block->indr.data = data;
flow_block->indr.cb_priv = cb_priv;
flow_block->indr.dev = dev;
+ flow_block->indr.sch = sch;
flow_block->indr.cleanup = cleanup;
}
@@ -445,7 +446,8 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv),
struct flow_block_offload *bo,
- struct net_device *dev, void *data,
+ struct net_device *dev,
+ struct Qdisc *sch, void *data,
void *indr_cb_priv,
void (*cleanup)(struct flow_block_cb *block_cb))
{
@@ -455,7 +457,7 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb,
if (IS_ERR(block_cb))
goto out;
- flow_block_indr_init(block_cb, bo, dev, data, indr_cb_priv, cleanup);
+ flow_block_indr_init(block_cb, bo, dev, sch, data, indr_cb_priv, cleanup);
list_add(&block_cb->indr.list, &flow_block_indr_list);
out:
@@ -463,7 +465,7 @@ out:
}
EXPORT_SYMBOL(flow_indr_block_cb_alloc);
-int flow_indr_dev_setup_offload(struct net_device *dev,
+int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
enum tc_setup_type type, void *data,
struct flow_block_offload *bo,
void (*cleanup)(struct flow_block_cb *block_cb))
@@ -472,7 +474,7 @@ int flow_indr_dev_setup_offload(struct net_device *dev,
mutex_lock(&flow_indr_block_lock);
list_for_each_entry(this, &flow_block_indr_dev_list, list)
- this->cb(dev, this->cb_priv, type, bo, data, cleanup);
+ this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup);
mutex_unlock(&flow_indr_block_lock);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ef6b5a8f629c..8e39e28b0a8d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1783,6 +1783,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = {
[NDA_MASTER] = { .type = NLA_U32 },
[NDA_PROTOCOL] = { .type = NLA_U8 },
[NDA_NH_ID] = { .type = NLA_U32 },
+ [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 85a4b0101f76..68e0682450c6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1000,6 +1000,16 @@ static size_t rtnl_prop_list_size(const struct net_device *dev)
return size;
}
+static size_t rtnl_proto_down_size(const struct net_device *dev)
+{
+ size_t size = nla_total_size(1);
+
+ if (dev->proto_down_reason)
+ size += nla_total_size(0) + nla_total_size(4);
+
+ return size;
+}
+
static noinline size_t if_nlmsg_size(const struct net_device *dev,
u32 ext_filter_mask)
{
@@ -1041,7 +1051,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_EVENT */
+ nla_total_size(4) /* IFLA_NEW_NETNSID */
+ nla_total_size(4) /* IFLA_NEW_IFINDEX */
- + nla_total_size(1) /* IFLA_PROTO_DOWN */
+ + rtnl_proto_down_size(dev) /* proto down */
+ nla_total_size(4) /* IFLA_TARGET_NETNSID */
+ nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */
+ nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
@@ -1416,13 +1426,12 @@ static u32 rtnl_xdp_prog_skb(struct net_device *dev)
static u32 rtnl_xdp_prog_drv(struct net_device *dev)
{
- return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
+ return dev_xdp_prog_id(dev, XDP_MODE_DRV);
}
static u32 rtnl_xdp_prog_hw(struct net_device *dev)
{
- return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
- XDP_QUERY_PROG_HW);
+ return dev_xdp_prog_id(dev, XDP_MODE_HW);
}
static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
@@ -1658,6 +1667,35 @@ nest_cancel:
return ret;
}
+static int rtnl_fill_proto_down(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ struct nlattr *pr;
+ u32 preason;
+
+ if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
+ goto nla_put_failure;
+
+ preason = dev->proto_down_reason;
+ if (!preason)
+ return 0;
+
+ pr = nla_nest_start(skb, IFLA_PROTO_DOWN_REASON);
+ if (!pr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, IFLA_PROTO_DOWN_REASON_VALUE, preason)) {
+ nla_nest_cancel(skb, pr);
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(skb, pr);
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb,
struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
@@ -1708,13 +1746,15 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) +
atomic_read(&dev->carrier_down_count)) ||
- nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) ||
nla_put_u32(skb, IFLA_CARRIER_UP_COUNT,
atomic_read(&dev->carrier_up_count)) ||
nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT,
atomic_read(&dev->carrier_down_count)))
goto nla_put_failure;
+ if (rtnl_fill_proto_down(skb, dev))
+ goto nla_put_failure;
+
if (event != IFLA_EVENT_NONE) {
if (nla_put_u32(skb, IFLA_EVENT, event))
goto nla_put_failure;
@@ -1834,6 +1874,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_ALT_IFNAME] = { .type = NLA_STRING,
.len = ALTIFNAMSIZ - 1 },
[IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
+ [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2483,6 +2524,67 @@ static int do_set_master(struct net_device *dev, int ifindex,
return 0;
}
+static const struct nla_policy ifla_proto_down_reason_policy[IFLA_PROTO_DOWN_REASON_VALUE + 1] = {
+ [IFLA_PROTO_DOWN_REASON_MASK] = { .type = NLA_U32 },
+ [IFLA_PROTO_DOWN_REASON_VALUE] = { .type = NLA_U32 },
+};
+
+static int do_set_proto_down(struct net_device *dev,
+ struct nlattr *nl_proto_down,
+ struct nlattr *nl_proto_down_reason,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *pdreason[IFLA_PROTO_DOWN_REASON_MAX + 1];
+ const struct net_device_ops *ops = dev->netdev_ops;
+ unsigned long mask = 0;
+ u32 value;
+ bool proto_down;
+ int err;
+
+ if (!ops->ndo_change_proto_down) {
+ NL_SET_ERR_MSG(extack, "Protodown not supported by device");
+ return -EOPNOTSUPP;
+ }
+
+ if (nl_proto_down_reason) {
+ err = nla_parse_nested_deprecated(pdreason,
+ IFLA_PROTO_DOWN_REASON_MAX,
+ nl_proto_down_reason,
+ ifla_proto_down_reason_policy,
+ NULL);
+ if (err < 0)
+ return err;
+
+ if (!pdreason[IFLA_PROTO_DOWN_REASON_VALUE]) {
+ NL_SET_ERR_MSG(extack, "Invalid protodown reason value");
+ return -EINVAL;
+ }
+
+ value = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_VALUE]);
+
+ if (pdreason[IFLA_PROTO_DOWN_REASON_MASK])
+ mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]);
+
+ dev_change_proto_down_reason(dev, mask, value);
+ }
+
+ if (nl_proto_down) {
+ proto_down = nla_get_u8(nl_proto_down);
+
+ /* Dont turn off protodown if there are active reasons */
+ if (!proto_down && dev->proto_down_reason) {
+ NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons");
+ return -EBUSY;
+ }
+ err = dev_change_proto_down(dev,
+ proto_down);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
#define DO_SETLINK_MODIFIED 0x01
/* notify flag means notify + modified. */
#define DO_SETLINK_NOTIFY 0x03
@@ -2771,9 +2873,9 @@ static int do_setlink(const struct sk_buff *skb,
}
err = 0;
- if (tb[IFLA_PROTO_DOWN]) {
- err = dev_change_proto_down(dev,
- nla_get_u8(tb[IFLA_PROTO_DOWN]));
+ if (tb[IFLA_PROTO_DOWN] || tb[IFLA_PROTO_DOWN_REASON]) {
+ err = do_set_proto_down(dev, tb[IFLA_PROTO_DOWN],
+ tb[IFLA_PROTO_DOWN_REASON], extack);
if (err)
goto errout;
status |= DO_SETLINK_NOTIFY;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b8afefe6f6b6..2828f6d5ba89 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3758,7 +3758,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int err = -ENOMEM;
int i = 0;
int pos;
- int dummy;
if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
(skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
@@ -3780,7 +3779,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
}
__skb_push(head_skb, doffset);
- proto = skb_network_protocol(head_skb, &dummy);
+ proto = skb_network_protocol(head_skb, NULL);
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
@@ -4413,7 +4412,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
* at the moment even if they are anonymous).
*/
if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
- __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
+ !__pskb_pull_tail(skb, __skb_pagelen(skb)))
return -ENOMEM;
/* Easy case. Most of packets will go this way. */
@@ -4692,7 +4691,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
sk->sk_protocol == IPPROTO_TCP &&
sk->sk_type == SOCK_STREAM) {
- skb = tcp_get_timestamping_opt_stats(sk);
+ skb = tcp_get_timestamping_opt_stats(sk, orig_skb);
opt_stats = true;
} else
#endif
diff --git a/net/core/sock.c b/net/core/sock.c
index 8ccdcdaaa673..49cd5ffe673e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -113,6 +113,7 @@
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>
+#include <linux/compat.h>
#include <linux/uaccess.h>
@@ -360,7 +361,8 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
return sizeof(tv);
}
-static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval)
+static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
+ bool old_timeval)
{
struct __kernel_sock_timeval tv;
@@ -370,7 +372,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool
if (optlen < sizeof(tv32))
return -EINVAL;
- if (copy_from_user(&tv32, optval, sizeof(tv32)))
+ if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
return -EFAULT;
tv.tv_sec = tv32.tv_sec;
tv.tv_usec = tv32.tv_usec;
@@ -379,14 +381,14 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool
if (optlen < sizeof(old_tv))
return -EINVAL;
- if (copy_from_user(&old_tv, optval, sizeof(old_tv)))
+ if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
return -EFAULT;
tv.tv_sec = old_tv.tv_sec;
tv.tv_usec = old_tv.tv_usec;
} else {
if (optlen < sizeof(tv))
return -EINVAL;
- if (copy_from_user(&tv, optval, sizeof(tv)))
+ if (copy_from_sockptr(&tv, optval, sizeof(tv)))
return -EFAULT;
}
if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
@@ -608,8 +610,7 @@ int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
}
EXPORT_SYMBOL(sock_bindtoindex);
-static int sock_setbindtodevice(struct sock *sk, char __user *optval,
- int optlen)
+static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
@@ -631,7 +632,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
memset(devname, 0, sizeof(devname));
ret = -EFAULT;
- if (copy_from_user(devname, optval, optlen))
+ if (copy_from_sockptr(devname, optval, optlen))
goto out;
index = 0;
@@ -695,15 +696,6 @@ out:
return ret;
}
-static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
- int valbool)
-{
- if (valbool)
- sock_set_flag(sk, bit);
- else
- sock_reset_flag(sk, bit);
-}
-
bool sk_mc_loop(struct sock *sk)
{
if (dev_recursion_level())
@@ -834,7 +826,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf);
*/
int sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock_txtime sk_txtime;
struct sock *sk = sock->sk;
@@ -853,7 +845,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
valbool = val ? 1 : 0;
@@ -966,7 +958,7 @@ set_sndbuf:
ret = -EINVAL; /* 1003.1g */
break;
}
- if (copy_from_user(&ling, optval, sizeof(ling))) {
+ if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
ret = -EFAULT;
break;
}
@@ -1060,60 +1052,52 @@ set_sndbuf:
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
- ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD);
+ ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
+ optlen, optname == SO_RCVTIMEO_OLD);
break;
case SO_SNDTIMEO_OLD:
case SO_SNDTIMEO_NEW:
- ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD);
+ ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
+ optlen, optname == SO_SNDTIMEO_OLD);
break;
- case SO_ATTACH_FILTER:
- ret = -EINVAL;
- if (optlen == sizeof(struct sock_fprog)) {
- struct sock_fprog fprog;
-
- ret = -EFAULT;
- if (copy_from_user(&fprog, optval, sizeof(fprog)))
- break;
+ case SO_ATTACH_FILTER: {
+ struct sock_fprog fprog;
+ ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+ if (!ret)
ret = sk_attach_filter(&fprog, sk);
- }
break;
-
+ }
case SO_ATTACH_BPF:
ret = -EINVAL;
if (optlen == sizeof(u32)) {
u32 ufd;
ret = -EFAULT;
- if (copy_from_user(&ufd, optval, sizeof(ufd)))
+ if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
break;
ret = sk_attach_bpf(ufd, sk);
}
break;
- case SO_ATTACH_REUSEPORT_CBPF:
- ret = -EINVAL;
- if (optlen == sizeof(struct sock_fprog)) {
- struct sock_fprog fprog;
-
- ret = -EFAULT;
- if (copy_from_user(&fprog, optval, sizeof(fprog)))
- break;
+ case SO_ATTACH_REUSEPORT_CBPF: {
+ struct sock_fprog fprog;
+ ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+ if (!ret)
ret = sk_reuseport_attach_filter(&fprog, sk);
- }
break;
-
+ }
case SO_ATTACH_REUSEPORT_EBPF:
ret = -EINVAL;
if (optlen == sizeof(u32)) {
u32 ufd;
ret = -EFAULT;
- if (copy_from_user(&ufd, optval, sizeof(ufd)))
+ if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
break;
ret = sk_reuseport_attach_bpf(ufd, sk);
@@ -1193,7 +1177,7 @@ set_sndbuf:
if (sizeof(ulval) != sizeof(val) &&
optlen >= sizeof(ulval) &&
- get_user(ulval, (unsigned long __user *)optval)) {
+ copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
ret = -EFAULT;
break;
}
@@ -1236,7 +1220,7 @@ set_sndbuf:
if (optlen != sizeof(struct sock_txtime)) {
ret = -EINVAL;
break;
- } else if (copy_from_user(&sk_txtime, optval,
+ } else if (copy_from_sockptr(&sk_txtime, optval,
sizeof(struct sock_txtime))) {
ret = -EFAULT;
break;
@@ -2802,20 +2786,6 @@ int sock_no_shutdown(struct socket *sock, int how)
}
EXPORT_SYMBOL(sock_no_shutdown);
-int sock_no_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(sock_no_setsockopt);
-
-int sock_no_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(sock_no_getsockopt);
-
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
return -EOPNOTSUPP;
@@ -3243,20 +3213,6 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(sock_common_getsockopt);
-#ifdef CONFIG_COMPAT
-int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- struct sock *sk = sock->sk;
-
- if (sk->sk_prot->compat_getsockopt != NULL)
- return sk->sk_prot->compat_getsockopt(sk, level, optname,
- optval, optlen);
- return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_sock_common_getsockopt);
-#endif
-
int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
@@ -3276,7 +3232,7 @@ EXPORT_SYMBOL(sock_common_recvmsg);
* Set socket options on an inet socket.
*/
int sock_common_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
@@ -3284,20 +3240,6 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(sock_common_setsockopt);
-#ifdef CONFIG_COMPAT
-int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- struct sock *sk = sock->sk;
-
- if (sk->sk_prot->compat_setsockopt != NULL)
- return sk->sk_prot->compat_setsockopt(sk, level, optname,
- optval, optlen);
- return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_sock_common_setsockopt);
-#endif
-
void sk_common_release(struct sock *sk)
{
if (sk->sk_prot->destroy)
@@ -3596,6 +3538,7 @@ int sock_load_diag_module(int family, int protocol)
#ifdef CONFIG_INET
if (family == AF_INET &&
protocol != IPPROTO_RAW &&
+ protocol < MAX_INET_PROTOS &&
!rcu_access_pointer(inet_protos[protocol]))
return -ENOENT;
#endif
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 0971f17e8e54..119f52a99dc1 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -681,6 +681,7 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+static int sock_map_btf_id;
const struct bpf_map_ops sock_map_ops = {
.map_alloc = sock_map_alloc,
.map_free = sock_map_free,
@@ -691,9 +692,11 @@ const struct bpf_map_ops sock_map_ops = {
.map_lookup_elem = sock_map_lookup,
.map_release_uref = sock_map_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_btf_name = "bpf_stab",
+ .map_btf_id = &sock_map_btf_id,
};
-struct bpf_htab_elem {
+struct bpf_shtab_elem {
struct rcu_head rcu;
u32 hash;
struct sock *sk;
@@ -701,14 +704,14 @@ struct bpf_htab_elem {
u8 key[];
};
-struct bpf_htab_bucket {
+struct bpf_shtab_bucket {
struct hlist_head head;
raw_spinlock_t lock;
};
-struct bpf_htab {
+struct bpf_shtab {
struct bpf_map map;
- struct bpf_htab_bucket *buckets;
+ struct bpf_shtab_bucket *buckets;
u32 buckets_num;
u32 elem_size;
struct sk_psock_progs progs;
@@ -720,17 +723,17 @@ static inline u32 sock_hash_bucket_hash(const void *key, u32 len)
return jhash(key, len, 0);
}
-static struct bpf_htab_bucket *sock_hash_select_bucket(struct bpf_htab *htab,
- u32 hash)
+static struct bpf_shtab_bucket *sock_hash_select_bucket(struct bpf_shtab *htab,
+ u32 hash)
{
return &htab->buckets[hash & (htab->buckets_num - 1)];
}
-static struct bpf_htab_elem *
+static struct bpf_shtab_elem *
sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
u32 key_size)
{
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_elem *elem;
hlist_for_each_entry_rcu(elem, head, node) {
if (elem->hash == hash &&
@@ -743,10 +746,10 @@ sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 key_size = map->key_size, hash;
- struct bpf_htab_bucket *bucket;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
WARN_ON_ONCE(!rcu_read_lock_held());
@@ -757,8 +760,8 @@ static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
return elem ? elem->sk : NULL;
}
-static void sock_hash_free_elem(struct bpf_htab *htab,
- struct bpf_htab_elem *elem)
+static void sock_hash_free_elem(struct bpf_shtab *htab,
+ struct bpf_shtab_elem *elem)
{
atomic_dec(&htab->count);
kfree_rcu(elem, rcu);
@@ -767,9 +770,9 @@ static void sock_hash_free_elem(struct bpf_htab *htab,
static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
void *link_raw)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_elem *elem_probe, *elem = link_raw;
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_elem *elem_probe, *elem = link_raw;
+ struct bpf_shtab_bucket *bucket;
WARN_ON_ONCE(!rcu_read_lock_held());
bucket = sock_hash_select_bucket(htab, elem->hash);
@@ -791,10 +794,10 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
static int sock_hash_delete_elem(struct bpf_map *map, void *key)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 hash, key_size = map->key_size;
- struct bpf_htab_bucket *bucket;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
int ret = -ENOENT;
hash = sock_hash_bucket_hash(key, key_size);
@@ -812,12 +815,12 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
return ret;
}
-static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab,
- void *key, u32 key_size,
- u32 hash, struct sock *sk,
- struct bpf_htab_elem *old)
+static struct bpf_shtab_elem *sock_hash_alloc_elem(struct bpf_shtab *htab,
+ void *key, u32 key_size,
+ u32 hash, struct sock *sk,
+ struct bpf_shtab_elem *old)
{
- struct bpf_htab_elem *new;
+ struct bpf_shtab_elem *new;
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
if (!old) {
@@ -841,10 +844,10 @@ static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab,
static int sock_hash_update_common(struct bpf_map *map, void *key,
struct sock *sk, u64 flags)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 key_size = map->key_size, hash;
- struct bpf_htab_elem *elem, *elem_new;
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab_elem *elem, *elem_new;
+ struct bpf_shtab_bucket *bucket;
struct sk_psock_link *link;
struct sk_psock *psock;
int ret;
@@ -954,8 +957,8 @@ out:
static int sock_hash_get_next_key(struct bpf_map *map, void *key,
void *key_next)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_elem *elem, *elem_next;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_elem *elem, *elem_next;
u32 hash, key_size = map->key_size;
struct hlist_head *head;
int i = 0;
@@ -969,7 +972,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
goto find_first_elem;
elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
- struct bpf_htab_elem, node);
+ struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
return 0;
@@ -981,7 +984,7 @@ find_first_elem:
for (; i < htab->buckets_num; i++) {
head = &sock_hash_select_bucket(htab, i)->head;
elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
- struct bpf_htab_elem, node);
+ struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
return 0;
@@ -993,7 +996,7 @@ find_first_elem:
static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
{
- struct bpf_htab *htab;
+ struct bpf_shtab *htab;
int i, err;
u64 cost;
@@ -1015,15 +1018,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
bpf_map_init_from_attr(&htab->map, attr);
htab->buckets_num = roundup_pow_of_two(htab->map.max_entries);
- htab->elem_size = sizeof(struct bpf_htab_elem) +
+ htab->elem_size = sizeof(struct bpf_shtab_elem) +
round_up(htab->map.key_size, 8);
if (htab->buckets_num == 0 ||
- htab->buckets_num > U32_MAX / sizeof(struct bpf_htab_bucket)) {
+ htab->buckets_num > U32_MAX / sizeof(struct bpf_shtab_bucket)) {
err = -EINVAL;
goto free_htab;
}
- cost = (u64) htab->buckets_num * sizeof(struct bpf_htab_bucket) +
+ cost = (u64) htab->buckets_num * sizeof(struct bpf_shtab_bucket) +
(u64) htab->elem_size * htab->map.max_entries;
if (cost >= U32_MAX - PAGE_SIZE) {
err = -EINVAL;
@@ -1034,7 +1037,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
goto free_htab;
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
- sizeof(struct bpf_htab_bucket),
+ sizeof(struct bpf_shtab_bucket),
htab->map.numa_node);
if (!htab->buckets) {
bpf_map_charge_finish(&htab->map.memory);
@@ -1055,10 +1058,10 @@ free_htab:
static void sock_hash_free(struct bpf_map *map)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_bucket *bucket;
struct hlist_head unlink_list;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_elem *elem;
struct hlist_node *node;
int i;
@@ -1134,7 +1137,7 @@ static void *sock_hash_lookup(struct bpf_map *map, void *key)
static void sock_hash_release_progs(struct bpf_map *map)
{
- psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs);
+ psock_progs_drop(&container_of(map, struct bpf_shtab, map)->progs);
}
BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops,
@@ -1214,6 +1217,7 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
.arg4_type = ARG_ANYTHING,
};
+static int sock_hash_map_btf_id;
const struct bpf_map_ops sock_hash_ops = {
.map_alloc = sock_hash_alloc,
.map_free = sock_hash_free,
@@ -1224,6 +1228,8 @@ const struct bpf_map_ops sock_hash_ops = {
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
.map_release_uref = sock_hash_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_btf_name = "bpf_shtab",
+ .map_btf_id = &sock_hash_map_btf_id,
};
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
@@ -1232,7 +1238,7 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
case BPF_MAP_TYPE_SOCKMAP:
return &container_of(map, struct bpf_stab, map)->progs;
case BPF_MAP_TYPE_SOCKHASH:
- return &container_of(map, struct bpf_htab, map)->progs;
+ return &container_of(map, struct bpf_shtab, map)->progs;
default:
break;
}
diff --git a/net/core/tso.c b/net/core/tso.c
index d4d5c077ad72..4148f6d48953 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -6,18 +6,17 @@
#include <asm/unaligned.h>
/* Calculate expected number of TX descriptors */
-int tso_count_descs(struct sk_buff *skb)
+int tso_count_descs(const struct sk_buff *skb)
{
/* The Marvell Way */
return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
}
EXPORT_SYMBOL(tso_count_descs);
-void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso,
int size, bool is_last)
{
- struct tcphdr *tcph;
- int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int hdr_len = skb_transport_offset(skb) + tso->tlen;
int mac_hdr_len = skb_network_offset(skb);
memcpy(hdr, skb->data, hdr_len);
@@ -30,23 +29,31 @@ void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
} else {
struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len);
- iph->payload_len = htons(size + tcp_hdrlen(skb));
+ iph->payload_len = htons(size + tso->tlen);
}
- tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
- put_unaligned_be32(tso->tcp_seq, &tcph->seq);
+ hdr += skb_transport_offset(skb);
+ if (tso->tlen != sizeof(struct udphdr)) {
+ struct tcphdr *tcph = (struct tcphdr *)hdr;
- if (!is_last) {
- /* Clear all special flags for not last packet */
- tcph->psh = 0;
- tcph->fin = 0;
- tcph->rst = 0;
+ put_unaligned_be32(tso->tcp_seq, &tcph->seq);
+
+ if (!is_last) {
+ /* Clear all special flags for not last packet */
+ tcph->psh = 0;
+ tcph->fin = 0;
+ tcph->rst = 0;
+ }
+ } else {
+ struct udphdr *uh = (struct udphdr *)hdr;
+
+ uh->len = htons(sizeof(*uh) + size);
}
}
EXPORT_SYMBOL(tso_build_hdr);
-void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
+void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size)
{
- tso->tcp_seq += size;
+ tso->tcp_seq += size; /* not worth avoiding this operation for UDP */
tso->size -= size;
tso->data += size;
@@ -62,12 +69,14 @@ void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
}
EXPORT_SYMBOL(tso_build_data);
-void tso_start(struct sk_buff *skb, struct tso_t *tso)
+int tso_start(struct sk_buff *skb, struct tso_t *tso)
{
- int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int tlen = skb_is_gso_tcp(skb) ? tcp_hdrlen(skb) : sizeof(struct udphdr);
+ int hdr_len = skb_transport_offset(skb) + tlen;
+ tso->tlen = tlen;
tso->ip_id = ntohs(ip_hdr(skb)->id);
- tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ tso->tcp_seq = (tlen != sizeof(struct udphdr)) ? ntohl(tcp_hdr(skb)->seq) : 0;
tso->next_frag_idx = 0;
tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6);
@@ -83,5 +92,6 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso)
tso->data = skb_frag_address(frag);
tso->next_frag_idx++;
}
+ return hdr_len;
}
EXPORT_SYMBOL(tso_start);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 3c45f99e26d5..48aba933a5a8 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -400,15 +400,6 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
}
EXPORT_SYMBOL_GPL(__xdp_release_frame);
-int xdp_attachment_query(struct xdp_attachment_info *info,
- struct netdev_bpf *bpf)
-{
- bpf->prog_id = info->prog ? info->prog->aux->id : 0;
- bpf->prog_flags = info->prog ? info->flags : 0;
- return 0;
-}
-EXPORT_SYMBOL_GPL(xdp_attachment_query);
-
bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{