Diffstat (limited to 'net/core')
-rw-r--r--   net/core/Makefile          |    2
-rw-r--r--   net/core/datagram.c        |   27
-rw-r--r--   net/core/dev.c             |  145
-rw-r--r--   net/core/dev_ioctl.c       |    1
-rw-r--r--   net/core/devlink.c         |  178
-rw-r--r--   net/core/ethtool.c         | 3116
-rw-r--r--   net/core/filter.c          |  221
-rw-r--r--   net/core/flow_dissector.c  |   13
-rw-r--r--   net/core/net_namespace.c   |   99
-rw-r--r--   net/core/page_pool.c       |   89
-rw-r--r--   net/core/rtnetlink.c       |    5
-rw-r--r--   net/core/skbuff.c          |  143
-rw-r--r--   net/core/sock.c            |    2
-rw-r--r--   net/core/sock_reuseport.c  |    1
-rw-r--r--   net/core/timestamping.c    |   20
15 files changed, 571 insertions, 3491 deletions
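
The datagram.c hunks below change __skb_try_recv_datagram() and __skb_recv_datagram() to take the target sk_buff_head as an explicit argument rather than always reading sk->sk_receive_queue. A rough sketch of what a caller looks like after this change; my_proto_recvmsg is a hypothetical protocol handler used only for illustration, not part of this series:

/*
 * Illustration only: "my_proto_recvmsg" is a hypothetical caller.
 * It shows the reworked helper being passed the receive queue
 * explicitly (here simply the socket's ordinary sk_receive_queue).
 */
#include <linux/skbuff.h>
#include <net/sock.h>

static int my_proto_recvmsg(struct socket *sock, struct msghdr *msg,
			    size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int off = 0, err, copied;

	/* The queue is now an explicit parameter instead of being
	 * hard-coded to sk->sk_receive_queue inside the helper. */
	skb = __skb_recv_datagram(sk, &sk->sk_receive_queue, flags,
				  NULL, &off, &err);
	if (!skb)
		return err;

	copied = min_t(int, len, skb->len);
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	skb_free_datagram(sk, skb);

	return err ? err : copied;
}
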
diff --git a/net/core/Makefile b/net/core/Makefile index a104dc8faafc..3e2c378e5f31 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -8,7 +8,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ +obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ fib_notifier.o xdp.o flow_offload.o diff --git a/net/core/datagram.c b/net/core/datagram.c index da3c24ed129c..a78e7f864c1e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -84,7 +84,8 @@ static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, i /* * Wait for the last received packet to be different from skb */ -int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, +int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, + int *err, long *timeo_p, const struct sk_buff *skb) { int error; @@ -97,7 +98,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, if (error) goto out_err; - if (READ_ONCE(sk->sk_receive_queue.prev) != skb) + if (READ_ONCE(queue->prev) != skb) goto out; /* Socket shut down? */ @@ -209,6 +210,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, /** * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket + * @queue: socket queue from which to receive * @flags: MSG\_ flags * @destructor: invoked under the receive lock on successful dequeue * @off: an offset in bytes to peek skb from. Returns an offset @@ -241,13 +243,14 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, * quite explicitly by POSIX 1003.1g, don't change them without having * the standard around please. */ -struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, + struct sk_buff_head *queue, + unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), int *off, int *err, struct sk_buff **last) { - struct sk_buff_head *queue = &sk->sk_receive_queue; struct sk_buff *skb; unsigned long cpu_flags; /* @@ -278,7 +281,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (READ_ONCE(sk->sk_receive_queue.prev) != *last); + } while (READ_ONCE(queue->prev) != *last); error = -EAGAIN; @@ -288,7 +291,9 @@ no_packet: } EXPORT_SYMBOL(__skb_try_recv_datagram); -struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, +struct sk_buff *__skb_recv_datagram(struct sock *sk, + struct sk_buff_head *sk_queue, + unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), int *off, int *err) @@ -299,15 +304,16 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { - skb = __skb_try_recv_datagram(sk, flags, destructor, off, err, - &last); + skb = __skb_try_recv_datagram(sk, sk_queue, flags, destructor, + off, err, &last); if (skb) return skb; if (*err != -EAGAIN) break; } while (timeo && - !__skb_wait_for_more_packets(sk, err, &timeo, last)); + !__skb_wait_for_more_packets(sk, sk_queue, err, + &timeo, last)); return NULL; } @@ -318,7 +324,8 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, { int off = 0; - return __skb_recv_datagram(sk, flags | (noblock ? 
MSG_DONTWAIT : 0), + return __skb_recv_datagram(sk, &sk->sk_receive_queue, + flags | (noblock ? MSG_DONTWAIT : 0), NULL, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index 3d3ea1c30cf0..17529d49faec 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1764,7 +1764,6 @@ EXPORT_SYMBOL(register_netdevice_notifier); int unregister_netdevice_notifier(struct notifier_block *nb) { - struct net_device *dev; struct net *net; int err; @@ -1775,16 +1774,9 @@ int unregister_netdevice_notifier(struct notifier_block *nb) if (err) goto unlock; - for_each_net(net) { - for_each_netdev(net, dev) { - if (dev->flags & IFF_UP) { - call_netdevice_notifier(nb, NETDEV_GOING_DOWN, - dev); - call_netdevice_notifier(nb, NETDEV_DOWN, dev); - } - call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); - } - } + for_each_net(net) + call_netdevice_unregister_net_notifiers(nb, net); + unlock: rtnl_unlock(); up_write(&pernet_ops_rwsem); @@ -1792,6 +1784,42 @@ unlock: } EXPORT_SYMBOL(unregister_netdevice_notifier); +static int __register_netdevice_notifier_net(struct net *net, + struct notifier_block *nb, + bool ignore_call_fail) +{ + int err; + + err = raw_notifier_chain_register(&net->netdev_chain, nb); + if (err) + return err; + if (dev_boot_phase) + return 0; + + err = call_netdevice_register_net_notifiers(nb, net); + if (err && !ignore_call_fail) + goto chain_unregister; + + return 0; + +chain_unregister: + raw_notifier_chain_unregister(&net->netdev_chain, nb); + return err; +} + +static int __unregister_netdevice_notifier_net(struct net *net, + struct notifier_block *nb) +{ + int err; + + err = raw_notifier_chain_unregister(&net->netdev_chain, nb); + if (err) + return err; + + call_netdevice_unregister_net_notifiers(nb, net); + return 0; +} + /** * register_netdevice_notifier_net - register a per-netns network notifier block * @net: network namespace @@ -1812,23 +1840,9 @@ int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb) int err; rtnl_lock(); - err = raw_notifier_chain_register(&net->netdev_chain, nb); - if (err) - goto unlock; - if (dev_boot_phase) - goto unlock; - - err = call_netdevice_register_net_notifiers(nb, net); - if (err) - goto chain_unregister; - -unlock: + err = __register_netdevice_notifier_net(net, nb, false); rtnl_unlock(); return err; - -chain_unregister: - raw_notifier_chain_unregister(&netdev_chain, nb); - goto unlock; } EXPORT_SYMBOL(register_netdevice_notifier_net); @@ -1854,17 +1868,53 @@ int unregister_netdevice_notifier_net(struct net *net, int err; rtnl_lock(); - err = raw_notifier_chain_unregister(&net->netdev_chain, nb); - if (err) - goto unlock; + err = __unregister_netdevice_notifier_net(net, nb); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL(unregister_netdevice_notifier_net); - call_netdevice_unregister_net_notifiers(nb, net); +int register_netdevice_notifier_dev_net(struct net_device *dev, + struct notifier_block *nb, + struct netdev_net_notifier *nn) +{ + int err; -unlock: + rtnl_lock(); + err = __register_netdevice_notifier_net(dev_net(dev), nb, false); + if (!err) { + nn->nb = nb; + list_add(&nn->list, &dev->net_notifier_list); + } rtnl_unlock(); return err; } -EXPORT_SYMBOL(unregister_netdevice_notifier_net); +EXPORT_SYMBOL(register_netdevice_notifier_dev_net); + +int unregister_netdevice_notifier_dev_net(struct net_device *dev, + struct notifier_block *nb, + struct netdev_net_notifier *nn) +{ + int err; + + rtnl_lock(); + list_del(&nn->list); + err = __unregister_netdevice_notifier_net(dev_net(dev), 
nb); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net); + +static void move_netdevice_notifiers_dev_net(struct net_device *dev, + struct net *net) +{ + struct netdev_net_notifier *nn; + + list_for_each_entry(nn, &dev->net_notifier_list, list) { + __unregister_netdevice_notifier_net(dev_net(dev), nn->nb); + __register_netdevice_notifier_net(net, nn->nb, true); + } +} /** * call_netdevice_notifiers_info - call all network notifier blocks @@ -3249,7 +3299,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, segs = skb_mac_gso_segment(skb, features); - if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) + if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) skb_warn_bad_offload(skb); return segs; @@ -4932,7 +4982,6 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { -#ifdef CONFIG_NETFILTER_INGRESS if (nf_hook_ingress_active(skb)) { int ingress_retval; @@ -4946,7 +4995,6 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, rcu_read_unlock(); return ingress_retval; } -#endif /* CONFIG_NETFILTER_INGRESS */ return 0; } @@ -8553,7 +8601,17 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, struct netlink_ext_ack *extack, u32 flags, struct bpf_prog *prog) { + bool non_hw = !(flags & XDP_FLAGS_HW_MODE); + struct bpf_prog *prev_prog = NULL; struct netdev_bpf xdp; + int err; + + if (non_hw) { + prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op, + XDP_QUERY_PROG)); + if (IS_ERR(prev_prog)) + prev_prog = NULL; + } memset(&xdp, 0, sizeof(xdp)); if (flags & XDP_FLAGS_HW_MODE) @@ -8564,7 +8622,14 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, xdp.flags = flags; xdp.prog = prog; - return bpf_op(dev, &xdp); + err = bpf_op(dev, &xdp); + if (!err && non_hw) + bpf_prog_change_xdp(prev_prog, prog); + + if (prev_prog) + bpf_prog_put(prev_prog); + + return err; } static void dev_xdp_uninstall(struct net_device *dev) @@ -9268,7 +9333,7 @@ int register_netdevice(struct net_device *dev) /* Transfer changeable features to wanted_features and enable * software offloads (GSO and GRO). 
*/ - dev->hw_features |= NETIF_F_SOFT_FEATURES; + dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF); dev->features |= NETIF_F_SOFT_FEATURES; if (dev->netdev_ops->ndo_udp_tunnel_add) { @@ -9763,6 +9828,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); + INIT_LIST_HEAD(&dev->net_notifier_list); #ifdef CONFIG_NET_SCHED hash_init(dev->qdisc_hash); #endif @@ -9833,6 +9899,8 @@ void free_netdev(struct net_device *dev) free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; + free_percpu(dev->xdp_bulkq); + dev->xdp_bulkq = NULL; netdev_unregister_lockdep_key(dev); @@ -10024,6 +10092,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); netdev_adjacent_del_links(dev); + /* Move per-net netdevice notifiers that are following the netdevice */ + move_netdevice_notifiers_dev_net(dev, net); + /* Actually switch the network namespace */ dev_net_set(dev, net); dev->ifindex = new_ifindex; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 5163d900bb4f..dbaebbe573f0 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -187,6 +187,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr) case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: tx_type_valid = 1; break; } diff --git a/net/core/devlink.c b/net/core/devlink.c index f76219bf0c21..ca1df0ec3c97 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4843,22 +4843,100 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) } EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); -void -devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, - enum devlink_health_reporter_state state) +static int +devlink_nl_health_reporter_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_health_reporter *reporter, + enum devlink_command cmd, u32 portid, + u32 seq, int flags) { - if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY && - state != DEVLINK_HEALTH_REPORTER_STATE_ERROR)) + struct nlattr *reporter_attr; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto genlmsg_cancel; + + reporter_attr = nla_nest_start_noflag(msg, + DEVLINK_ATTR_HEALTH_REPORTER); + if (!reporter_attr) + goto genlmsg_cancel; + if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, + reporter->ops->name)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, + reporter->health_state)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT, + reporter->error_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, + reporter->recovery_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (reporter->ops->recover && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, + reporter->graceful_period, + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (reporter->ops->recover && + nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, + reporter->auto_recover)) + goto reporter_nest_cancel; + if (reporter->dump_fmsg && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, + jiffies_to_msecs(reporter->dump_ts), + 
DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (reporter->dump_fmsg && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, + reporter->dump_real_ts, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + + nla_nest_end(msg, reporter_attr); + genlmsg_end(msg, hdr); + return 0; + +reporter_nest_cancel: + nla_nest_end(msg, reporter_attr); +genlmsg_cancel: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static void devlink_recover_notify(struct devlink_health_reporter *reporter, + enum devlink_command cmd) +{ + struct sk_buff *msg; + int err; + + WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) return; - if (reporter->health_state == state) + err = devlink_nl_health_reporter_fill(msg, reporter->devlink, + reporter, cmd, 0, 0, 0); + if (err) { + nlmsg_free(msg); return; + } - reporter->health_state = state; - trace_devlink_health_reporter_state_update(reporter->devlink, - reporter->ops->name, state); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(reporter->devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } -EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update); + +void +devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter) +{ + reporter->recovery_count++; + reporter->last_recovery_ts = jiffies; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done); static int devlink_health_reporter_recover(struct devlink_health_reporter *reporter, @@ -4876,9 +4954,9 @@ devlink_health_reporter_recover(struct devlink_health_reporter *reporter, if (err) return err; - reporter->recovery_count++; + devlink_health_reporter_recovery_done(reporter); reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; - reporter->last_recovery_ts = jiffies; + devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); return 0; } @@ -4945,6 +5023,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter, reporter->error_count++; prev_health_state = reporter->health_state; reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; + devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); /* abort if the previous error wasn't recovered */ if (reporter->auto_recover && @@ -5027,68 +5106,23 @@ devlink_health_reporter_put(struct devlink_health_reporter *reporter) refcount_dec(&reporter->refcount); } -static int -devlink_nl_health_reporter_fill(struct sk_buff *msg, - struct devlink *devlink, - struct devlink_health_reporter *reporter, - enum devlink_command cmd, u32 portid, - u32 seq, int flags) +void +devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, + enum devlink_health_reporter_state state) { - struct nlattr *reporter_attr; - void *hdr; - - hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); - if (!hdr) - return -EMSGSIZE; - - if (devlink_nl_put_handle(msg, devlink)) - goto genlmsg_cancel; - - reporter_attr = nla_nest_start_noflag(msg, - DEVLINK_ATTR_HEALTH_REPORTER); - if (!reporter_attr) - goto genlmsg_cancel; - if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, - reporter->ops->name)) - goto reporter_nest_cancel; - if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, - reporter->health_state)) - goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT, - reporter->error_count, DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, - reporter->recovery_count, DEVLINK_ATTR_PAD)) - 
goto reporter_nest_cancel; - if (reporter->ops->recover && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, - reporter->graceful_period, - DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (reporter->ops->recover && - nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, - reporter->auto_recover)) - goto reporter_nest_cancel; - if (reporter->dump_fmsg && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, - jiffies_to_msecs(reporter->dump_ts), - DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (reporter->dump_fmsg && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, - reporter->dump_real_ts, DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; + if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY && + state != DEVLINK_HEALTH_REPORTER_STATE_ERROR)) + return; - nla_nest_end(msg, reporter_attr); - genlmsg_end(msg, hdr); - return 0; + if (reporter->health_state == state) + return; -reporter_nest_cancel: - nla_nest_end(msg, reporter_attr); -genlmsg_cancel: - genlmsg_cancel(msg, hdr); - return -EMSGSIZE; + reporter->health_state = state; + trace_devlink_health_reporter_state_update(reporter->devlink, + reporter->ops->name, state); + devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); } +EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update); static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, struct genl_info *info) @@ -7674,6 +7708,9 @@ static const struct devlink_trap devlink_trap_generic[] = { DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION), DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION), DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, EXCEPTION), + DEVLINK_TRAP(NON_ROUTABLE, DROP), + DEVLINK_TRAP(DECAP_ERROR, EXCEPTION), + DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP), }; #define DEVLINK_TRAP_GROUP(_id) \ @@ -7686,6 +7723,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = { DEVLINK_TRAP_GROUP(L2_DROPS), DEVLINK_TRAP_GROUP(L3_DROPS), DEVLINK_TRAP_GROUP(BUFFER_DROPS), + DEVLINK_TRAP_GROUP(TUNNEL_DROPS), }; static int devlink_trap_generic_verify(const struct devlink_trap *trap) diff --git a/net/core/ethtool.c b/net/core/ethtool.c deleted file mode 100644 index cd9bc67381b2..000000000000 --- a/net/core/ethtool.c +++ /dev/null @@ -1,3116 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/core/ethtool.c - Ethtool ioctl handler - * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx> - * - * This file is where we call all the ethtool_ops commands to get - * the information ethtool needs. - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/ethtool.h> -#include <linux/netdevice.h> -#include <linux/net_tstamp.h> -#include <linux/phy.h> -#include <linux/bitops.h> -#include <linux/uaccess.h> -#include <linux/vmalloc.h> -#include <linux/sfp.h> -#include <linux/slab.h> -#include <linux/rtnetlink.h> -#include <linux/sched/signal.h> -#include <linux/net.h> -#include <net/devlink.h> -#include <net/xdp_sock.h> -#include <net/flow_offload.h> - -/* - * Some useful ethtool_ops methods that're device independent. - * If we find that all drivers want to do the same thing here, - * we can turn these into dev_() function calls. - */ - -u32 ethtool_op_get_link(struct net_device *dev) -{ - return netif_carrier_ok(dev) ? 
1 : 0; -} -EXPORT_SYMBOL(ethtool_op_get_link); - -int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) -{ - info->so_timestamping = - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index = -1; - return 0; -} -EXPORT_SYMBOL(ethtool_op_get_ts_info); - -/* Handlers for each ethtool command */ - -#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32) - -static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { - [NETIF_F_SG_BIT] = "tx-scatter-gather", - [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4", - [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic", - [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", - [NETIF_F_HIGHDMA_BIT] = "highdma", - [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", - [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert", - - [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse", - [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter", - [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert", - [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse", - [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter", - [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", - [NETIF_F_GSO_BIT] = "tx-generic-segmentation", - [NETIF_F_LLTX_BIT] = "tx-lockless", - [NETIF_F_NETNS_LOCAL_BIT] = "netns-local", - [NETIF_F_GRO_BIT] = "rx-gro", - [NETIF_F_GRO_HW_BIT] = "rx-gro-hw", - [NETIF_F_LRO_BIT] = "rx-lro", - - [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", - [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", - [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", - [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", - [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", - [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", - [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", - [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation", - [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation", - [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation", - [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", - [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", - [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", - [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", - [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", - [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", - - [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", - [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", - [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu", - [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter", - [NETIF_F_RXHASH_BIT] = "rx-hashing", - [NETIF_F_RXCSUM_BIT] = "rx-checksum", - [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy", - [NETIF_F_LOOPBACK_BIT] = "loopback", - [NETIF_F_RXFCS_BIT] = "rx-fcs", - [NETIF_F_RXALL_BIT] = "rx-all", - [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", - [NETIF_F_HW_TC_BIT] = "hw-tc-offload", - [NETIF_F_HW_ESP_BIT] = "esp-hw-offload", - [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload", - [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload", - [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record", - [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload", - [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload", -}; - -static const char -rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = { - [ETH_RSS_HASH_TOP_BIT] = "toeplitz", - [ETH_RSS_HASH_XOR_BIT] = "xor", - [ETH_RSS_HASH_CRC32_BIT] = "crc32", -}; - -static const char -tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = { - [ETHTOOL_ID_UNSPEC] = "Unspec", - [ETHTOOL_RX_COPYBREAK] = "rx-copybreak", - [ETHTOOL_TX_COPYBREAK] = "tx-copybreak", - 
[ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout", -}; - -static const char -phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = { - [ETHTOOL_ID_UNSPEC] = "Unspec", - [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift", - [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down", - [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down", -}; - -static int ethtool_get_features(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_gfeatures cmd = { - .cmd = ETHTOOL_GFEATURES, - .size = ETHTOOL_DEV_FEATURE_WORDS, - }; - struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; - u32 __user *sizeaddr; - u32 copy_size; - int i; - - /* in case feature bits run out again */ - BUILD_BUG_ON(ETHTOOL_DEV_FEATURE_WORDS * sizeof(u32) > sizeof(netdev_features_t)); - - for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) { - features[i].available = (u32)(dev->hw_features >> (32 * i)); - features[i].requested = (u32)(dev->wanted_features >> (32 * i)); - features[i].active = (u32)(dev->features >> (32 * i)); - features[i].never_changed = - (u32)(NETIF_F_NEVER_CHANGE >> (32 * i)); - } - - sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); - if (get_user(copy_size, sizeaddr)) - return -EFAULT; - - if (copy_size > ETHTOOL_DEV_FEATURE_WORDS) - copy_size = ETHTOOL_DEV_FEATURE_WORDS; - - if (copy_to_user(useraddr, &cmd, sizeof(cmd))) - return -EFAULT; - useraddr += sizeof(cmd); - if (copy_to_user(useraddr, features, copy_size * sizeof(*features))) - return -EFAULT; - - return 0; -} - -static int ethtool_set_features(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_sfeatures cmd; - struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; - netdev_features_t wanted = 0, valid = 0; - int i, ret = 0; - - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) - return -EFAULT; - useraddr += sizeof(cmd); - - if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS) - return -EINVAL; - - if (copy_from_user(features, useraddr, sizeof(features))) - return -EFAULT; - - for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) { - valid |= (netdev_features_t)features[i].valid << (32 * i); - wanted |= (netdev_features_t)features[i].requested << (32 * i); - } - - if (valid & ~NETIF_F_ETHTOOL_BITS) - return -EINVAL; - - if (valid & ~dev->hw_features) { - valid &= dev->hw_features; - ret |= ETHTOOL_F_UNSUPPORTED; - } - - dev->wanted_features &= ~valid; - dev->wanted_features |= wanted & valid; - __netdev_update_features(dev); - - if ((dev->wanted_features ^ dev->features) & valid) - ret |= ETHTOOL_F_WISH; - - return ret; -} - -static int __ethtool_get_sset_count(struct net_device *dev, int sset) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (sset == ETH_SS_FEATURES) - return ARRAY_SIZE(netdev_features_strings); - - if (sset == ETH_SS_RSS_HASH_FUNCS) - return ARRAY_SIZE(rss_hash_func_strings); - - if (sset == ETH_SS_TUNABLES) - return ARRAY_SIZE(tunable_strings); - - if (sset == ETH_SS_PHY_TUNABLES) - return ARRAY_SIZE(phy_tunable_strings); - - if (sset == ETH_SS_PHY_STATS && dev->phydev && - !ops->get_ethtool_phy_stats) - return phy_ethtool_get_sset_count(dev->phydev); - - if (ops->get_sset_count && ops->get_strings) - return ops->get_sset_count(dev, sset); - else - return -EOPNOTSUPP; -} - -static void __ethtool_get_strings(struct net_device *dev, - u32 stringset, u8 *data) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (stringset == ETH_SS_FEATURES) - memcpy(data, netdev_features_strings, - sizeof(netdev_features_strings)); - else if (stringset == 
ETH_SS_RSS_HASH_FUNCS) - memcpy(data, rss_hash_func_strings, - sizeof(rss_hash_func_strings)); - else if (stringset == ETH_SS_TUNABLES) - memcpy(data, tunable_strings, sizeof(tunable_strings)); - else if (stringset == ETH_SS_PHY_TUNABLES) - memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings)); - else if (stringset == ETH_SS_PHY_STATS && dev->phydev && - !ops->get_ethtool_phy_stats) - phy_ethtool_get_strings(dev->phydev, data); - else - /* ops->get_strings is valid because checked earlier */ - ops->get_strings(dev, stringset, data); -} - -static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) -{ - /* feature masks of legacy discrete ethtool ops */ - - switch (eth_cmd) { - case ETHTOOL_GTXCSUM: - case ETHTOOL_STXCSUM: - return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC; - case ETHTOOL_GRXCSUM: - case ETHTOOL_SRXCSUM: - return NETIF_F_RXCSUM; - case ETHTOOL_GSG: - case ETHTOOL_SSG: - return NETIF_F_SG; - case ETHTOOL_GTSO: - case ETHTOOL_STSO: - return NETIF_F_ALL_TSO; - case ETHTOOL_GGSO: - case ETHTOOL_SGSO: - return NETIF_F_GSO; - case ETHTOOL_GGRO: - case ETHTOOL_SGRO: - return NETIF_F_GRO; - default: - BUG(); - } -} - -static int ethtool_get_one_feature(struct net_device *dev, - char __user *useraddr, u32 ethcmd) -{ - netdev_features_t mask = ethtool_get_feature_mask(ethcmd); - struct ethtool_value edata = { - .cmd = ethcmd, - .data = !!(dev->features & mask), - }; - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_one_feature(struct net_device *dev, - void __user *useraddr, u32 ethcmd) -{ - struct ethtool_value edata; - netdev_features_t mask; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - mask = ethtool_get_feature_mask(ethcmd); - mask &= dev->hw_features; - if (!mask) - return -EOPNOTSUPP; - - if (edata.data) - dev->wanted_features |= mask; - else - dev->wanted_features &= ~mask; - - __netdev_update_features(dev); - - return 0; -} - -#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \ - ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) -#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \ - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \ - NETIF_F_RXHASH) - -static u32 __ethtool_get_flags(struct net_device *dev) -{ - u32 flags = 0; - - if (dev->features & NETIF_F_LRO) - flags |= ETH_FLAG_LRO; - if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) - flags |= ETH_FLAG_RXVLAN; - if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) - flags |= ETH_FLAG_TXVLAN; - if (dev->features & NETIF_F_NTUPLE) - flags |= ETH_FLAG_NTUPLE; - if (dev->features & NETIF_F_RXHASH) - flags |= ETH_FLAG_RXHASH; - - return flags; -} - -static int __ethtool_set_flags(struct net_device *dev, u32 data) -{ - netdev_features_t features = 0, changed; - - if (data & ~ETH_ALL_FLAGS) - return -EINVAL; - - if (data & ETH_FLAG_LRO) - features |= NETIF_F_LRO; - if (data & ETH_FLAG_RXVLAN) - features |= NETIF_F_HW_VLAN_CTAG_RX; - if (data & ETH_FLAG_TXVLAN) - features |= NETIF_F_HW_VLAN_CTAG_TX; - if (data & ETH_FLAG_NTUPLE) - features |= NETIF_F_NTUPLE; - if (data & ETH_FLAG_RXHASH) - features |= NETIF_F_RXHASH; - - /* allow changing only bits set in hw_features */ - changed = (features ^ dev->features) & ETH_ALL_FEATURES; - if (changed & ~dev->hw_features) - return (changed & dev->hw_features) ? 
-EINVAL : -EOPNOTSUPP; - - dev->wanted_features = - (dev->wanted_features & ~changed) | (features & changed); - - __netdev_update_features(dev); - - return 0; -} - -/* Given two link masks, AND them together and save the result in dst. */ -void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst, - struct ethtool_link_ksettings *src) -{ - unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS); - unsigned int idx = 0; - - for (; idx < size; idx++) { - dst->link_modes.supported[idx] &= - src->link_modes.supported[idx]; - dst->link_modes.advertising[idx] &= - src->link_modes.advertising[idx]; - } -} -EXPORT_SYMBOL(ethtool_intersect_link_masks); - -void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, - u32 legacy_u32) -{ - bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); - dst[0] = legacy_u32; -} -EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode); - -/* return false if src had higher bits set. lower bits always updated. */ -bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, - const unsigned long *src) -{ - bool retval = true; - - /* TODO: following test will soon always be true */ - if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(ext); - - bitmap_zero(ext, __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_fill(ext, 32); - bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS); - if (bitmap_intersects(ext, src, - __ETHTOOL_LINK_MODE_MASK_NBITS)) { - /* src mask goes beyond bit 31 */ - retval = false; - } - } - *legacy_u32 = src[0]; - return retval; -} -EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32); - -/* return false if legacy contained non-0 deprecated fields - * maxtxpkt/maxrxpkt. rest of ksettings always updated - */ -static bool -convert_legacy_settings_to_link_ksettings( - struct ethtool_link_ksettings *link_ksettings, - const struct ethtool_cmd *legacy_settings) -{ - bool retval = true; - - memset(link_ksettings, 0, sizeof(*link_ksettings)); - - /* This is used to tell users that driver is still using these - * deprecated legacy fields, and they should not use - * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS - */ - if (legacy_settings->maxtxpkt || - legacy_settings->maxrxpkt) - retval = false; - - ethtool_convert_legacy_u32_to_link_mode( - link_ksettings->link_modes.supported, - legacy_settings->supported); - ethtool_convert_legacy_u32_to_link_mode( - link_ksettings->link_modes.advertising, - legacy_settings->advertising); - ethtool_convert_legacy_u32_to_link_mode( - link_ksettings->link_modes.lp_advertising, - legacy_settings->lp_advertising); - link_ksettings->base.speed - = ethtool_cmd_speed(legacy_settings); - link_ksettings->base.duplex - = legacy_settings->duplex; - link_ksettings->base.port - = legacy_settings->port; - link_ksettings->base.phy_address - = legacy_settings->phy_address; - link_ksettings->base.autoneg - = legacy_settings->autoneg; - link_ksettings->base.mdio_support - = legacy_settings->mdio_support; - link_ksettings->base.eth_tp_mdix - = legacy_settings->eth_tp_mdix; - link_ksettings->base.eth_tp_mdix_ctrl - = legacy_settings->eth_tp_mdix_ctrl; - return retval; -} - -/* return false if ksettings link modes had higher bits - * set. 
legacy_settings always updated (best effort) - */ -static bool -convert_link_ksettings_to_legacy_settings( - struct ethtool_cmd *legacy_settings, - const struct ethtool_link_ksettings *link_ksettings) -{ - bool retval = true; - - memset(legacy_settings, 0, sizeof(*legacy_settings)); - /* this also clears the deprecated fields in legacy structure: - * __u8 transceiver; - * __u32 maxtxpkt; - * __u32 maxrxpkt; - */ - - retval &= ethtool_convert_link_mode_to_legacy_u32( - &legacy_settings->supported, - link_ksettings->link_modes.supported); - retval &= ethtool_convert_link_mode_to_legacy_u32( - &legacy_settings->advertising, - link_ksettings->link_modes.advertising); - retval &= ethtool_convert_link_mode_to_legacy_u32( - &legacy_settings->lp_advertising, - link_ksettings->link_modes.lp_advertising); - ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed); - legacy_settings->duplex - = link_ksettings->base.duplex; - legacy_settings->port - = link_ksettings->base.port; - legacy_settings->phy_address - = link_ksettings->base.phy_address; - legacy_settings->autoneg - = link_ksettings->base.autoneg; - legacy_settings->mdio_support - = link_ksettings->base.mdio_support; - legacy_settings->eth_tp_mdix - = link_ksettings->base.eth_tp_mdix; - legacy_settings->eth_tp_mdix_ctrl - = link_ksettings->base.eth_tp_mdix_ctrl; - legacy_settings->transceiver - = link_ksettings->base.transceiver; - return retval; -} - -/* number of 32-bit words to store the user's link mode bitmaps */ -#define __ETHTOOL_LINK_MODE_MASK_NU32 \ - DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) - -/* layout of the struct passed from/to userland */ -struct ethtool_link_usettings { - struct ethtool_link_settings base; - struct { - __u32 supported[__ETHTOOL_LINK_MODE_MASK_NU32]; - __u32 advertising[__ETHTOOL_LINK_MODE_MASK_NU32]; - __u32 lp_advertising[__ETHTOOL_LINK_MODE_MASK_NU32]; - } link_modes; -}; - -/* Internal kernel helper to query a device ethtool_link_settings. */ -int __ethtool_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *link_ksettings) -{ - ASSERT_RTNL(); - - if (!dev->ethtool_ops->get_link_ksettings) - return -EOPNOTSUPP; - - memset(link_ksettings, 0, sizeof(*link_ksettings)); - return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings); -} -EXPORT_SYMBOL(__ethtool_get_link_ksettings); - -/* convert ethtool_link_usettings in user space to a kernel internal - * ethtool_link_ksettings. return 0 on success, errno on error. - */ -static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to, - const void __user *from) -{ - struct ethtool_link_usettings link_usettings; - - if (copy_from_user(&link_usettings, from, sizeof(link_usettings))) - return -EFAULT; - - memcpy(&to->base, &link_usettings.base, sizeof(to->base)); - bitmap_from_arr32(to->link_modes.supported, - link_usettings.link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_from_arr32(to->link_modes.advertising, - link_usettings.link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_from_arr32(to->link_modes.lp_advertising, - link_usettings.link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); - - return 0; -} - -/* convert a kernel internal ethtool_link_ksettings to - * ethtool_link_usettings in user space. return 0 on success, errno on - * error. 
- */ -static int -store_link_ksettings_for_user(void __user *to, - const struct ethtool_link_ksettings *from) -{ - struct ethtool_link_usettings link_usettings; - - memcpy(&link_usettings.base, &from->base, sizeof(link_usettings)); - bitmap_to_arr32(link_usettings.link_modes.supported, - from->link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_to_arr32(link_usettings.link_modes.advertising, - from->link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_to_arr32(link_usettings.link_modes.lp_advertising, - from->link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); - - if (copy_to_user(to, &link_usettings, sizeof(link_usettings))) - return -EFAULT; - - return 0; -} - -/* Query device for its ethtool_link_settings. */ -static int ethtool_get_link_ksettings(struct net_device *dev, - void __user *useraddr) -{ - int err = 0; - struct ethtool_link_ksettings link_ksettings; - - ASSERT_RTNL(); - if (!dev->ethtool_ops->get_link_ksettings) - return -EOPNOTSUPP; - - /* handle bitmap nbits handshake */ - if (copy_from_user(&link_ksettings.base, useraddr, - sizeof(link_ksettings.base))) - return -EFAULT; - - if (__ETHTOOL_LINK_MODE_MASK_NU32 - != link_ksettings.base.link_mode_masks_nwords) { - /* wrong link mode nbits requested */ - memset(&link_ksettings, 0, sizeof(link_ksettings)); - link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS; - /* send back number of words required as negative val */ - compiletime_assert(__ETHTOOL_LINK_MODE_MASK_NU32 <= S8_MAX, - "need too many bits for link modes!"); - link_ksettings.base.link_mode_masks_nwords - = -((s8)__ETHTOOL_LINK_MODE_MASK_NU32); - - /* copy the base fields back to user, not the link - * mode bitmaps - */ - if (copy_to_user(useraddr, &link_ksettings.base, - sizeof(link_ksettings.base))) - return -EFAULT; - - return 0; - } - - /* handshake successful: user/kernel agree on - * link_mode_masks_nwords - */ - - memset(&link_ksettings, 0, sizeof(link_ksettings)); - err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings); - if (err < 0) - return err; - - /* make sure we tell the right values to user */ - link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS; - link_ksettings.base.link_mode_masks_nwords - = __ETHTOOL_LINK_MODE_MASK_NU32; - - return store_link_ksettings_for_user(useraddr, &link_ksettings); -} - -/* Update device ethtool_link_settings. */ -static int ethtool_set_link_ksettings(struct net_device *dev, - void __user *useraddr) -{ - int err; - struct ethtool_link_ksettings link_ksettings; - - ASSERT_RTNL(); - - if (!dev->ethtool_ops->set_link_ksettings) - return -EOPNOTSUPP; - - /* make sure nbits field has expected value */ - if (copy_from_user(&link_ksettings.base, useraddr, - sizeof(link_ksettings.base))) - return -EFAULT; - - if (__ETHTOOL_LINK_MODE_MASK_NU32 - != link_ksettings.base.link_mode_masks_nwords) - return -EINVAL; - - /* copy the whole structure, now that we know it has expected - * format - */ - err = load_link_ksettings_from_user(&link_ksettings, useraddr); - if (err) - return err; - - /* re-check nwords field, just in case */ - if (__ETHTOOL_LINK_MODE_MASK_NU32 - != link_ksettings.base.link_mode_masks_nwords) - return -EINVAL; - - return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); -} - -/* Query device for its ethtool_cmd settings. - * - * Backward compatibility note: for compatibility with legacy ethtool, this is - * now implemented via get_link_ksettings. 
When driver reports higher link mode - * bits, a kernel warning is logged once (with name of 1st driver/device) to - * recommend user to upgrade ethtool, but the command is successful (only the - * lower link mode bits reported back to user). Deprecated fields from - * ethtool_cmd (transceiver/maxrxpkt/maxtxpkt) are always set to zero. - */ -static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_link_ksettings link_ksettings; - struct ethtool_cmd cmd; - int err; - - ASSERT_RTNL(); - if (!dev->ethtool_ops->get_link_ksettings) - return -EOPNOTSUPP; - - memset(&link_ksettings, 0, sizeof(link_ksettings)); - err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings); - if (err < 0) - return err; - convert_link_ksettings_to_legacy_settings(&cmd, &link_ksettings); - - /* send a sensible cmd tag back to user */ - cmd.cmd = ETHTOOL_GSET; - - if (copy_to_user(useraddr, &cmd, sizeof(cmd))) - return -EFAULT; - - return 0; -} - -/* Update device link settings with given ethtool_cmd. - * - * Backward compatibility note: for compatibility with legacy ethtool, this is - * now always implemented via set_link_settings. When user's request updates - * deprecated ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel - * warning is logged once (with name of 1st driver/device) to recommend user to - * upgrade ethtool, and the request is rejected. - */ -static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_link_ksettings link_ksettings; - struct ethtool_cmd cmd; - - ASSERT_RTNL(); - - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) - return -EFAULT; - if (!dev->ethtool_ops->set_link_ksettings) - return -EOPNOTSUPP; - - if (!convert_legacy_settings_to_link_ksettings(&link_ksettings, &cmd)) - return -EINVAL; - link_ksettings.base.link_mode_masks_nwords = - __ETHTOOL_LINK_MODE_MASK_NU32; - return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); -} - -static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_drvinfo info; - const struct ethtool_ops *ops = dev->ethtool_ops; - - memset(&info, 0, sizeof(info)); - info.cmd = ETHTOOL_GDRVINFO; - if (ops->get_drvinfo) { - ops->get_drvinfo(dev, &info); - } else if (dev->dev.parent && dev->dev.parent->driver) { - strlcpy(info.bus_info, dev_name(dev->dev.parent), - sizeof(info.bus_info)); - strlcpy(info.driver, dev->dev.parent->driver->name, - sizeof(info.driver)); - } else { - return -EOPNOTSUPP; - } - - /* - * this method of obtaining string set info is deprecated; - * Use ETHTOOL_GSSET_INFO instead. 
- */ - if (ops->get_sset_count) { - int rc; - - rc = ops->get_sset_count(dev, ETH_SS_TEST); - if (rc >= 0) - info.testinfo_len = rc; - rc = ops->get_sset_count(dev, ETH_SS_STATS); - if (rc >= 0) - info.n_stats = rc; - rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); - if (rc >= 0) - info.n_priv_flags = rc; - } - if (ops->get_regs_len) { - int ret = ops->get_regs_len(dev); - - if (ret > 0) - info.regdump_len = ret; - } - - if (ops->get_eeprom_len) - info.eedump_len = ops->get_eeprom_len(dev); - - if (!info.fw_version[0]) - devlink_compat_running_version(dev, info.fw_version, - sizeof(info.fw_version)); - - if (copy_to_user(useraddr, &info, sizeof(info))) - return -EFAULT; - return 0; -} - -static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_sset_info info; - u64 sset_mask; - int i, idx = 0, n_bits = 0, ret, rc; - u32 *info_buf = NULL; - - if (copy_from_user(&info, useraddr, sizeof(info))) - return -EFAULT; - - /* store copy of mask, because we zero struct later on */ - sset_mask = info.sset_mask; - if (!sset_mask) - return 0; - - /* calculate size of return buffer */ - n_bits = hweight64(sset_mask); - - memset(&info, 0, sizeof(info)); - info.cmd = ETHTOOL_GSSET_INFO; - - info_buf = kcalloc(n_bits, sizeof(u32), GFP_USER); - if (!info_buf) - return -ENOMEM; - - /* - * fill return buffer based on input bitmask and successful - * get_sset_count return - */ - for (i = 0; i < 64; i++) { - if (!(sset_mask & (1ULL << i))) - continue; - - rc = __ethtool_get_sset_count(dev, i); - if (rc >= 0) { - info.sset_mask |= (1ULL << i); - info_buf[idx++] = rc; - } - } - - ret = -EFAULT; - if (copy_to_user(useraddr, &info, sizeof(info))) - goto out; - - useraddr += offsetof(struct ethtool_sset_info, data); - if (copy_to_user(useraddr, info_buf, idx * sizeof(u32))) - goto out; - - ret = 0; - -out: - kfree(info_buf); - return ret; -} - -static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, - u32 cmd, void __user *useraddr) -{ - struct ethtool_rxnfc info; - size_t info_size = sizeof(info); - int rc; - - if (!dev->ethtool_ops->set_rxnfc) - return -EOPNOTSUPP; - - /* struct ethtool_rxnfc was originally defined for - * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data - * members. User-space might still be using that - * definition. */ - if (cmd == ETHTOOL_SRXFH) - info_size = (offsetof(struct ethtool_rxnfc, data) + - sizeof(info.data)); - - if (copy_from_user(&info, useraddr, info_size)) - return -EFAULT; - - rc = dev->ethtool_ops->set_rxnfc(dev, &info); - if (rc) - return rc; - - if (cmd == ETHTOOL_SRXCLSRLINS && - copy_to_user(useraddr, &info, info_size)) - return -EFAULT; - - return 0; -} - -static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, - u32 cmd, void __user *useraddr) -{ - struct ethtool_rxnfc info; - size_t info_size = sizeof(info); - const struct ethtool_ops *ops = dev->ethtool_ops; - int ret; - void *rule_buf = NULL; - - if (!ops->get_rxnfc) - return -EOPNOTSUPP; - - /* struct ethtool_rxnfc was originally defined for - * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data - * members. User-space might still be using that - * definition. */ - if (cmd == ETHTOOL_GRXFH) - info_size = (offsetof(struct ethtool_rxnfc, data) + - sizeof(info.data)); - - if (copy_from_user(&info, useraddr, info_size)) - return -EFAULT; - - /* If FLOW_RSS was requested then user-space must be using the - * new definition, as FLOW_RSS is newer. 
- */ - if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) { - info_size = sizeof(info); - if (copy_from_user(&info, useraddr, info_size)) - return -EFAULT; - /* Since malicious users may modify the original data, - * we need to check whether FLOW_RSS is still requested. - */ - if (!(info.flow_type & FLOW_RSS)) - return -EINVAL; - } - - if (info.cmd != cmd) - return -EINVAL; - - if (info.cmd == ETHTOOL_GRXCLSRLALL) { - if (info.rule_cnt > 0) { - if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) - rule_buf = kcalloc(info.rule_cnt, sizeof(u32), - GFP_USER); - if (!rule_buf) - return -ENOMEM; - } - } - - ret = ops->get_rxnfc(dev, &info, rule_buf); - if (ret < 0) - goto err_out; - - ret = -EFAULT; - if (copy_to_user(useraddr, &info, info_size)) - goto err_out; - - if (rule_buf) { - useraddr += offsetof(struct ethtool_rxnfc, rule_locs); - if (copy_to_user(useraddr, rule_buf, - info.rule_cnt * sizeof(u32))) - goto err_out; - } - ret = 0; - -err_out: - kfree(rule_buf); - - return ret; -} - -static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, - struct ethtool_rxnfc *rx_rings, - u32 size) -{ - int i; - - if (copy_from_user(indir, useraddr, size * sizeof(indir[0]))) - return -EFAULT; - - /* Validate ring indices */ - for (i = 0; i < size; i++) - if (indir[i] >= rx_rings->data) - return -EINVAL; - - return 0; -} - -u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; - -void netdev_rss_key_fill(void *buffer, size_t len) -{ - BUG_ON(len > sizeof(netdev_rss_key)); - net_get_random_once(netdev_rss_key, sizeof(netdev_rss_key)); - memcpy(buffer, netdev_rss_key, len); -} -EXPORT_SYMBOL(netdev_rss_key_fill); - -static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) -{ - u32 dev_size, current_max = 0; - u32 *indir; - int ret; - - if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->get_rxfh) - return -EOPNOTSUPP; - dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); - if (dev_size == 0) - return -EOPNOTSUPP; - - indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); - if (!indir) - return -ENOMEM; - - ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); - if (ret) - goto out; - - while (dev_size--) - current_max = max(current_max, indir[dev_size]); - - *max = current_max; - -out: - kfree(indir); - return ret; -} - -static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, - void __user *useraddr) -{ - u32 user_size, dev_size; - u32 *indir; - int ret; - - if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->get_rxfh) - return -EOPNOTSUPP; - dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); - if (dev_size == 0) - return -EOPNOTSUPP; - - if (copy_from_user(&user_size, - useraddr + offsetof(struct ethtool_rxfh_indir, size), - sizeof(user_size))) - return -EFAULT; - - if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size), - &dev_size, sizeof(dev_size))) - return -EFAULT; - - /* If the user buffer size is 0, this is just a query for the - * device table size. Otherwise, if it's smaller than the - * device table size it's an error. - */ - if (user_size < dev_size) - return user_size == 0 ? 
0 : -EINVAL; - - indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); - if (!indir) - return -ENOMEM; - - ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); - if (ret) - goto out; - - if (copy_to_user(useraddr + - offsetof(struct ethtool_rxfh_indir, ring_index[0]), - indir, dev_size * sizeof(indir[0]))) - ret = -EFAULT; - -out: - kfree(indir); - return ret; -} - -static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_rxnfc rx_rings; - u32 user_size, dev_size, i; - u32 *indir; - const struct ethtool_ops *ops = dev->ethtool_ops; - int ret; - u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]); - - if (!ops->get_rxfh_indir_size || !ops->set_rxfh || - !ops->get_rxnfc) - return -EOPNOTSUPP; - - dev_size = ops->get_rxfh_indir_size(dev); - if (dev_size == 0) - return -EOPNOTSUPP; - - if (copy_from_user(&user_size, - useraddr + offsetof(struct ethtool_rxfh_indir, size), - sizeof(user_size))) - return -EFAULT; - - if (user_size != 0 && user_size != dev_size) - return -EINVAL; - - indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); - if (!indir) - return -ENOMEM; - - rx_rings.cmd = ETHTOOL_GRXRINGS; - ret = ops->get_rxnfc(dev, &rx_rings, NULL); - if (ret) - goto out; - - if (user_size == 0) { - for (i = 0; i < dev_size; i++) - indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); - } else { - ret = ethtool_copy_validate_indir(indir, - useraddr + ringidx_offset, - &rx_rings, - dev_size); - if (ret) - goto out; - } - - ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); - if (ret) - goto out; - - /* indicate whether rxfh was set to default */ - if (user_size == 0) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; - else - dev->priv_flags |= IFF_RXFH_CONFIGURED; - -out: - kfree(indir); - return ret; -} - -static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, - void __user *useraddr) -{ - int ret; - const struct ethtool_ops *ops = dev->ethtool_ops; - u32 user_indir_size, user_key_size; - u32 dev_indir_size = 0, dev_key_size = 0; - struct ethtool_rxfh rxfh; - u32 total_size; - u32 indir_bytes; - u32 *indir = NULL; - u8 dev_hfunc = 0; - u8 *hkey = NULL; - u8 *rss_config; - - if (!ops->get_rxfh) - return -EOPNOTSUPP; - - if (ops->get_rxfh_indir_size) - dev_indir_size = ops->get_rxfh_indir_size(dev); - if (ops->get_rxfh_key_size) - dev_key_size = ops->get_rxfh_key_size(dev); - - if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) - return -EFAULT; - user_indir_size = rxfh.indir_size; - user_key_size = rxfh.key_size; - - /* Check that reserved fields are 0 for now */ - if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) - return -EINVAL; - /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !ops->get_rxfh_context) - return -EOPNOTSUPP; - - rxfh.indir_size = dev_indir_size; - rxfh.key_size = dev_key_size; - if (copy_to_user(useraddr, &rxfh, sizeof(rxfh))) - return -EFAULT; - - if ((user_indir_size && (user_indir_size != dev_indir_size)) || - (user_key_size && (user_key_size != dev_key_size))) - return -EINVAL; - - indir_bytes = user_indir_size * sizeof(indir[0]); - total_size = indir_bytes + user_key_size; - rss_config = kzalloc(total_size, GFP_USER); - if (!rss_config) - return -ENOMEM; - - if (user_indir_size) - indir = (u32 *)rss_config; - - if (user_key_size) - hkey = rss_config + indir_bytes; - - if (rxfh.rss_context) - ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey, - &dev_hfunc, - rxfh.rss_context); - else - ret = 
dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc); - if (ret) - goto out; - - if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc), - &dev_hfunc, sizeof(rxfh.hfunc))) { - ret = -EFAULT; - } else if (copy_to_user(useraddr + - offsetof(struct ethtool_rxfh, rss_config[0]), - rss_config, total_size)) { - ret = -EFAULT; - } -out: - kfree(rss_config); - - return ret; -} - -static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, - void __user *useraddr) -{ - int ret; - const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_rxnfc rx_rings; - struct ethtool_rxfh rxfh; - u32 dev_indir_size = 0, dev_key_size = 0, i; - u32 *indir = NULL, indir_bytes = 0; - u8 *hkey = NULL; - u8 *rss_config; - u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); - bool delete = false; - - if (!ops->get_rxnfc || !ops->set_rxfh) - return -EOPNOTSUPP; - - if (ops->get_rxfh_indir_size) - dev_indir_size = ops->get_rxfh_indir_size(dev); - if (ops->get_rxfh_key_size) - dev_key_size = ops->get_rxfh_key_size(dev); - - if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) - return -EFAULT; - - /* Check that reserved fields are 0 for now */ - if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) - return -EINVAL; - /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !ops->set_rxfh_context) - return -EOPNOTSUPP; - - /* If either indir, hash key or function is valid, proceed further. - * Must request at least one change: indir size, hash key or function. - */ - if ((rxfh.indir_size && - rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && - rxfh.indir_size != dev_indir_size) || - (rxfh.key_size && (rxfh.key_size != dev_key_size)) || - (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && - rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE)) - return -EINVAL; - - if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - indir_bytes = dev_indir_size * sizeof(indir[0]); - - rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER); - if (!rss_config) - return -ENOMEM; - - rx_rings.cmd = ETHTOOL_GRXRINGS; - ret = ops->get_rxnfc(dev, &rx_rings, NULL); - if (ret) - goto out; - - /* rxfh.indir_size == 0 means reset the indir table to default (master - * context) or delete the context (other RSS contexts). - * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged. 
- */ - if (rxfh.indir_size && - rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) { - indir = (u32 *)rss_config; - ret = ethtool_copy_validate_indir(indir, - useraddr + rss_cfg_offset, - &rx_rings, - rxfh.indir_size); - if (ret) - goto out; - } else if (rxfh.indir_size == 0) { - if (rxfh.rss_context == 0) { - indir = (u32 *)rss_config; - for (i = 0; i < dev_indir_size; i++) - indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); - } else { - delete = true; - } - } - - if (rxfh.key_size) { - hkey = rss_config + indir_bytes; - if (copy_from_user(hkey, - useraddr + rss_cfg_offset + indir_bytes, - rxfh.key_size)) { - ret = -EFAULT; - goto out; - } - } - - if (rxfh.rss_context) - ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc, - &rxfh.rss_context, delete); - else - ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); - if (ret) - goto out; - - if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), - &rxfh.rss_context, sizeof(rxfh.rss_context))) - ret = -EFAULT; - - if (!rxfh.rss_context) { - /* indicate whether rxfh was set to default */ - if (rxfh.indir_size == 0) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; - else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - dev->priv_flags |= IFF_RXFH_CONFIGURED; - } - -out: - kfree(rss_config); - return ret; -} - -static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_regs regs; - const struct ethtool_ops *ops = dev->ethtool_ops; - void *regbuf; - int reglen, ret; - - if (!ops->get_regs || !ops->get_regs_len) - return -EOPNOTSUPP; - - if (copy_from_user(®s, useraddr, sizeof(regs))) - return -EFAULT; - - reglen = ops->get_regs_len(dev); - if (reglen <= 0) - return reglen; - - if (regs.len > reglen) - regs.len = reglen; - - regbuf = vzalloc(reglen); - if (!regbuf) - return -ENOMEM; - - if (regs.len < reglen) - reglen = regs.len; - - ops->get_regs(dev, ®s, regbuf); - - ret = -EFAULT; - if (copy_to_user(useraddr, ®s, sizeof(regs))) - goto out; - useraddr += offsetof(struct ethtool_regs, data); - if (copy_to_user(useraddr, regbuf, reglen)) - goto out; - ret = 0; - - out: - vfree(regbuf); - return ret; -} - -static int ethtool_reset(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value reset; - int ret; - - if (!dev->ethtool_ops->reset) - return -EOPNOTSUPP; - - if (copy_from_user(&reset, useraddr, sizeof(reset))) - return -EFAULT; - - ret = dev->ethtool_ops->reset(dev, &reset.data); - if (ret) - return ret; - - if (copy_to_user(useraddr, &reset, sizeof(reset))) - return -EFAULT; - return 0; -} - -static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_wolinfo wol; - - if (!dev->ethtool_ops->get_wol) - return -EOPNOTSUPP; - - memset(&wol, 0, sizeof(struct ethtool_wolinfo)); - wol.cmd = ETHTOOL_GWOL; - dev->ethtool_ops->get_wol(dev, &wol); - - if (copy_to_user(useraddr, &wol, sizeof(wol))) - return -EFAULT; - return 0; -} - -static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_wolinfo wol; - int ret; - - if (!dev->ethtool_ops->set_wol) - return -EOPNOTSUPP; - - if (copy_from_user(&wol, useraddr, sizeof(wol))) - return -EFAULT; - - ret = dev->ethtool_ops->set_wol(dev, &wol); - if (ret) - return ret; - - dev->wol_enabled = !!wol.wolopts; - - return 0; -} - -static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_eee edata; - int rc; - - if (!dev->ethtool_ops->get_eee) - return -EOPNOTSUPP; - - memset(&edata, 0, sizeof(struct ethtool_eee)); - edata.cmd = ETHTOOL_GEEE; 
- rc = dev->ethtool_ops->get_eee(dev, &edata); - - if (rc) - return rc; - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - - return 0; -} - -static int ethtool_set_eee(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_eee edata; - - if (!dev->ethtool_ops->set_eee) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return dev->ethtool_ops->set_eee(dev, &edata); -} - -static int ethtool_nway_reset(struct net_device *dev) -{ - if (!dev->ethtool_ops->nway_reset) - return -EOPNOTSUPP; - - return dev->ethtool_ops->nway_reset(dev); -} - -static int ethtool_get_link(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { .cmd = ETHTOOL_GLINK }; - - if (!dev->ethtool_ops->get_link) - return -EOPNOTSUPP; - - edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr, - int (*getter)(struct net_device *, - struct ethtool_eeprom *, u8 *), - u32 total_len) -{ - struct ethtool_eeprom eeprom; - void __user *userbuf = useraddr + sizeof(eeprom); - u32 bytes_remaining; - u8 *data; - int ret = 0; - - if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) - return -EFAULT; - - /* Check for wrap and zero */ - if (eeprom.offset + eeprom.len <= eeprom.offset) - return -EINVAL; - - /* Check for exceeding total eeprom len */ - if (eeprom.offset + eeprom.len > total_len) - return -EINVAL; - - data = kmalloc(PAGE_SIZE, GFP_USER); - if (!data) - return -ENOMEM; - - bytes_remaining = eeprom.len; - while (bytes_remaining > 0) { - eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE); - - ret = getter(dev, &eeprom, data); - if (ret) - break; - if (copy_to_user(userbuf, data, eeprom.len)) { - ret = -EFAULT; - break; - } - userbuf += eeprom.len; - eeprom.offset += eeprom.len; - bytes_remaining -= eeprom.len; - } - - eeprom.len = userbuf - (useraddr + sizeof(eeprom)); - eeprom.offset -= eeprom.len; - if (copy_to_user(useraddr, &eeprom, sizeof(eeprom))) - ret = -EFAULT; - - kfree(data); - return ret; -} - -static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_eeprom || !ops->get_eeprom_len || - !ops->get_eeprom_len(dev)) - return -EOPNOTSUPP; - - return ethtool_get_any_eeprom(dev, useraddr, ops->get_eeprom, - ops->get_eeprom_len(dev)); -} - -static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_eeprom eeprom; - const struct ethtool_ops *ops = dev->ethtool_ops; - void __user *userbuf = useraddr + sizeof(eeprom); - u32 bytes_remaining; - u8 *data; - int ret = 0; - - if (!ops->set_eeprom || !ops->get_eeprom_len || - !ops->get_eeprom_len(dev)) - return -EOPNOTSUPP; - - if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) - return -EFAULT; - - /* Check for wrap and zero */ - if (eeprom.offset + eeprom.len <= eeprom.offset) - return -EINVAL; - - /* Check for exceeding total eeprom len */ - if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) - return -EINVAL; - - data = kmalloc(PAGE_SIZE, GFP_USER); - if (!data) - return -ENOMEM; - - bytes_remaining = eeprom.len; - while (bytes_remaining > 0) { - eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE); - - if (copy_from_user(data, userbuf, eeprom.len)) { - ret = -EFAULT; - break; - } - ret = ops->set_eeprom(dev, &eeprom, data); - if (ret) - break; - 
userbuf += eeprom.len; - eeprom.offset += eeprom.len; - bytes_remaining -= eeprom.len; - } - - kfree(data); - return ret; -} - -static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; - - if (!dev->ethtool_ops->get_coalesce) - return -EOPNOTSUPP; - - dev->ethtool_ops->get_coalesce(dev, &coalesce); - - if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) - return -EFAULT; - return 0; -} - -static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_coalesce coalesce; - - if (!dev->ethtool_ops->set_coalesce) - return -EOPNOTSUPP; - - if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) - return -EFAULT; - - return dev->ethtool_ops->set_coalesce(dev, &coalesce); -} - -static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_ringparam ringparam = { .cmd = ETHTOOL_GRINGPARAM }; - - if (!dev->ethtool_ops->get_ringparam) - return -EOPNOTSUPP; - - dev->ethtool_ops->get_ringparam(dev, &ringparam); - - if (copy_to_user(useraddr, &ringparam, sizeof(ringparam))) - return -EFAULT; - return 0; -} - -static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM }; - - if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam) - return -EOPNOTSUPP; - - if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) - return -EFAULT; - - dev->ethtool_ops->get_ringparam(dev, &max); - - /* ensure new ring parameters are within the maximums */ - if (ringparam.rx_pending > max.rx_max_pending || - ringparam.rx_mini_pending > max.rx_mini_max_pending || - ringparam.rx_jumbo_pending > max.rx_jumbo_max_pending || - ringparam.tx_pending > max.tx_max_pending) - return -EINVAL; - - return dev->ethtool_ops->set_ringparam(dev, &ringparam); -} - -static noinline_for_stack int ethtool_get_channels(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; - - if (!dev->ethtool_ops->get_channels) - return -EOPNOTSUPP; - - dev->ethtool_ops->get_channels(dev, &channels); - - if (copy_to_user(useraddr, &channels, sizeof(channels))) - return -EFAULT; - return 0; -} - -static noinline_for_stack int ethtool_set_channels(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS }; - u16 from_channel, to_channel; - u32 max_rx_in_use = 0; - unsigned int i; - - if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels) - return -EOPNOTSUPP; - - if (copy_from_user(&channels, useraddr, sizeof(channels))) - return -EFAULT; - - dev->ethtool_ops->get_channels(dev, &curr); - - /* ensure new counts are within the maximums */ - if (channels.rx_count > curr.max_rx || - channels.tx_count > curr.max_tx || - channels.combined_count > curr.max_combined || - channels.other_count > curr.max_other) - return -EINVAL; - - /* ensure the new Rx count fits within the configured Rx flow - * indirection table settings */ - if (netif_is_rxfh_configured(dev) && - !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) && - (channels.combined_count + channels.rx_count) <= max_rx_in_use) - return -EINVAL; - - /* Disabling channels, query zero-copy AF_XDP sockets */ - from_channel = channels.combined_count + - min(channels.rx_count, channels.tx_count); - to_channel = curr.combined_count + 
max(curr.rx_count, curr.tx_count); - for (i = from_channel; i < to_channel; i++) - if (xdp_get_umem_from_qid(dev, i)) - return -EINVAL; - - return dev->ethtool_ops->set_channels(dev, &channels); -} - -static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_pauseparam pauseparam = { .cmd = ETHTOOL_GPAUSEPARAM }; - - if (!dev->ethtool_ops->get_pauseparam) - return -EOPNOTSUPP; - - dev->ethtool_ops->get_pauseparam(dev, &pauseparam); - - if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam))) - return -EFAULT; - return 0; -} - -static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_pauseparam pauseparam; - - if (!dev->ethtool_ops->set_pauseparam) - return -EOPNOTSUPP; - - if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam))) - return -EFAULT; - - return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); -} - -static int ethtool_self_test(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_test test; - const struct ethtool_ops *ops = dev->ethtool_ops; - u64 *data; - int ret, test_len; - - if (!ops->self_test || !ops->get_sset_count) - return -EOPNOTSUPP; - - test_len = ops->get_sset_count(dev, ETH_SS_TEST); - if (test_len < 0) - return test_len; - WARN_ON(test_len == 0); - - if (copy_from_user(&test, useraddr, sizeof(test))) - return -EFAULT; - - test.len = test_len; - data = kmalloc_array(test_len, sizeof(u64), GFP_USER); - if (!data) - return -ENOMEM; - - ops->self_test(dev, &test, data); - - ret = -EFAULT; - if (copy_to_user(useraddr, &test, sizeof(test))) - goto out; - useraddr += sizeof(test); - if (copy_to_user(useraddr, data, test.len * sizeof(u64))) - goto out; - ret = 0; - - out: - kfree(data); - return ret; -} - -static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_gstrings gstrings; - u8 *data; - int ret; - - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) - return -EFAULT; - - ret = __ethtool_get_sset_count(dev, gstrings.string_set); - if (ret < 0) - return ret; - if (ret > S32_MAX / ETH_GSTRING_LEN) - return -ENOMEM; - WARN_ON_ONCE(!ret); - - gstrings.len = ret; - - if (gstrings.len) { - data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); - if (!data) - return -ENOMEM; - - __ethtool_get_strings(dev, gstrings.string_set, data); - } else { - data = NULL; - } - - ret = -EFAULT; - if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) - goto out; - useraddr += sizeof(gstrings); - if (gstrings.len && - copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) - goto out; - ret = 0; - -out: - vfree(data); - return ret; -} - -static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_value id; - static bool busy; - const struct ethtool_ops *ops = dev->ethtool_ops; - int rc; - - if (!ops->set_phys_id) - return -EOPNOTSUPP; - - if (busy) - return -EBUSY; - - if (copy_from_user(&id, useraddr, sizeof(id))) - return -EFAULT; - - rc = ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); - if (rc < 0) - return rc; - - /* Drop the RTNL lock while waiting, but prevent reentry or - * removal of the device. - */ - busy = true; - dev_hold(dev); - rtnl_unlock(); - - if (rc == 0) { - /* Driver will handle this itself */ - schedule_timeout_interruptible( - id.data ? 
(id.data * HZ) : MAX_SCHEDULE_TIMEOUT); - } else { - /* Driver expects to be called at twice the frequency in rc */ - int n = rc * 2, i, interval = HZ / n; - - /* Count down seconds */ - do { - /* Count down iterations per second */ - i = n; - do { - rtnl_lock(); - rc = ops->set_phys_id(dev, - (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); - rtnl_unlock(); - if (rc) - break; - schedule_timeout_interruptible(interval); - } while (!signal_pending(current) && --i != 0); - } while (!signal_pending(current) && - (id.data == 0 || --id.data != 0)); - } - - rtnl_lock(); - dev_put(dev); - busy = false; - - (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); - return rc; -} - -static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_stats stats; - const struct ethtool_ops *ops = dev->ethtool_ops; - u64 *data; - int ret, n_stats; - - if (!ops->get_ethtool_stats || !ops->get_sset_count) - return -EOPNOTSUPP; - - n_stats = ops->get_sset_count(dev, ETH_SS_STATS); - if (n_stats < 0) - return n_stats; - if (n_stats > S32_MAX / sizeof(u64)) - return -ENOMEM; - WARN_ON_ONCE(!n_stats); - if (copy_from_user(&stats, useraddr, sizeof(stats))) - return -EFAULT; - - stats.n_stats = n_stats; - - if (n_stats) { - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (!data) - return -ENOMEM; - ops->get_ethtool_stats(dev, &stats, data); - } else { - data = NULL; - } - - ret = -EFAULT; - if (copy_to_user(useraddr, &stats, sizeof(stats))) - goto out; - useraddr += sizeof(stats); - if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64))) - goto out; - ret = 0; - - out: - vfree(data); - return ret; -} - -static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - struct phy_device *phydev = dev->phydev; - struct ethtool_stats stats; - u64 *data; - int ret, n_stats; - - if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count)) - return -EOPNOTSUPP; - - if (dev->phydev && !ops->get_ethtool_phy_stats) - n_stats = phy_ethtool_get_sset_count(dev->phydev); - else - n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); - if (n_stats < 0) - return n_stats; - if (n_stats > S32_MAX / sizeof(u64)) - return -ENOMEM; - WARN_ON_ONCE(!n_stats); - - if (copy_from_user(&stats, useraddr, sizeof(stats))) - return -EFAULT; - - stats.n_stats = n_stats; - - if (n_stats) { - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (!data) - return -ENOMEM; - - if (dev->phydev && !ops->get_ethtool_phy_stats) { - ret = phy_ethtool_get_stats(dev->phydev, &stats, data); - if (ret < 0) - goto out; - } else { - ops->get_ethtool_phy_stats(dev, &stats, data); - } - } else { - data = NULL; - } - - ret = -EFAULT; - if (copy_to_user(useraddr, &stats, sizeof(stats))) - goto out; - useraddr += sizeof(stats); - if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64))) - goto out; - ret = 0; - - out: - vfree(data); - return ret; -} - -static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_perm_addr epaddr; - - if (copy_from_user(&epaddr, useraddr, sizeof(epaddr))) - return -EFAULT; - - if (epaddr.size < dev->addr_len) - return -ETOOSMALL; - epaddr.size = dev->addr_len; - - if (copy_to_user(useraddr, &epaddr, sizeof(epaddr))) - return -EFAULT; - useraddr += sizeof(epaddr); - if (copy_to_user(useraddr, dev->perm_addr, epaddr.size)) - return -EFAULT; - return 0; -} - -static int ethtool_get_value(struct net_device *dev, char __user *useraddr, - u32 cmd, u32 (*actor)(struct 
net_device *)) -{ - struct ethtool_value edata = { .cmd = cmd }; - - if (!actor) - return -EOPNOTSUPP; - - edata.data = actor(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_value_void(struct net_device *dev, char __user *useraddr, - void (*actor)(struct net_device *, u32)) -{ - struct ethtool_value edata; - - if (!actor) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - actor(dev, edata.data); - return 0; -} - -static int ethtool_set_value(struct net_device *dev, char __user *useraddr, - int (*actor)(struct net_device *, u32)) -{ - struct ethtool_value edata; - - if (!actor) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return actor(dev, edata.data); -} - -static noinline_for_stack int ethtool_flash_device(struct net_device *dev, - char __user *useraddr) -{ - struct ethtool_flash efl; - - if (copy_from_user(&efl, useraddr, sizeof(efl))) - return -EFAULT; - efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0; - - if (!dev->ethtool_ops->flash_device) - return devlink_compat_flash_update(dev, efl.data); - - return dev->ethtool_ops->flash_device(dev, &efl); -} - -static int ethtool_set_dump(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_dump dump; - - if (!dev->ethtool_ops->set_dump) - return -EOPNOTSUPP; - - if (copy_from_user(&dump, useraddr, sizeof(dump))) - return -EFAULT; - - return dev->ethtool_ops->set_dump(dev, &dump); -} - -static int ethtool_get_dump_flag(struct net_device *dev, - void __user *useraddr) -{ - int ret; - struct ethtool_dump dump; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_dump_flag) - return -EOPNOTSUPP; - - if (copy_from_user(&dump, useraddr, sizeof(dump))) - return -EFAULT; - - ret = ops->get_dump_flag(dev, &dump); - if (ret) - return ret; - - if (copy_to_user(useraddr, &dump, sizeof(dump))) - return -EFAULT; - return 0; -} - -static int ethtool_get_dump_data(struct net_device *dev, - void __user *useraddr) -{ - int ret; - __u32 len; - struct ethtool_dump dump, tmp; - const struct ethtool_ops *ops = dev->ethtool_ops; - void *data = NULL; - - if (!ops->get_dump_data || !ops->get_dump_flag) - return -EOPNOTSUPP; - - if (copy_from_user(&dump, useraddr, sizeof(dump))) - return -EFAULT; - - memset(&tmp, 0, sizeof(tmp)); - tmp.cmd = ETHTOOL_GET_DUMP_FLAG; - ret = ops->get_dump_flag(dev, &tmp); - if (ret) - return ret; - - len = min(tmp.len, dump.len); - if (!len) - return -EFAULT; - - /* Don't ever let the driver think there's more space available - * than it requested with .get_dump_flag(). - */ - dump.len = len; - - /* Always allocate enough space to hold the whole thing so that the - * driver does not need to check the length and bother with partial - * dumping. - */ - data = vzalloc(tmp.len); - if (!data) - return -ENOMEM; - ret = ops->get_dump_data(dev, &dump, data); - if (ret) - goto out; - - /* There are two sane possibilities: - * 1. The driver's .get_dump_data() does not touch dump.len. - * 2. Or it may set dump.len to how much it really writes, which - * should be tmp.len (or len if it can do a partial dump). - * In any case respond to userspace with the actual length of data - * it's receiving. 
- */ - WARN_ON(dump.len != len && dump.len != tmp.len); - dump.len = len; - - if (copy_to_user(useraddr, &dump, sizeof(dump))) { - ret = -EFAULT; - goto out; - } - useraddr += offsetof(struct ethtool_dump, data); - if (copy_to_user(useraddr, data, len)) - ret = -EFAULT; -out: - vfree(data); - return ret; -} - -static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) -{ - int err = 0; - struct ethtool_ts_info info; - const struct ethtool_ops *ops = dev->ethtool_ops; - struct phy_device *phydev = dev->phydev; - - memset(&info, 0, sizeof(info)); - info.cmd = ETHTOOL_GET_TS_INFO; - - if (phydev && phydev->drv && phydev->drv->ts_info) { - err = phydev->drv->ts_info(phydev, &info); - } else if (ops->get_ts_info) { - err = ops->get_ts_info(dev, &info); - } else { - info.so_timestamping = - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info.phc_index = -1; - } - - if (err) - return err; - - if (copy_to_user(useraddr, &info, sizeof(info))) - err = -EFAULT; - - return err; -} - -static int __ethtool_get_module_info(struct net_device *dev, - struct ethtool_modinfo *modinfo) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - struct phy_device *phydev = dev->phydev; - - if (dev->sfp_bus) - return sfp_get_module_info(dev->sfp_bus, modinfo); - - if (phydev && phydev->drv && phydev->drv->module_info) - return phydev->drv->module_info(phydev, modinfo); - - if (ops->get_module_info) - return ops->get_module_info(dev, modinfo); - - return -EOPNOTSUPP; -} - -static int ethtool_get_module_info(struct net_device *dev, - void __user *useraddr) -{ - int ret; - struct ethtool_modinfo modinfo; - - if (copy_from_user(&modinfo, useraddr, sizeof(modinfo))) - return -EFAULT; - - ret = __ethtool_get_module_info(dev, &modinfo); - if (ret) - return ret; - - if (copy_to_user(useraddr, &modinfo, sizeof(modinfo))) - return -EFAULT; - - return 0; -} - -static int __ethtool_get_module_eeprom(struct net_device *dev, - struct ethtool_eeprom *ee, u8 *data) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - struct phy_device *phydev = dev->phydev; - - if (dev->sfp_bus) - return sfp_get_module_eeprom(dev->sfp_bus, ee, data); - - if (phydev && phydev->drv && phydev->drv->module_eeprom) - return phydev->drv->module_eeprom(phydev, ee, data); - - if (ops->get_module_eeprom) - return ops->get_module_eeprom(dev, ee, data); - - return -EOPNOTSUPP; -} - -static int ethtool_get_module_eeprom(struct net_device *dev, - void __user *useraddr) -{ - int ret; - struct ethtool_modinfo modinfo; - - ret = __ethtool_get_module_info(dev, &modinfo); - if (ret) - return ret; - - return ethtool_get_any_eeprom(dev, useraddr, - __ethtool_get_module_eeprom, - modinfo.eeprom_len); -} - -static int ethtool_tunable_valid(const struct ethtool_tunable *tuna) -{ - switch (tuna->id) { - case ETHTOOL_RX_COPYBREAK: - case ETHTOOL_TX_COPYBREAK: - if (tuna->len != sizeof(u32) || - tuna->type_id != ETHTOOL_TUNABLE_U32) - return -EINVAL; - break; - case ETHTOOL_PFC_PREVENTION_TOUT: - if (tuna->len != sizeof(u16) || - tuna->type_id != ETHTOOL_TUNABLE_U16) - return -EINVAL; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr) -{ - int ret; - struct ethtool_tunable tuna; - const struct ethtool_ops *ops = dev->ethtool_ops; - void *data; - - if (!ops->get_tunable) - return -EOPNOTSUPP; - if (copy_from_user(&tuna, useraddr, sizeof(tuna))) - return -EFAULT; - ret = ethtool_tunable_valid(&tuna); - if (ret) - return ret; - data = 
kmalloc(tuna.len, GFP_USER); - if (!data) - return -ENOMEM; - ret = ops->get_tunable(dev, &tuna, data); - if (ret) - goto out; - useraddr += sizeof(tuna); - ret = -EFAULT; - if (copy_to_user(useraddr, data, tuna.len)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) -{ - int ret; - struct ethtool_tunable tuna; - const struct ethtool_ops *ops = dev->ethtool_ops; - void *data; - - if (!ops->set_tunable) - return -EOPNOTSUPP; - if (copy_from_user(&tuna, useraddr, sizeof(tuna))) - return -EFAULT; - ret = ethtool_tunable_valid(&tuna); - if (ret) - return ret; - useraddr += sizeof(tuna); - data = memdup_user(useraddr, tuna.len); - if (IS_ERR(data)) - return PTR_ERR(data); - ret = ops->set_tunable(dev, &tuna, data); - - kfree(data); - return ret; -} - -static noinline_for_stack int -ethtool_get_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) -{ - u32 bit; - int ret; - DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); - - if (!dev->ethtool_ops->get_per_queue_coalesce) - return -EOPNOTSUPP; - - useraddr += sizeof(*per_queue_opt); - - bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask, - MAX_NUM_QUEUE); - - for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { - struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; - - ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, &coalesce); - if (ret != 0) - return ret; - if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) - return -EFAULT; - useraddr += sizeof(coalesce); - } - - return 0; -} - -static noinline_for_stack int -ethtool_set_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) -{ - u32 bit; - int i, ret = 0; - int n_queue; - struct ethtool_coalesce *backup = NULL, *tmp = NULL; - DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); - - if ((!dev->ethtool_ops->set_per_queue_coalesce) || - (!dev->ethtool_ops->get_per_queue_coalesce)) - return -EOPNOTSUPP; - - useraddr += sizeof(*per_queue_opt); - - bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask, MAX_NUM_QUEUE); - n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE); - tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL); - if (!backup) - return -ENOMEM; - - for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { - struct ethtool_coalesce coalesce; - - ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, tmp); - if (ret != 0) - goto roll_back; - - tmp++; - - if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) { - ret = -EFAULT; - goto roll_back; - } - - ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce); - if (ret != 0) - goto roll_back; - - useraddr += sizeof(coalesce); - } - -roll_back: - if (ret != 0) { - tmp = backup; - for_each_set_bit(i, queue_mask, bit) { - dev->ethtool_ops->set_per_queue_coalesce(dev, i, tmp); - tmp++; - } - } - kfree(backup); - - return ret; -} - -static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev, - void __user *useraddr, u32 sub_cmd) -{ - struct ethtool_per_queue_op per_queue_opt; - - if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt))) - return -EFAULT; - - if (per_queue_opt.sub_command != sub_cmd) - return -EINVAL; - - switch (per_queue_opt.sub_command) { - case ETHTOOL_GCOALESCE: - return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); - case ETHTOOL_SCOALESCE: - return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt); - default: - 
return -EOPNOTSUPP; - }; -} - -static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna) -{ - switch (tuna->id) { - case ETHTOOL_PHY_DOWNSHIFT: - case ETHTOOL_PHY_FAST_LINK_DOWN: - if (tuna->len != sizeof(u8) || - tuna->type_id != ETHTOOL_TUNABLE_U8) - return -EINVAL; - break; - case ETHTOOL_PHY_EDPD: - if (tuna->len != sizeof(u16) || - tuna->type_id != ETHTOOL_TUNABLE_U16) - return -EINVAL; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int get_phy_tunable(struct net_device *dev, void __user *useraddr) -{ - int ret; - struct ethtool_tunable tuna; - struct phy_device *phydev = dev->phydev; - void *data; - - if (!(phydev && phydev->drv && phydev->drv->get_tunable)) - return -EOPNOTSUPP; - - if (copy_from_user(&tuna, useraddr, sizeof(tuna))) - return -EFAULT; - ret = ethtool_phy_tunable_valid(&tuna); - if (ret) - return ret; - data = kmalloc(tuna.len, GFP_USER); - if (!data) - return -ENOMEM; - mutex_lock(&phydev->lock); - ret = phydev->drv->get_tunable(phydev, &tuna, data); - mutex_unlock(&phydev->lock); - if (ret) - goto out; - useraddr += sizeof(tuna); - ret = -EFAULT; - if (copy_to_user(useraddr, data, tuna.len)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int set_phy_tunable(struct net_device *dev, void __user *useraddr) -{ - int ret; - struct ethtool_tunable tuna; - struct phy_device *phydev = dev->phydev; - void *data; - - if (!(phydev && phydev->drv && phydev->drv->set_tunable)) - return -EOPNOTSUPP; - if (copy_from_user(&tuna, useraddr, sizeof(tuna))) - return -EFAULT; - ret = ethtool_phy_tunable_valid(&tuna); - if (ret) - return ret; - useraddr += sizeof(tuna); - data = memdup_user(useraddr, tuna.len); - if (IS_ERR(data)) - return PTR_ERR(data); - mutex_lock(&phydev->lock); - ret = phydev->drv->set_tunable(phydev, &tuna, data); - mutex_unlock(&phydev->lock); - - kfree(data); - return ret; -} - -static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_fecparam fecparam = { .cmd = ETHTOOL_GFECPARAM }; - int rc; - - if (!dev->ethtool_ops->get_fecparam) - return -EOPNOTSUPP; - - rc = dev->ethtool_ops->get_fecparam(dev, &fecparam); - if (rc) - return rc; - - if (copy_to_user(useraddr, &fecparam, sizeof(fecparam))) - return -EFAULT; - return 0; -} - -static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_fecparam fecparam; - - if (!dev->ethtool_ops->set_fecparam) - return -EOPNOTSUPP; - - if (copy_from_user(&fecparam, useraddr, sizeof(fecparam))) - return -EFAULT; - - return dev->ethtool_ops->set_fecparam(dev, &fecparam); -} - -/* The main entry point in this file. 
Called from net/core/dev_ioctl.c */
-
-int dev_ethtool(struct net *net, struct ifreq *ifr)
-{
- struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
- void __user *useraddr = ifr->ifr_data;
- u32 ethcmd, sub_cmd;
- int rc;
- netdev_features_t old_features;
-
- if (!dev || !netif_device_present(dev))
- return -ENODEV;
-
- if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
- return -EFAULT;
-
- if (ethcmd == ETHTOOL_PERQUEUE) {
- if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd)))
- return -EFAULT;
- } else {
- sub_cmd = ethcmd;
- }
- /* Allow some commands to be done by anyone */
- switch (sub_cmd) {
- case ETHTOOL_GSET:
- case ETHTOOL_GDRVINFO:
- case ETHTOOL_GMSGLVL:
- case ETHTOOL_GLINK:
- case ETHTOOL_GCOALESCE:
- case ETHTOOL_GRINGPARAM:
- case ETHTOOL_GPAUSEPARAM:
- case ETHTOOL_GRXCSUM:
- case ETHTOOL_GTXCSUM:
- case ETHTOOL_GSG:
- case ETHTOOL_GSSET_INFO:
- case ETHTOOL_GSTRINGS:
- case ETHTOOL_GSTATS:
- case ETHTOOL_GPHYSTATS:
- case ETHTOOL_GTSO:
- case ETHTOOL_GPERMADDR:
- case ETHTOOL_GUFO:
- case ETHTOOL_GGSO:
- case ETHTOOL_GGRO:
- case ETHTOOL_GFLAGS:
- case ETHTOOL_GPFLAGS:
- case ETHTOOL_GRXFH:
- case ETHTOOL_GRXRINGS:
- case ETHTOOL_GRXCLSRLCNT:
- case ETHTOOL_GRXCLSRULE:
- case ETHTOOL_GRXCLSRLALL:
- case ETHTOOL_GRXFHINDIR:
- case ETHTOOL_GRSSH:
- case ETHTOOL_GFEATURES:
- case ETHTOOL_GCHANNELS:
- case ETHTOOL_GET_TS_INFO:
- case ETHTOOL_GEEE:
- case ETHTOOL_GTUNABLE:
- case ETHTOOL_PHY_GTUNABLE:
- case ETHTOOL_GLINKSETTINGS:
- case ETHTOOL_GFECPARAM:
- break;
- default:
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
- }
-
- if (dev->ethtool_ops->begin) {
- rc = dev->ethtool_ops->begin(dev);
- if (rc < 0)
- return rc;
- }
- old_features = dev->features;
-
- switch (ethcmd) {
- case ETHTOOL_GSET:
- rc = ethtool_get_settings(dev, useraddr);
- break;
- case ETHTOOL_SSET:
- rc = ethtool_set_settings(dev, useraddr);
- break;
- case ETHTOOL_GDRVINFO:
- rc = ethtool_get_drvinfo(dev, useraddr);
- break;
- case ETHTOOL_GREGS:
- rc = ethtool_get_regs(dev, useraddr);
- break;
- case ETHTOOL_GWOL:
- rc = ethtool_get_wol(dev, useraddr);
- break;
- case ETHTOOL_SWOL:
- rc = ethtool_set_wol(dev, useraddr);
- break;
- case ETHTOOL_GMSGLVL:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- dev->ethtool_ops->get_msglevel);
- break;
- case ETHTOOL_SMSGLVL:
- rc = ethtool_set_value_void(dev, useraddr,
- dev->ethtool_ops->set_msglevel);
- break;
- case ETHTOOL_GEEE:
- rc = ethtool_get_eee(dev, useraddr);
- break;
- case ETHTOOL_SEEE:
- rc = ethtool_set_eee(dev, useraddr);
- break;
- case ETHTOOL_NWAY_RST:
- rc = ethtool_nway_reset(dev);
- break;
- case ETHTOOL_GLINK:
- rc = ethtool_get_link(dev, useraddr);
- break;
- case ETHTOOL_GEEPROM:
- rc = ethtool_get_eeprom(dev, useraddr);
- break;
- case ETHTOOL_SEEPROM:
- rc = ethtool_set_eeprom(dev, useraddr);
- break;
- case ETHTOOL_GCOALESCE:
- rc = ethtool_get_coalesce(dev, useraddr);
- break;
- case ETHTOOL_SCOALESCE:
- rc = ethtool_set_coalesce(dev, useraddr);
- break;
- case ETHTOOL_GRINGPARAM:
- rc = ethtool_get_ringparam(dev, useraddr);
- break;
- case ETHTOOL_SRINGPARAM:
- rc = ethtool_set_ringparam(dev, useraddr);
- break;
- case ETHTOOL_GPAUSEPARAM:
- rc = ethtool_get_pauseparam(dev, useraddr);
- break;
- case ETHTOOL_SPAUSEPARAM:
- rc = ethtool_set_pauseparam(dev, useraddr);
- break;
- case ETHTOOL_TEST:
- rc = ethtool_self_test(dev, useraddr);
- break;
- case ETHTOOL_GSTRINGS:
- rc = ethtool_get_strings(dev, useraddr);
- break;
- case ETHTOOL_PHYS_ID:
- rc = ethtool_phys_id(dev,
useraddr); - break; - case ETHTOOL_GSTATS: - rc = ethtool_get_stats(dev, useraddr); - break; - case ETHTOOL_GPERMADDR: - rc = ethtool_get_perm_addr(dev, useraddr); - break; - case ETHTOOL_GFLAGS: - rc = ethtool_get_value(dev, useraddr, ethcmd, - __ethtool_get_flags); - break; - case ETHTOOL_SFLAGS: - rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); - break; - case ETHTOOL_GPFLAGS: - rc = ethtool_get_value(dev, useraddr, ethcmd, - dev->ethtool_ops->get_priv_flags); - break; - case ETHTOOL_SPFLAGS: - rc = ethtool_set_value(dev, useraddr, - dev->ethtool_ops->set_priv_flags); - break; - case ETHTOOL_GRXFH: - case ETHTOOL_GRXRINGS: - case ETHTOOL_GRXCLSRLCNT: - case ETHTOOL_GRXCLSRULE: - case ETHTOOL_GRXCLSRLALL: - rc = ethtool_get_rxnfc(dev, ethcmd, useraddr); - break; - case ETHTOOL_SRXFH: - case ETHTOOL_SRXCLSRLDEL: - case ETHTOOL_SRXCLSRLINS: - rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); - break; - case ETHTOOL_FLASHDEV: - rc = ethtool_flash_device(dev, useraddr); - break; - case ETHTOOL_RESET: - rc = ethtool_reset(dev, useraddr); - break; - case ETHTOOL_GSSET_INFO: - rc = ethtool_get_sset_info(dev, useraddr); - break; - case ETHTOOL_GRXFHINDIR: - rc = ethtool_get_rxfh_indir(dev, useraddr); - break; - case ETHTOOL_SRXFHINDIR: - rc = ethtool_set_rxfh_indir(dev, useraddr); - break; - case ETHTOOL_GRSSH: - rc = ethtool_get_rxfh(dev, useraddr); - break; - case ETHTOOL_SRSSH: - rc = ethtool_set_rxfh(dev, useraddr); - break; - case ETHTOOL_GFEATURES: - rc = ethtool_get_features(dev, useraddr); - break; - case ETHTOOL_SFEATURES: - rc = ethtool_set_features(dev, useraddr); - break; - case ETHTOOL_GTXCSUM: - case ETHTOOL_GRXCSUM: - case ETHTOOL_GSG: - case ETHTOOL_GTSO: - case ETHTOOL_GGSO: - case ETHTOOL_GGRO: - rc = ethtool_get_one_feature(dev, useraddr, ethcmd); - break; - case ETHTOOL_STXCSUM: - case ETHTOOL_SRXCSUM: - case ETHTOOL_SSG: - case ETHTOOL_STSO: - case ETHTOOL_SGSO: - case ETHTOOL_SGRO: - rc = ethtool_set_one_feature(dev, useraddr, ethcmd); - break; - case ETHTOOL_GCHANNELS: - rc = ethtool_get_channels(dev, useraddr); - break; - case ETHTOOL_SCHANNELS: - rc = ethtool_set_channels(dev, useraddr); - break; - case ETHTOOL_SET_DUMP: - rc = ethtool_set_dump(dev, useraddr); - break; - case ETHTOOL_GET_DUMP_FLAG: - rc = ethtool_get_dump_flag(dev, useraddr); - break; - case ETHTOOL_GET_DUMP_DATA: - rc = ethtool_get_dump_data(dev, useraddr); - break; - case ETHTOOL_GET_TS_INFO: - rc = ethtool_get_ts_info(dev, useraddr); - break; - case ETHTOOL_GMODULEINFO: - rc = ethtool_get_module_info(dev, useraddr); - break; - case ETHTOOL_GMODULEEEPROM: - rc = ethtool_get_module_eeprom(dev, useraddr); - break; - case ETHTOOL_GTUNABLE: - rc = ethtool_get_tunable(dev, useraddr); - break; - case ETHTOOL_STUNABLE: - rc = ethtool_set_tunable(dev, useraddr); - break; - case ETHTOOL_GPHYSTATS: - rc = ethtool_get_phy_stats(dev, useraddr); - break; - case ETHTOOL_PERQUEUE: - rc = ethtool_set_per_queue(dev, useraddr, sub_cmd); - break; - case ETHTOOL_GLINKSETTINGS: - rc = ethtool_get_link_ksettings(dev, useraddr); - break; - case ETHTOOL_SLINKSETTINGS: - rc = ethtool_set_link_ksettings(dev, useraddr); - break; - case ETHTOOL_PHY_GTUNABLE: - rc = get_phy_tunable(dev, useraddr); - break; - case ETHTOOL_PHY_STUNABLE: - rc = set_phy_tunable(dev, useraddr); - break; - case ETHTOOL_GFECPARAM: - rc = ethtool_get_fecparam(dev, useraddr); - break; - case ETHTOOL_SFECPARAM: - rc = ethtool_set_fecparam(dev, useraddr); - break; - default: - rc = -EOPNOTSUPP; - } - - if (dev->ethtool_ops->complete) - 
dev->ethtool_ops->complete(dev); - - if (old_features != dev->features) - netdev_features_change(dev); - - return rc; -} - -struct ethtool_rx_flow_key { - struct flow_dissector_key_basic basic; - union { - struct flow_dissector_key_ipv4_addrs ipv4; - struct flow_dissector_key_ipv6_addrs ipv6; - }; - struct flow_dissector_key_ports tp; - struct flow_dissector_key_ip ip; - struct flow_dissector_key_vlan vlan; - struct flow_dissector_key_eth_addrs eth_addrs; -} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ - -struct ethtool_rx_flow_match { - struct flow_dissector dissector; - struct ethtool_rx_flow_key key; - struct ethtool_rx_flow_key mask; -}; - -struct ethtool_rx_flow_rule * -ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) -{ - const struct ethtool_rx_flow_spec *fs = input->fs; - static struct in6_addr zero_addr = {}; - struct ethtool_rx_flow_match *match; - struct ethtool_rx_flow_rule *flow; - struct flow_action_entry *act; - - flow = kzalloc(sizeof(struct ethtool_rx_flow_rule) + - sizeof(struct ethtool_rx_flow_match), GFP_KERNEL); - if (!flow) - return ERR_PTR(-ENOMEM); - - /* ethtool_rx supports only one single action per rule. */ - flow->rule = flow_rule_alloc(1); - if (!flow->rule) { - kfree(flow); - return ERR_PTR(-ENOMEM); - } - - match = (struct ethtool_rx_flow_match *)flow->priv; - flow->rule->match.dissector = &match->dissector; - flow->rule->match.mask = &match->mask; - flow->rule->match.key = &match->key; - - match->mask.basic.n_proto = htons(0xffff); - - switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { - case ETHER_FLOW: { - const struct ethhdr *ether_spec, *ether_m_spec; - - ether_spec = &fs->h_u.ether_spec; - ether_m_spec = &fs->m_u.ether_spec; - - if (!is_zero_ether_addr(ether_m_spec->h_source)) { - ether_addr_copy(match->key.eth_addrs.src, - ether_spec->h_source); - ether_addr_copy(match->mask.eth_addrs.src, - ether_m_spec->h_source); - } - if (!is_zero_ether_addr(ether_m_spec->h_dest)) { - ether_addr_copy(match->key.eth_addrs.dst, - ether_spec->h_dest); - ether_addr_copy(match->mask.eth_addrs.dst, - ether_m_spec->h_dest); - } - if (ether_m_spec->h_proto) { - match->key.basic.n_proto = ether_spec->h_proto; - match->mask.basic.n_proto = ether_m_spec->h_proto; - } - } - break; - case TCP_V4_FLOW: - case UDP_V4_FLOW: { - const struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec; - - match->key.basic.n_proto = htons(ETH_P_IP); - - v4_spec = &fs->h_u.tcp_ip4_spec; - v4_m_spec = &fs->m_u.tcp_ip4_spec; - - if (v4_m_spec->ip4src) { - match->key.ipv4.src = v4_spec->ip4src; - match->mask.ipv4.src = v4_m_spec->ip4src; - } - if (v4_m_spec->ip4dst) { - match->key.ipv4.dst = v4_spec->ip4dst; - match->mask.ipv4.dst = v4_m_spec->ip4dst; - } - if (v4_m_spec->ip4src || - v4_m_spec->ip4dst) { - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS); - match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] = - offsetof(struct ethtool_rx_flow_key, ipv4); - } - if (v4_m_spec->psrc) { - match->key.tp.src = v4_spec->psrc; - match->mask.tp.src = v4_m_spec->psrc; - } - if (v4_m_spec->pdst) { - match->key.tp.dst = v4_spec->pdst; - match->mask.tp.dst = v4_m_spec->pdst; - } - if (v4_m_spec->psrc || - v4_m_spec->pdst) { - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_PORTS); - match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = - offsetof(struct ethtool_rx_flow_key, tp); - } - if (v4_m_spec->tos) { - match->key.ip.tos = v4_spec->tos; - match->mask.ip.tos = v4_m_spec->tos; - match->dissector.used_keys |= - 
BIT(FLOW_DISSECTOR_KEY_IP); - match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = - offsetof(struct ethtool_rx_flow_key, ip); - } - } - break; - case TCP_V6_FLOW: - case UDP_V6_FLOW: { - const struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec; - - match->key.basic.n_proto = htons(ETH_P_IPV6); - - v6_spec = &fs->h_u.tcp_ip6_spec; - v6_m_spec = &fs->m_u.tcp_ip6_spec; - if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { - memcpy(&match->key.ipv6.src, v6_spec->ip6src, - sizeof(match->key.ipv6.src)); - memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src, - sizeof(match->mask.ipv6.src)); - } - if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { - memcpy(&match->key.ipv6.dst, v6_spec->ip6dst, - sizeof(match->key.ipv6.dst)); - memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst, - sizeof(match->mask.ipv6.dst)); - } - if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) || - memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS); - match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] = - offsetof(struct ethtool_rx_flow_key, ipv6); - } - if (v6_m_spec->psrc) { - match->key.tp.src = v6_spec->psrc; - match->mask.tp.src = v6_m_spec->psrc; - } - if (v6_m_spec->pdst) { - match->key.tp.dst = v6_spec->pdst; - match->mask.tp.dst = v6_m_spec->pdst; - } - if (v6_m_spec->psrc || - v6_m_spec->pdst) { - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_PORTS); - match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = - offsetof(struct ethtool_rx_flow_key, tp); - } - if (v6_m_spec->tclass) { - match->key.ip.tos = v6_spec->tclass; - match->mask.ip.tos = v6_m_spec->tclass; - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_IP); - match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = - offsetof(struct ethtool_rx_flow_key, ip); - } - } - break; - default: - ethtool_rx_flow_rule_destroy(flow); - return ERR_PTR(-EINVAL); - } - - switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { - case TCP_V4_FLOW: - case TCP_V6_FLOW: - match->key.basic.ip_proto = IPPROTO_TCP; - break; - case UDP_V4_FLOW: - case UDP_V6_FLOW: - match->key.basic.ip_proto = IPPROTO_UDP; - break; - } - match->mask.basic.ip_proto = 0xff; - - match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC); - match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] = - offsetof(struct ethtool_rx_flow_key, basic); - - if (fs->flow_type & FLOW_EXT) { - const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; - const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; - - if (ext_m_spec->vlan_etype) { - match->key.vlan.vlan_tpid = ext_h_spec->vlan_etype; - match->mask.vlan.vlan_tpid = ext_m_spec->vlan_etype; - } - - if (ext_m_spec->vlan_tci) { - match->key.vlan.vlan_id = - ntohs(ext_h_spec->vlan_tci) & 0x0fff; - match->mask.vlan.vlan_id = - ntohs(ext_m_spec->vlan_tci) & 0x0fff; - - match->key.vlan.vlan_dei = - !!(ext_h_spec->vlan_tci & htons(0x1000)); - match->mask.vlan.vlan_dei = - !!(ext_m_spec->vlan_tci & htons(0x1000)); - - match->key.vlan.vlan_priority = - (ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13; - match->mask.vlan.vlan_priority = - (ntohs(ext_m_spec->vlan_tci) & 0xe000) >> 13; - } - - if (ext_m_spec->vlan_etype || - ext_m_spec->vlan_tci) { - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_VLAN); - match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] = - offsetof(struct ethtool_rx_flow_key, vlan); - } - } - if (fs->flow_type & FLOW_MAC_EXT) { - const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; - const struct ethtool_flow_ext *ext_m_spec = 
&fs->m_ext; - - memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest, - ETH_ALEN); - memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest, - ETH_ALEN); - - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS); - match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] = - offsetof(struct ethtool_rx_flow_key, eth_addrs); - } - - act = &flow->rule->action.entries[0]; - switch (fs->ring_cookie) { - case RX_CLS_FLOW_DISC: - act->id = FLOW_ACTION_DROP; - break; - case RX_CLS_FLOW_WAKE: - act->id = FLOW_ACTION_WAKE; - break; - default: - act->id = FLOW_ACTION_QUEUE; - if (fs->flow_type & FLOW_RSS) - act->queue.ctx = input->rss_ctx; - - act->queue.vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); - act->queue.index = ethtool_get_flow_spec_ring(fs->ring_cookie); - break; - } - - return flow; -} -EXPORT_SYMBOL(ethtool_rx_flow_rule_create); - -void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *flow) -{ - kfree(flow->rule); - kfree(flow); -} -EXPORT_SYMBOL(ethtool_rx_flow_rule_destroy); diff --git a/net/core/filter.c b/net/core/filter.c index 538f6a735a19..792e3744b915 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3459,119 +3459,30 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { .arg2_type = ARG_ANYTHING, }; -static int __bpf_tx_xdp(struct net_device *dev, - struct bpf_map *map, - struct xdp_buff *xdp, - u32 index) -{ - struct xdp_frame *xdpf; - int err, sent; - - if (!dev->netdev_ops->ndo_xdp_xmit) { - return -EOPNOTSUPP; - } - - err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); - if (unlikely(err)) - return err; - - xdpf = convert_to_xdp_frame(xdp); - if (unlikely(!xdpf)) - return -EOVERFLOW; - - sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH); - if (sent <= 0) - return sent; - return 0; -} - -static noinline int -xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri) -{ - struct net_device *fwd; - u32 index = ri->tgt_index; - int err; - - fwd = dev_get_by_index_rcu(dev_net(dev), index); - ri->tgt_index = 0; - if (unlikely(!fwd)) { - err = -EINVAL; - goto err; - } - - err = __bpf_tx_xdp(fwd, NULL, xdp, 0); - if (unlikely(err)) - goto err; - - _trace_xdp_redirect(dev, xdp_prog, index); - return 0; -err: - _trace_xdp_redirect_err(dev, xdp_prog, index, err); - return err; -} - static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, - struct bpf_map *map, - struct xdp_buff *xdp, - u32 index) + struct bpf_map *map, struct xdp_buff *xdp) { - int err; - switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: { - struct bpf_dtab_netdev *dst = fwd; - - err = dev_map_enqueue(dst, xdp, dev_rx); - if (unlikely(err)) - return err; - break; - } - case BPF_MAP_TYPE_CPUMAP: { - struct bpf_cpu_map_entry *rcpu = fwd; - - err = cpu_map_enqueue(rcpu, xdp, dev_rx); - if (unlikely(err)) - return err; - break; - } - case BPF_MAP_TYPE_XSKMAP: { - struct xdp_sock *xs = fwd; - - err = __xsk_map_redirect(map, xdp, xs); - return err; - } + case BPF_MAP_TYPE_DEVMAP_HASH: + return dev_map_enqueue(fwd, xdp, dev_rx); + case BPF_MAP_TYPE_CPUMAP: + return cpu_map_enqueue(fwd, xdp, dev_rx); + case BPF_MAP_TYPE_XSKMAP: + return __xsk_map_redirect(fwd, xdp); default: - break; + return -EBADRQC; } return 0; } -void xdp_do_flush_map(void) +void xdp_do_flush(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - struct bpf_map *map = ri->map_to_flush; - - ri->map_to_flush = NULL; - if (map) { - switch (map->map_type) { - case 
BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: - __dev_map_flush(map); - break; - case BPF_MAP_TYPE_CPUMAP: - __cpu_map_flush(map); - break; - case BPF_MAP_TYPE_XSKMAP: - __xsk_map_flush(map); - break; - default: - break; - } - } + __dev_flush(); + __cpu_map_flush(); + __xsk_map_flush(); } -EXPORT_SYMBOL_GPL(xdp_do_flush_map); +EXPORT_SYMBOL_GPL(xdp_do_flush); static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) { @@ -3606,10 +3517,11 @@ void bpf_clear_redirect_map(struct bpf_map *map) } } -static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, struct bpf_map *map, - struct bpf_redirect_info *ri) +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) { + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_map *map = READ_ONCE(ri->map); u32 index = ri->tgt_index; void *fwd = ri->tgt_value; int err; @@ -3618,32 +3530,27 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, ri->tgt_value = NULL; WRITE_ONCE(ri->map, NULL); - if (ri->map_to_flush && unlikely(ri->map_to_flush != map)) - xdp_do_flush_map(); + if (unlikely(!map)) { + fwd = dev_get_by_index_rcu(dev_net(dev), index); + if (unlikely(!fwd)) { + err = -EINVAL; + goto err; + } + + err = dev_xdp_enqueue(fwd, xdp, dev); + } else { + err = __bpf_tx_xdp_map(dev, fwd, map, xdp); + } - err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index); if (unlikely(err)) goto err; - ri->map_to_flush = map; _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); return 0; err: _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); return err; } - -int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) -{ - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - struct bpf_map *map = READ_ONCE(ri->map); - - if (likely(map)) - return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri); - - return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri); -} EXPORT_SYMBOL_GPL(xdp_do_redirect); static int xdp_do_generic_redirect_map(struct net_device *dev, @@ -5976,7 +5883,7 @@ bool bpf_helper_changes_pkt_data(void *func) return false; } -static const struct bpf_func_proto * +const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -6016,6 +5923,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_spin_unlock_proto; case BPF_FUNC_trace_printk: return bpf_get_trace_printk_proto(); + case BPF_FUNC_jiffies64: + return &bpf_jiffies64_proto; default: return NULL; } @@ -7648,21 +7557,21 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct bpf_sock, type): - BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, __sk_flags_offset)); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); - *target_size = 2; + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock, sk_type), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock, sk_type, + sizeof_field(struct sock, sk_type), + target_size)); break; case offsetof(struct bpf_sock, protocol): - BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, __sk_flags_offset)); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, 
si->dst_reg, SK_FL_PROTO_SHIFT); - *target_size = 1; + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock, sk_protocol), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock, sk_protocol, + sizeof_field(struct sock, sk_protocol), + target_size)); break; case offsetof(struct bpf_sock, src_ip4): @@ -7944,20 +7853,13 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct bpf_sock_addr, type): - SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( - struct bpf_sock_addr_kern, struct sock, sk, - __sk_flags_offset, BPF_W, 0); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); + SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, + struct sock, sk, sk_type); break; case offsetof(struct bpf_sock_addr, protocol): - SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( - struct bpf_sock_addr_kern, struct sock, sk, - __sk_flags_offset, BPF_W, 0); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, - SK_FL_PROTO_SHIFT); + SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, + struct sock, sk, sk_protocol); break; case offsetof(struct bpf_sock_addr, msg_src_ip4): @@ -8876,11 +8778,11 @@ sk_reuseport_is_valid_access(int off, int size, skb, \ SKB_FIELD) -#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \ - SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern, \ - struct sock, \ - sk, \ - SK_FIELD, BPF_SIZE, EXTRA_OFF) +#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \ + SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \ + struct sock, \ + sk, \ + SK_FIELD) static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, @@ -8904,16 +8806,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct sk_reuseport_md, ip_protocol): - BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); - SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset, - BPF_W, 0); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, - SK_FL_PROTO_SHIFT); - /* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian - * aware. No further narrowing or masking is needed. 
- */ - *target_size = 1; + SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol); break; case offsetof(struct sk_reuseport_md, data_end): @@ -8941,3 +8834,11 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = { const struct bpf_prog_ops sk_reuseport_prog_ops = { }; #endif /* CONFIG_INET */ + +DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp) + +void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) +{ + bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp), + prev_prog, prog); +} diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2dbbb030fbed..a1670dff0629 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -233,7 +233,7 @@ static bool icmp_has_id(u8 type) * @skb: sk_buff to extract from * @key_icmp: struct flow_dissector_key_icmp to fill * @data: raw buffer pointer to the packet - * @toff: offset to extract at + * @thoff: offset to extract at * @hlen: packet header length */ void skb_flow_get_icmp_tci(const struct sk_buff *skb, @@ -834,10 +834,10 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector *flow_dissector, void *target_container) { + struct flow_dissector_key_ports *key_ports = NULL; struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; - struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_tags *key_tags; key_control = skb_flow_dissector_target(flow_dissector, @@ -876,10 +876,17 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } - if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); + else if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE)) + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE, + target_container); + + if (key_ports) { key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 39402840025e..757cc1d084e7 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -211,16 +211,10 @@ static int net_eq_idr(int id, void *net, void *peer) return 0; } -/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc - * is set to true, thus the caller knows that the new id must be notified via - * rtnl. - */ -static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc) +/* Must be called from RCU-critical section or with nsid_lock held */ +static int __peernet2id(const struct net *net, struct net *peer) { int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); - bool alloc_it = *alloc; - - *alloc = false; /* Magic value for id 0. */ if (id == NET_ID_ZERO) @@ -228,23 +222,9 @@ static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc) if (id > 0) return id; - if (alloc_it) { - id = alloc_netid(net, peer, -1); - *alloc = true; - return id >= 0 ? 
id : NETNSA_NSID_NOT_ASSIGNED; - } - return NETNSA_NSID_NOT_ASSIGNED; } -/* should be called with nsid_lock held */ -static int __peernet2id(struct net *net, struct net *peer) -{ - bool no = false; - - return __peernet2id_alloc(net, peer, &no); -} - static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, struct nlmsghdr *nlh, gfp_t gfp); /* This function returns the id of a peer netns. If no id is assigned, one will @@ -252,38 +232,50 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, */ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) { - bool alloc = false, alive = false; int id; if (refcount_read(&net->count) == 0) return NETNSA_NSID_NOT_ASSIGNED; - spin_lock_bh(&net->nsid_lock); - /* - * When peer is obtained from RCU lists, we may race with + + spin_lock(&net->nsid_lock); + id = __peernet2id(net, peer); + if (id >= 0) { + spin_unlock(&net->nsid_lock); + return id; + } + + /* When peer is obtained from RCU lists, we may race with * its cleanup. Check whether it's alive, and this guarantees * we never hash a peer back to net->netns_ids, after it has * just been idr_remove()'d from there in cleanup_net(). */ - if (maybe_get_net(peer)) - alive = alloc = true; - id = __peernet2id_alloc(net, peer, &alloc); - spin_unlock_bh(&net->nsid_lock); - if (alloc && id >= 0) - rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp); - if (alive) - put_net(peer); + if (!maybe_get_net(peer)) { + spin_unlock(&net->nsid_lock); + return NETNSA_NSID_NOT_ASSIGNED; + } + + id = alloc_netid(net, peer, -1); + spin_unlock(&net->nsid_lock); + + put_net(peer); + if (id < 0) + return NETNSA_NSID_NOT_ASSIGNED; + + rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp); + return id; } EXPORT_SYMBOL_GPL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ -int peernet2id(struct net *net, struct net *peer) +int peernet2id(const struct net *net, struct net *peer) { int id; - spin_lock_bh(&net->nsid_lock); + rcu_read_lock(); id = __peernet2id(net, peer); - spin_unlock_bh(&net->nsid_lock); + rcu_read_unlock(); + return id; } EXPORT_SYMBOL(peernet2id); @@ -291,12 +283,12 @@ EXPORT_SYMBOL(peernet2id); /* This function returns true is the peer netns has an id assigned into the * current netns. 
*/ -bool peernet_has_id(struct net *net, struct net *peer) +bool peernet_has_id(const struct net *net, struct net *peer) { return peernet2id(net, peer) >= 0; } -struct net *get_net_ns_by_id(struct net *net, int id) +struct net *get_net_ns_by_id(const struct net *net, int id) { struct net *peer; @@ -528,20 +520,20 @@ static void unhash_nsid(struct net *net, struct net *last) for_each_net(tmp) { int id; - spin_lock_bh(&tmp->nsid_lock); + spin_lock(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock_bh(&tmp->nsid_lock); + spin_unlock(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL, GFP_KERNEL); if (tmp == last) break; } - spin_lock_bh(&net->nsid_lock); + spin_lock(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); } static LLIST_HEAD(cleanup_list); @@ -754,9 +746,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, return PTR_ERR(peer); } - spin_lock_bh(&net->nsid_lock); + spin_lock(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); err = -EEXIST; NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, @@ -765,7 +757,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, } err = alloc_netid(net, peer, nsid); - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid, nlh, GFP_KERNEL); @@ -950,6 +942,7 @@ struct rtnl_net_dump_cb { int s_idx; }; +/* Runs in RCU-critical section. */ static int rtnl_net_dumpid_one(int id, void *peer, void *data) { struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; @@ -1034,19 +1027,9 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) goto end; } - spin_lock_bh(&net_cb.tgt_net->nsid_lock); - if (net_cb.fillargs.add_ref && - !net_eq(net_cb.ref_net, net_cb.tgt_net) && - !spin_trylock_bh(&net_cb.ref_net->nsid_lock)) { - spin_unlock_bh(&net_cb.tgt_net->nsid_lock); - err = -EAGAIN; - goto end; - } + rcu_read_lock(); idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb); - if (net_cb.fillargs.add_ref && - !net_eq(net_cb.ref_net, net_cb.tgt_net)) - spin_unlock_bh(&net_cb.ref_net->nsid_lock); - spin_unlock_bh(&net_cb.tgt_net->nsid_lock); + rcu_read_unlock(); cb->args[0] = net_cb.idx; end: diff --git a/net/core/page_pool.c b/net/core/page_pool.c index a6aefe989043..9b7cbe35df37 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -96,10 +96,65 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) } EXPORT_SYMBOL(page_pool_create); +static void __page_pool_return_page(struct page_pool *pool, struct page *page); + +noinline +static struct page *page_pool_refill_alloc_cache(struct page_pool *pool, + bool refill) +{ + struct ptr_ring *r = &pool->ring; + struct page *page; + int pref_nid; /* preferred NUMA node */ + + /* Quicker fallback, avoid locks when ring is empty */ + if (__ptr_ring_empty(r)) + return NULL; + + /* Softirq guarantee CPU and thus NUMA node is stable. This, + * assumes CPU refilling driver RX-ring will also run RX-NAPI. + */ +#ifdef CONFIG_NUMA + pref_nid = (pool->p.nid == NUMA_NO_NODE) ? 
numa_mem_id() : pool->p.nid; +#else + /* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */ + pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */ +#endif + + /* Slower-path: Get pages from locked ring queue */ + spin_lock(&r->consumer_lock); + + /* Refill alloc array, but only if NUMA match */ + do { + page = __ptr_ring_consume(r); + if (unlikely(!page)) + break; + + if (likely(page_to_nid(page) == pref_nid)) { + pool->alloc.cache[pool->alloc.count++] = page; + } else { + /* NUMA mismatch; + * (1) release 1 page to page-allocator and + * (2) break out to fallthrough to alloc_pages_node. + * This limit stress on page buddy alloactor. + */ + __page_pool_return_page(pool, page); + page = NULL; + break; + } + } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL && + refill); + + /* Return last page */ + if (likely(pool->alloc.count > 0)) + page = pool->alloc.cache[--pool->alloc.count]; + + spin_unlock(&r->consumer_lock); + return page; +} + /* fast path */ static struct page *__page_pool_get_cached(struct page_pool *pool) { - struct ptr_ring *r = &pool->ring; bool refill = false; struct page *page; @@ -113,20 +168,7 @@ static struct page *__page_pool_get_cached(struct page_pool *pool) refill = true; } - /* Quicker fallback, avoid locks when ring is empty */ - if (__ptr_ring_empty(r)) - return NULL; - - /* Slow-path: Get page from locked ring queue, - * refill alloc array if requested. - */ - spin_lock(&r->consumer_lock); - page = __ptr_ring_consume(r); - if (refill) - pool->alloc.count = __ptr_ring_consume_batched(r, - pool->alloc.cache, - PP_ALLOC_CACHE_REFILL); - spin_unlock(&r->consumer_lock); + page = page_pool_refill_alloc_cache(pool, refill); return page; } @@ -163,7 +205,11 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, */ /* Cache was empty, do real allocation */ +#ifdef CONFIG_NUMA page = alloc_pages_node(pool->p.nid, gfp, pool->p.order); +#else + page = alloc_pages(gfp, pool->p.order); +#endif if (!page) return NULL; @@ -311,13 +357,10 @@ static bool __page_pool_recycle_direct(struct page *page, /* page is NOT reusable when: * 1) allocated when system is under some pressure. (page_is_pfmemalloc) - * 2) belongs to a different NUMA node than pool->p.nid. - * - * To update pool->p.nid users must call page_pool_update_nid. */ static bool pool_page_reusable(struct page_pool *pool, struct page *page) { - return !page_is_pfmemalloc(page) && page_to_nid(page) == pool->p.nid; + return !page_is_pfmemalloc(page); } void __page_pool_put_page(struct page_pool *pool, struct page *page, @@ -484,7 +527,15 @@ EXPORT_SYMBOL(page_pool_destroy); /* Caller must provide appropriate safe context, e.g. NAPI. 
*/ void page_pool_update_nid(struct page_pool *pool, int new_nid) { + struct page *page; + trace_page_pool_update_nid(pool, new_nid); pool->p.nid = new_nid; + + /* Flush pool alloc cache, as refill will check NUMA node */ + while (pool->alloc.count) { + page = pool->alloc.cache[--pool->alloc.count]; + __page_pool_return_page(pool, page); + } } EXPORT_SYMBOL(page_pool_update_nid); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d9001b5c48eb..cdad6ed532c4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1041,6 +1041,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MIN_MTU */ + nla_total_size(4) /* IFLA_MAX_MTU */ + rtnl_prop_list_size(dev) + + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */ + 0; } @@ -1757,6 +1758,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0) goto nla_put_failure; + if (memchr_inv(dev->perm_addr, '\0', dev->addr_len) && + nla_put(skb, IFLA_PERM_ADDRESS, dev->addr_len, dev->perm_addr)) + goto nla_put_failure; rcu_read_lock(); if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) @@ -1822,6 +1826,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PROP_LIST] = { .type = NLA_NESTED }, [IFLA_ALT_IFNAME] = { .type = NLA_STRING, .len = ALTIFNAMSIZ - 1 }, + [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 973a71f4bc89..864cb9e9622f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -68,6 +68,7 @@ #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/mpls.h> +#include <net/mptcp.h> #include <linux/uaccess.h> #include <trace/events/skb.h> @@ -3638,6 +3639,97 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) return head_frag; } +struct sk_buff *skb_segment_list(struct sk_buff *skb, + netdev_features_t features, + unsigned int offset) +{ + struct sk_buff *list_skb = skb_shinfo(skb)->frag_list; + unsigned int tnl_hlen = skb_tnl_header_len(skb); + unsigned int delta_truesize = 0; + unsigned int delta_len = 0; + struct sk_buff *tail = NULL; + struct sk_buff *nskb; + + skb_push(skb, -skb_network_offset(skb) + offset); + + skb_shinfo(skb)->frag_list = NULL; + + do { + nskb = list_skb; + list_skb = list_skb->next; + + if (!tail) + skb->next = nskb; + else + tail->next = nskb; + + tail = nskb; + + delta_len += nskb->len; + delta_truesize += nskb->truesize; + + skb_push(nskb, -skb_network_offset(nskb) + offset); + + __copy_skb_header(nskb, skb); + + skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); + skb_copy_from_linear_data_offset(skb, -tnl_hlen, + nskb->data - tnl_hlen, + offset + tnl_hlen); + + if (skb_needs_linearize(nskb, features) && + __skb_linearize(nskb)) + goto err_linearize; + + } while (list_skb); + + skb->truesize = skb->truesize - delta_truesize; + skb->data_len = skb->data_len - delta_len; + skb->len = skb->len - delta_len; + + skb_gso_reset(skb); + + skb->prev = tail; + + if (skb_needs_linearize(skb, features) && + __skb_linearize(skb)) + goto err_linearize; + + skb_get(skb); + + return skb; + +err_linearize: + kfree_skb_list(skb->next); + skb->next = NULL; + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL_GPL(skb_segment_list); + +int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) +{ + if (unlikely(p->len + skb->len >= 65536)) + return -E2BIG; + + if (NAPI_GRO_CB(p)->last == p) + skb_shinfo(p)->frag_list = skb; + 
else + NAPI_GRO_CB(p)->last->next = skb; + + skb_pull(skb, skb_gro_offset(skb)); + + NAPI_GRO_CB(p)->last = skb; + NAPI_GRO_CB(p)->count++; + p->data_len += skb->len; + p->truesize += skb->truesize; + p->len += skb->len; + + NAPI_GRO_CB(skb)->same_flow = 1; + + return 0; +} +EXPORT_SYMBOL_GPL(skb_gro_receive_list); + /** * skb_segment - Perform protocol segmentation on skb. * @head_skb: buffer to segment @@ -4109,6 +4201,9 @@ static const u8 skb_ext_type_len[] = { #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext), #endif +#if IS_ENABLED(CONFIG_MPTCP) + [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), +#endif }; static __always_inline unsigned int skb_ext_total_length(void) @@ -4123,6 +4218,9 @@ static __always_inline unsigned int skb_ext_total_length(void) #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) skb_ext_type_len[TC_SKB_EXT] + #endif +#if IS_ENABLED(CONFIG_MPTCP) + skb_ext_type_len[SKB_EXT_MPTCP] + +#endif 0; } @@ -5472,12 +5570,15 @@ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr, } /** - * skb_mpls_push() - push a new MPLS header after the mac header + * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of + * the packet * * @skb: buffer * @mpls_lse: MPLS label stack entry to push * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848) * @mac_len: length of the MAC header + * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is + * ethernet * * Expects skb->data at mac header. * @@ -5501,7 +5602,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, return err; if (!skb->inner_protocol) { - skb_set_inner_network_header(skb, mac_len); + skb_set_inner_network_header(skb, skb_network_offset(skb)); skb_set_inner_protocol(skb, skb->protocol); } @@ -5510,6 +5611,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, mac_len); skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); + skb_reset_mac_len(skb); lse = mpls_hdr(skb); lse->label_stack_entry = mpls_lse; @@ -5529,7 +5631,7 @@ EXPORT_SYMBOL_GPL(skb_mpls_push); * @skb: buffer * @next_proto: ethertype of header after popped MPLS header * @mac_len: length of the MAC header - * @ethernet: flag to indicate if ethernet header is present in packet + * @ethernet: flag to indicate if the packet is ethernet * * Expects skb->data at mac header. * @@ -5976,7 +6078,14 @@ static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id) return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE); } -static struct skb_ext *skb_ext_alloc(void) +/** + * __skb_ext_alloc - allocate a new skb extensions storage + * + * Returns the newly allocated pointer. The pointer can later attached to a + * skb via __skb_ext_set(). + * Note: caller must handle the skb_ext as an opaque data. + */ +struct skb_ext *__skb_ext_alloc(void) { struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC); @@ -6017,6 +6126,30 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, } /** + * __skb_ext_set - attach the specified extension storage to this skb + * @skb: buffer + * @id: extension id + * @ext: extension storage previously allocated via __skb_ext_alloc() + * + * Existing extensions, if any, are cleared. + * + * Returns the pointer to the extension. 
+ */ +void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, + struct skb_ext *ext) +{ + unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext); + + skb_ext_put(skb); + newlen = newoff + skb_ext_type_len[id]; + ext->chunks = newlen; + ext->offset[id] = newoff; + skb->extensions = ext; + skb->active_extensions = 1 << id; + return skb_ext_get_ptr(ext, id); +} + +/** * skb_ext_add - allocate space for given extension, COW if needed * @skb: buffer * @id: extension to allocate space for @@ -6049,7 +6182,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) } else { newoff = SKB_EXT_CHUNKSIZEOF(*new); - new = skb_ext_alloc(); + new = __skb_ext_alloc(); if (!new) return NULL; } diff --git a/net/core/sock.c b/net/core/sock.c index 8459ad579f73..a4c8fac781ff 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2786,7 +2786,7 @@ static void sock_def_error_report(struct sock *sk) rcu_read_unlock(); } -static void sock_def_readable(struct sock *sk) +void sock_def_readable(struct sock *sk) { struct socket_wq *wq; diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index f19f179538b9..91e9f2223c39 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -107,7 +107,6 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) if (!more_reuse) return NULL; - more_reuse->max_socks = more_socks_size; more_reuse->num_socks = reuse->num_socks; more_reuse->prog = reuse->prog; more_reuse->reuseport_id = reuse->reuseport_id; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 7911235706a9..04840697fe79 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -13,7 +13,7 @@ static unsigned int classify(const struct sk_buff *skb) { if (likely(skb->dev && skb->dev->phydev && - skb->dev->phydev->drv)) + skb->dev->phydev->mii_ts)) return ptp_classify_raw(skb); else return PTP_CLASS_NONE; @@ -21,7 +21,7 @@ static unsigned int classify(const struct sk_buff *skb) void skb_clone_tx_timestamp(struct sk_buff *skb) { - struct phy_device *phydev; + struct mii_timestamper *mii_ts; struct sk_buff *clone; unsigned int type; @@ -32,22 +32,22 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) if (type == PTP_CLASS_NONE) return; - phydev = skb->dev->phydev; - if (likely(phydev->drv->txtstamp)) { + mii_ts = skb->dev->phydev->mii_ts; + if (likely(mii_ts->txtstamp)) { clone = skb_clone_sk(skb); if (!clone) return; - phydev->drv->txtstamp(phydev, clone, type); + mii_ts->txtstamp(mii_ts, clone, type); } } EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); bool skb_defer_rx_timestamp(struct sk_buff *skb) { - struct phy_device *phydev; + struct mii_timestamper *mii_ts; unsigned int type; - if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv) + if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts) return false; if (skb_headroom(skb) < ETH_HLEN) @@ -62,9 +62,9 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) if (type == PTP_CLASS_NONE) return false; - phydev = skb->dev->phydev; - if (likely(phydev->drv->rxtstamp)) - return phydev->drv->rxtstamp(phydev, skb, type); + mii_ts = skb->dev->phydev->mii_ts; + if (likely(mii_ts->rxtstamp)) + return mii_ts->rxtstamp(mii_ts, skb, type); return false; } |
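
With the page_pool changes above, recycling no longer rejects pages on NUMA mismatch at put time (pool_page_reusable() now only checks pfmemalloc); instead the slow refill path compares page_to_nid() against pool->p.nid and returns mismatched pages to the buddy allocator, while page_pool_update_nid() flushes the lockless alloc cache so stale-node pages are not handed back out. A minimal sketch, assuming a hypothetical driver RX-queue struct, of how a driver might keep the preferred node in sync from its NAPI poll path; struct my_rq and my_rq_sync_pool_nid() are illustrative names only, not part of this patch or of the page_pool API:

	#include <linux/topology.h>	/* numa_mem_id() */
	#include <net/page_pool.h>

	/* Hypothetical driver RX queue; only the page_pool pointer matters here. */
	struct my_rq {
		struct page_pool *page_pool;
	};

	/* Call from the queue's NAPI poll (softirq context, as required by
	 * page_pool_update_nid()).  The update also flushes pool->alloc.cache,
	 * so skip it when the preferred node has not actually changed.
	 */
	static void my_rq_sync_pool_nid(struct my_rq *rq)
	{
		int nid = numa_mem_id();	/* memory node of the executing CPU */

		if (rq->page_pool->p.nid != nid)
			page_pool_update_nid(rq->page_pool, nid);
	}

This keeps the recycle fast path free of NUMA checks while still letting the pool drift to the node that is actually servicing RX after an IRQ affinity change.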
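
The skbuff.c hunk above also splits extension attachment into a two-step API: __skb_ext_alloc() obtains storage and __skb_ext_set() attaches it while clearing any extensions the skb already carried, alongside the new SKB_EXT_MPTCP id. A minimal sketch of that pairing, assuming MPTCP as the consumer and that the declarations live in linux/skbuff.h; attach_mptcp_ext() is a hypothetical helper, not something this patch adds:

	#include <linux/skbuff.h>
	#include <net/mptcp.h>		/* struct mptcp_ext */

	/* Hypothetical helper: attach pre-allocated extension storage to @skb.
	 * __skb_ext_set() drops any extensions @skb already carried and returns
	 * a pointer to the SKB_EXT_MPTCP slot inside @storage.
	 */
	static struct mptcp_ext *attach_mptcp_ext(struct sk_buff *skb,
						  struct skb_ext *storage)
	{
		if (!storage)
			return NULL;

		return __skb_ext_set(skb, SKB_EXT_MPTCP, storage);
	}

	/* Usage sketch:
	 *
	 *	struct skb_ext *ext = __skb_ext_alloc();	// GFP_ATOMIC inside
	 *	struct mptcp_ext *mpext = attach_mptcp_ext(skb, ext);
	 *	if (mpext)
	 *		memset(mpext, 0, sizeof(*mpext));
	 */

Unlike skb_ext_add(), this path does no copy-on-write of existing extension storage, which is why the allocation can be done ahead of the point where the extension is attached.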