diff options
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 258 |
1 files changed, 165 insertions, 93 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 0ad39c87b7fd..a69e8bd7ed74 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -928,7 +928,7 @@ EXPORT_SYMBOL(dev_get_by_napi_id); * * The use of raw_seqcount_begin() and cond_resched() before * retrying is required as we want to give the writers a chance - * to complete when CONFIG_PREEMPT is not set. + * to complete when CONFIG_PREEMPTION is not set. */ int netdev_get_name(struct net *net, char *name, int ifindex) { @@ -1764,7 +1764,6 @@ EXPORT_SYMBOL(register_netdevice_notifier); int unregister_netdevice_notifier(struct notifier_block *nb) { - struct net_device *dev; struct net *net; int err; @@ -1775,16 +1774,9 @@ int unregister_netdevice_notifier(struct notifier_block *nb) if (err) goto unlock; - for_each_net(net) { - for_each_netdev(net, dev) { - if (dev->flags & IFF_UP) { - call_netdevice_notifier(nb, NETDEV_GOING_DOWN, - dev); - call_netdevice_notifier(nb, NETDEV_DOWN, dev); - } - call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); - } - } + for_each_net(net) + call_netdevice_unregister_net_notifiers(nb, net); + unlock: rtnl_unlock(); up_write(&pernet_ops_rwsem); @@ -1792,6 +1784,42 @@ unlock: } EXPORT_SYMBOL(unregister_netdevice_notifier); +static int __register_netdevice_notifier_net(struct net *net, + struct notifier_block *nb, + bool ignore_call_fail) +{ + int err; + + err = raw_notifier_chain_register(&net->netdev_chain, nb); + if (err) + return err; + if (dev_boot_phase) + return 0; + + err = call_netdevice_register_net_notifiers(nb, net); + if (err && !ignore_call_fail) + goto chain_unregister; + + return 0; + +chain_unregister: + raw_notifier_chain_unregister(&net->netdev_chain, nb); + return err; +} + +static int __unregister_netdevice_notifier_net(struct net *net, + struct notifier_block *nb) +{ + int err; + + err = raw_notifier_chain_unregister(&net->netdev_chain, nb); + if (err) + return err; + + call_netdevice_unregister_net_notifiers(nb, net); + return 0; +} + /** * register_netdevice_notifier_net - register a per-netns network notifier block * @net: network namespace @@ -1812,23 +1840,9 @@ int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb) int err; rtnl_lock(); - err = raw_notifier_chain_register(&net->netdev_chain, nb); - if (err) - goto unlock; - if (dev_boot_phase) - goto unlock; - - err = call_netdevice_register_net_notifiers(nb, net); - if (err) - goto chain_unregister; - -unlock: + err = __register_netdevice_notifier_net(net, nb, false); rtnl_unlock(); return err; - -chain_unregister: - raw_notifier_chain_unregister(&netdev_chain, nb); - goto unlock; } EXPORT_SYMBOL(register_netdevice_notifier_net); @@ -1854,17 +1868,53 @@ int unregister_netdevice_notifier_net(struct net *net, int err; rtnl_lock(); - err = raw_notifier_chain_unregister(&net->netdev_chain, nb); - if (err) - goto unlock; + err = __unregister_netdevice_notifier_net(net, nb); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL(unregister_netdevice_notifier_net); - call_netdevice_unregister_net_notifiers(nb, net); +int register_netdevice_notifier_dev_net(struct net_device *dev, + struct notifier_block *nb, + struct netdev_net_notifier *nn) +{ + int err; -unlock: + rtnl_lock(); + err = __register_netdevice_notifier_net(dev_net(dev), nb, false); + if (!err) { + nn->nb = nb; + list_add(&nn->list, &dev->net_notifier_list); + } rtnl_unlock(); return err; } -EXPORT_SYMBOL(unregister_netdevice_notifier_net); +EXPORT_SYMBOL(register_netdevice_notifier_dev_net); + +int unregister_netdevice_notifier_dev_net(struct net_device *dev, + struct notifier_block *nb, + struct netdev_net_notifier *nn) +{ + int err; + + rtnl_lock(); + list_del(&nn->list); + err = __unregister_netdevice_notifier_net(dev_net(dev), nb); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net); + +static void move_netdevice_notifiers_dev_net(struct net_device *dev, + struct net *net) +{ + struct netdev_net_notifier *nn; + + list_for_each_entry(nn, &dev->net_notifier_list, list) { + __unregister_netdevice_notifier_net(dev_net(dev), nn->nb); + __register_netdevice_notifier_net(net, nn->nb, true); + } +} /** * call_netdevice_notifiers_info - call all network notifier blocks @@ -3249,7 +3299,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, segs = skb_mac_gso_segment(skb, features); - if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) + if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) skb_warn_bad_offload(skb); return segs; @@ -4932,7 +4982,6 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { -#ifdef CONFIG_NETFILTER_INGRESS if (nf_hook_ingress_active(skb)) { int ingress_retval; @@ -4946,7 +4995,6 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, rcu_read_unlock(); return ingress_retval; } -#endif /* CONFIG_NETFILTER_INGRESS */ return 0; } @@ -5491,9 +5539,29 @@ static void flush_all_backlogs(void) put_online_cpus(); } +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. + */ +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) +{ + list_add_tail(&skb->list, &napi->rx_list); + if (++napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); +} + INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); -static int napi_gro_complete(struct sk_buff *skb) +static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) { struct packet_offload *ptype; __be16 type = skb->protocol; @@ -5526,7 +5594,8 @@ static int napi_gro_complete(struct sk_buff *skb) } out: - return netif_receive_skb_internal(skb); + gro_normal_one(napi, skb); + return NET_RX_SUCCESS; } static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, @@ -5539,7 +5608,7 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) return; skb_list_del_init(skb); - napi_gro_complete(skb); + napi_gro_complete(napi, skb); napi->gro_hash[index].count--; } @@ -5641,7 +5710,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow) } } -static void gro_flush_oldest(struct list_head *head) +static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) { struct sk_buff *oldest; @@ -5657,7 +5726,7 @@ static void gro_flush_oldest(struct list_head *head) * SKB to the chain. */ skb_list_del_init(oldest); - napi_gro_complete(oldest); + napi_gro_complete(napi, oldest); } INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, @@ -5723,7 +5792,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (&ptype->list == head) goto normal; - if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) { + if (PTR_ERR(pp) == -EINPROGRESS) { ret = GRO_CONSUMED; goto ok; } @@ -5733,7 +5802,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (pp) { skb_list_del_init(pp); - napi_gro_complete(pp); + napi_gro_complete(napi, pp); napi->gro_hash[hash].count--; } @@ -5744,7 +5813,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff goto normal; if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) { - gro_flush_oldest(gro_head); + gro_flush_oldest(napi, gro_head); } else { napi->gro_hash[hash].count++; } @@ -5802,26 +5871,6 @@ struct packet_offload *gro_find_complete_by_type(__be16 type) } EXPORT_SYMBOL(gro_find_complete_by_type); -/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ -static void gro_normal_list(struct napi_struct *napi) -{ - if (!napi->rx_count) - return; - netif_receive_skb_list_internal(&napi->rx_list); - INIT_LIST_HEAD(&napi->rx_list); - napi->rx_count = 0; -} - -/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, - * pass the whole batch up to the stack. - */ -static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) -{ - list_add_tail(&skb->list, &napi->rx_list); - if (++napi->rx_count >= gro_normal_batch) - gro_normal_list(napi); -} - static void napi_skb_free_stolen_head(struct sk_buff *skb) { skb_dst_drop(skb); @@ -6200,8 +6249,6 @@ bool napi_complete_done(struct napi_struct *n, int work_done) NAPIF_STATE_IN_BUSY_POLL))) return false; - gro_normal_list(n); - if (n->gro_bitmask) { unsigned long timeout = 0; @@ -6217,6 +6264,9 @@ bool napi_complete_done(struct napi_struct *n, int work_done) hrtimer_start(&n->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); } + + gro_normal_list(n); + if (unlikely(!list_empty(&n->poll_list))) { /* If n->poll_list is not empty, we need to mask irqs */ local_irq_save(flags); @@ -6548,8 +6598,6 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) goto out_unlock; } - gro_normal_list(n); - if (n->gro_bitmask) { /* flush too old packets * If HZ < 1000, flush all packets. @@ -6557,6 +6605,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) napi_gro_flush(n, HZ >= 1000); } + gro_normal_list(n); + /* Some drivers may have called napi_schedule * prior to exhausting their budget. */ @@ -8194,6 +8244,22 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu) } EXPORT_SYMBOL(__dev_set_mtu); +int dev_validate_mtu(struct net_device *dev, int new_mtu, + struct netlink_ext_ack *extack) +{ + /* MTU must be positive, and in range */ + if (new_mtu < 0 || new_mtu < dev->min_mtu) { + NL_SET_ERR_MSG(extack, "mtu less than device minimum"); + return -EINVAL; + } + + if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { + NL_SET_ERR_MSG(extack, "mtu greater than device maximum"); + return -EINVAL; + } + return 0; +} + /** * dev_set_mtu_ext - Change maximum transfer unit * @dev: device @@ -8210,16 +8276,9 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, if (new_mtu == dev->mtu) return 0; - /* MTU must be positive, and in range */ - if (new_mtu < 0 || new_mtu < dev->min_mtu) { - NL_SET_ERR_MSG(extack, "mtu less than device minimum"); - return -EINVAL; - } - - if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { - NL_SET_ERR_MSG(extack, "mtu greater than device maximum"); - return -EINVAL; - } + err = dev_validate_mtu(dev, new_mtu, extack); + if (err) + return err; if (!netif_device_present(dev)) return -ENODEV; @@ -8542,7 +8601,17 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, struct netlink_ext_ack *extack, u32 flags, struct bpf_prog *prog) { + bool non_hw = !(flags & XDP_FLAGS_HW_MODE); + struct bpf_prog *prev_prog = NULL; struct netdev_bpf xdp; + int err; + + if (non_hw) { + prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op, + XDP_QUERY_PROG)); + if (IS_ERR(prev_prog)) + prev_prog = NULL; + } memset(&xdp, 0, sizeof(xdp)); if (flags & XDP_FLAGS_HW_MODE) @@ -8553,7 +8622,14 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, xdp.flags = flags; xdp.prog = prog; - return bpf_op(dev, &xdp); + err = bpf_op(dev, &xdp); + if (!err && non_hw) + bpf_prog_change_xdp(prev_prog, prog); + + if (prev_prog) + bpf_prog_put(prev_prog); + + return err; } static void dev_xdp_uninstall(struct net_device *dev) @@ -9177,22 +9253,10 @@ static void netdev_unregister_lockdep_key(struct net_device *dev) void netdev_update_lockdep_key(struct net_device *dev) { - struct netdev_queue *queue; - int i; - - lockdep_unregister_key(&dev->qdisc_xmit_lock_key); lockdep_unregister_key(&dev->addr_list_lock_key); - - lockdep_register_key(&dev->qdisc_xmit_lock_key); lockdep_register_key(&dev->addr_list_lock_key); lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); - for (i = 0; i < dev->num_tx_queues; i++) { - queue = netdev_get_tx_queue(dev, i); - - lockdep_set_class(&queue->_xmit_lock, - &dev->qdisc_xmit_lock_key); - } } EXPORT_SYMBOL(netdev_update_lockdep_key); @@ -9269,7 +9333,7 @@ int register_netdevice(struct net_device *dev) /* Transfer changeable features to wanted_features and enable * software offloads (GSO and GRO). */ - dev->hw_features |= NETIF_F_SOFT_FEATURES; + dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF); dev->features |= NETIF_F_SOFT_FEATURES; if (dev->netdev_ops->ndo_udp_tunnel_add) { @@ -9314,8 +9378,10 @@ int register_netdevice(struct net_device *dev) goto err_uninit; ret = netdev_register_kobject(dev); - if (ret) + if (ret) { + dev->reg_state = NETREG_UNREGISTERED; goto err_uninit; + } dev->reg_state = NETREG_REGISTERED; __netdev_update_features(dev); @@ -9762,6 +9828,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); + INIT_LIST_HEAD(&dev->net_notifier_list); #ifdef CONFIG_NET_SCHED hash_init(dev->qdisc_hash); #endif @@ -9832,6 +9899,8 @@ void free_netdev(struct net_device *dev) free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; + free_percpu(dev->xdp_bulkq); + dev->xdp_bulkq = NULL; netdev_unregister_lockdep_key(dev); @@ -10023,6 +10092,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); netdev_adjacent_del_links(dev); + /* Move per-net netdevice notifiers that are following the netdevice */ + move_netdevice_notifiers_dev_net(dev, net); + /* Actually switch the network namespace */ dev_net_set(dev, net); dev->ifindex = new_ifindex; |