summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c348
-rw-r--r--net/core/ethtool.c45
-rw-r--r--net/core/fib_rules.c3
-rw-r--r--net/core/flow_dissector.c21
-rw-r--r--net/core/neighbour.c64
-rw-r--r--net/core/net_namespace.c251
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/rtnetlink.c142
-rw-r--r--net/core/skbuff.c60
-rw-r--r--net/core/sock.c27
-rw-r--r--net/core/sysctl_net_core.c9
11 files changed, 727 insertions, 245 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index f411c28d0a66..1d564d68e31a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
if (pt->type == htons(ETH_P_ALL))
- return &ptype_all;
+ return pt->dev ? &pt->dev->ptype_all : &ptype_all;
else
- return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+ return pt->dev ? &pt->dev->ptype_specific :
+ &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
/**
@@ -1694,6 +1695,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
skb_scrub_packet(skb, true);
skb->protocol = eth_type_trans(skb, dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
return 0;
}
@@ -1733,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb,
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
+static inline void deliver_ptype_list_skb(struct sk_buff *skb,
+ struct packet_type **pt,
+ struct net_device *dev, __be16 type,
+ struct list_head *ptype_list)
+{
+ struct packet_type *ptype, *pt_prev = *pt;
+
+ list_for_each_entry_rcu(ptype, ptype_list, list) {
+ if (ptype->type != type)
+ continue;
+ if (pt_prev)
+ deliver_skb(skb, pt_prev, dev);
+ pt_prev = ptype;
+ }
+ *pt = pt_prev;
+}
+
static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
{
if (!ptype->af_packet_priv || !skb->sk)
@@ -1756,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
struct packet_type *ptype;
struct sk_buff *skb2 = NULL;
struct packet_type *pt_prev = NULL;
+ struct list_head *ptype_list = &ptype_all;
rcu_read_lock();
- list_for_each_entry_rcu(ptype, &ptype_all, list) {
+again:
+ list_for_each_entry_rcu(ptype, ptype_list, list) {
/* Never send packets back to the socket
* they originated from - MvS (miquels@drinkel.ow.org)
*/
- if ((ptype->dev == dev || !ptype->dev) &&
- (!skb_loop_sk(ptype, skb))) {
- if (pt_prev) {
- deliver_skb(skb2, pt_prev, skb->dev);
- pt_prev = ptype;
- continue;
- }
+ if (skb_loop_sk(ptype, skb))
+ continue;
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
- break;
+ if (pt_prev) {
+ deliver_skb(skb2, pt_prev, skb->dev);
+ pt_prev = ptype;
+ continue;
+ }
- net_timestamp_set(skb2);
+ /* need to clone skb, done only once */
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2)
+ goto out_unlock;
- /* skb->nh should be correctly
- set by sender, so that the second statement is
- just protection against buggy protocols.
- */
- skb_reset_mac_header(skb2);
-
- if (skb_network_header(skb2) < skb2->data ||
- skb_network_header(skb2) > skb_tail_pointer(skb2)) {
- net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
- ntohs(skb2->protocol),
- dev->name);
- skb_reset_network_header(skb2);
- }
+ net_timestamp_set(skb2);
- skb2->transport_header = skb2->network_header;
- skb2->pkt_type = PACKET_OUTGOING;
- pt_prev = ptype;
+ /* skb->nh should be correctly
+ * set by sender, so that the second statement is
+ * just protection against buggy protocols.
+ */
+ skb_reset_mac_header(skb2);
+
+ if (skb_network_header(skb2) < skb2->data ||
+ skb_network_header(skb2) > skb_tail_pointer(skb2)) {
+ net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
+ ntohs(skb2->protocol),
+ dev->name);
+ skb_reset_network_header(skb2);
}
+
+ skb2->transport_header = skb2->network_header;
+ skb2->pkt_type = PACKET_OUTGOING;
+ pt_prev = ptype;
+ }
+
+ if (ptype_list == &ptype_all) {
+ ptype_list = &dev->ptype_all;
+ goto again;
}
+out_unlock:
if (pt_prev)
pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
rcu_read_unlock();
@@ -2522,7 +2550,7 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
/* If MPLS offload request, verify we are testing hardware MPLS features
* instead of standard features for the netdev.
*/
-#ifdef CONFIG_NET_MPLS_GSO
+#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
static netdev_features_t net_mpls_features(struct sk_buff *skb,
netdev_features_t features,
__be16 type)
@@ -2562,7 +2590,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
netdev_features_t netif_skb_features(struct sk_buff *skb)
{
- const struct net_device *dev = skb->dev;
+ struct net_device *dev = skb->dev;
netdev_features_t features = dev->features;
u16 gso_segs = skb_shinfo(skb)->gso_segs;
__be16 protocol = skb->protocol;
@@ -2570,11 +2598,21 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
features &= ~NETIF_F_GSO_MASK;
- if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
- struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- protocol = veh->h_vlan_encapsulated_proto;
- } else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, features);
+ /* If encapsulation offload request, verify we are testing
+ * hardware encapsulation features instead of standard
+ * features for the netdev
+ */
+ if (skb->encapsulation)
+ features &= dev->hw_enc_features;
+
+ if (!skb_vlan_tag_present(skb)) {
+ if (unlikely(protocol == htons(ETH_P_8021Q) ||
+ protocol == htons(ETH_P_8021AD))) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ } else {
+ goto finalize;
+ }
}
features = netdev_intersect_features(features,
@@ -2591,6 +2629,11 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
+finalize:
+ if (dev->netdev_ops->ndo_features_check)
+ features &= dev->netdev_ops->ndo_features_check(skb, dev,
+ features);
+
return harmonize_features(skb, features);
}
EXPORT_SYMBOL(netif_skb_features);
@@ -2601,7 +2644,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
unsigned int len;
int rc;
- if (!list_empty(&ptype_all))
+ if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
dev_queue_xmit_nit(skb, dev);
len = skb->len;
@@ -2643,7 +2686,7 @@ out:
static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
netdev_features_t features)
{
- if (vlan_tx_tag_present(skb) &&
+ if (skb_vlan_tag_present(skb) &&
!vlan_hw_offload_capable(features, skb->vlan_proto))
skb = __vlan_hwaccel_push_inside(skb);
return skb;
@@ -2661,19 +2704,12 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
if (unlikely(!skb))
goto out_null;
- /* If encapsulation offload request, verify we are testing
- * hardware encapsulation features instead of standard
- * features for the netdev
- */
- if (skb->encapsulation)
- features &= dev->hw_enc_features;
-
if (netif_needs_gso(dev, skb, features)) {
struct sk_buff *segs;
segs = skb_gso_segment(skb, features);
if (IS_ERR(segs)) {
- segs = NULL;
+ goto out_kfree_skb;
} else if (segs) {
consume_skb(skb);
skb = segs;
@@ -3606,7 +3642,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
struct packet_type *ptype, *pt_prev;
rx_handler_func_t *rx_handler;
struct net_device *orig_dev;
- struct net_device *null_or_dev;
bool deliver_exact = false;
int ret = NET_RX_DROP;
__be16 type;
@@ -3649,11 +3684,15 @@ another_round:
goto skip_taps;
list_for_each_entry_rcu(ptype, &ptype_all, list) {
- if (!ptype->dev || ptype->dev == skb->dev) {
- if (pt_prev)
- ret = deliver_skb(skb, pt_prev, orig_dev);
- pt_prev = ptype;
- }
+ if (pt_prev)
+ ret = deliver_skb(skb, pt_prev, orig_dev);
+ pt_prev = ptype;
+ }
+
+ list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
+ if (pt_prev)
+ ret = deliver_skb(skb, pt_prev, orig_dev);
+ pt_prev = ptype;
}
skip_taps:
@@ -3667,7 +3706,7 @@ ncls:
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
- if (vlan_tx_tag_present(skb)) {
+ if (skb_vlan_tag_present(skb)) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL;
@@ -3699,8 +3738,8 @@ ncls:
}
}
- if (unlikely(vlan_tx_tag_present(skb))) {
- if (vlan_tx_tag_get_id(skb))
+ if (unlikely(skb_vlan_tag_present(skb))) {
+ if (skb_vlan_tag_get_id(skb))
skb->pkt_type = PACKET_OTHERHOST;
/* Note: we might in the future use prio bits
* and set skb->priority like in vlan_do_receive()
@@ -3709,19 +3748,21 @@ ncls:
skb->vlan_tci = 0;
}
+ type = skb->protocol;
+
/* deliver only exact match when indicated */
- null_or_dev = deliver_exact ? skb->dev : NULL;
+ if (likely(!deliver_exact)) {
+ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+ &ptype_base[ntohs(type) &
+ PTYPE_HASH_MASK]);
+ }
- type = skb->protocol;
- list_for_each_entry_rcu(ptype,
- &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
- if (ptype->type == type &&
- (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
- ptype->dev == orig_dev)) {
- if (pt_prev)
- ret = deliver_skb(skb, pt_prev, orig_dev);
- pt_prev = ptype;
- }
+ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+ &orig_dev->ptype_specific);
+
+ if (unlikely(skb->dev != orig_dev)) {
+ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+ &skb->dev->ptype_specific);
}
if (pt_prev) {
@@ -4557,6 +4598,68 @@ void netif_napi_del(struct napi_struct *napi)
}
EXPORT_SYMBOL(netif_napi_del);
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+{
+ void *have;
+ int work, weight;
+
+ list_del_init(&n->poll_list);
+
+ have = netpoll_poll_lock(n);
+
+ weight = n->weight;
+
+ /* This NAPI_STATE_SCHED test is for avoiding a race
+ * with netpoll's poll_napi(). Only the entity which
+ * obtains the lock and sees NAPI_STATE_SCHED set will
+ * actually make the ->poll() call. Therefore we avoid
+ * accidentally calling ->poll() when NAPI is not scheduled.
+ */
+ work = 0;
+ if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+ work = n->poll(n, weight);
+ trace_napi_poll(n);
+ }
+
+ WARN_ON_ONCE(work > weight);
+
+ if (likely(work < weight))
+ goto out_unlock;
+
+ /* Drivers must not modify the NAPI state if they
+ * consume the entire weight. In such cases this code
+ * still "owns" the NAPI instance and therefore can
+ * move the instance around on the list at-will.
+ */
+ if (unlikely(napi_disable_pending(n))) {
+ napi_complete(n);
+ goto out_unlock;
+ }
+
+ if (n->gro_list) {
+ /* flush too old packets
+ * If HZ < 1000, flush all packets.
+ */
+ napi_gro_flush(n, HZ >= 1000);
+ }
+
+ /* Some drivers may have called napi_schedule
+ * prior to exhausting their budget.
+ */
+ if (unlikely(!list_empty(&n->poll_list))) {
+ pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
+ n->dev ? n->dev->name : "backlog");
+ goto out_unlock;
+ }
+
+ list_add_tail(&n->poll_list, repoll);
+
+out_unlock:
+ netpoll_poll_unlock(have);
+
+ return work;
+}
+
static void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -4564,74 +4667,34 @@ static void net_rx_action(struct softirq_action *h)
int budget = netdev_budget;
LIST_HEAD(list);
LIST_HEAD(repoll);
- void *have;
local_irq_disable();
list_splice_init(&sd->poll_list, &list);
local_irq_enable();
- while (!list_empty(&list)) {
+ for (;;) {
struct napi_struct *n;
- int work, weight;
-
- /* If softirq window is exhausted then punt.
- * Allow this to run for 2 jiffies since which will allow
- * an average latency of 1.5/HZ.
- */
- if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
- goto softnet_break;
-
- n = list_first_entry(&list, struct napi_struct, poll_list);
- list_del_init(&n->poll_list);
-
- have = netpoll_poll_lock(n);
-
- weight = n->weight;
-
- /* This NAPI_STATE_SCHED test is for avoiding a race
- * with netpoll's poll_napi(). Only the entity which
- * obtains the lock and sees NAPI_STATE_SCHED set will
- * actually make the ->poll() call. Therefore we avoid
- * accidentally calling ->poll() when NAPI is not scheduled.
- */
- work = 0;
- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
- work = n->poll(n, weight);
- trace_napi_poll(n);
+ if (list_empty(&list)) {
+ if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
+ return;
+ break;
}
- WARN_ON_ONCE(work > weight);
-
- budget -= work;
+ n = list_first_entry(&list, struct napi_struct, poll_list);
+ budget -= napi_poll(n, &repoll);
- /* Drivers must not modify the NAPI state if they
- * consume the entire weight. In such cases this code
- * still "owns" the NAPI instance and therefore can
- * move the instance around on the list at-will.
+ /* If softirq window is exhausted then punt.
+ * Allow this to run for 2 jiffies since which will allow
+ * an average latency of 1.5/HZ.
*/
- if (unlikely(work == weight)) {
- if (unlikely(napi_disable_pending(n))) {
- napi_complete(n);
- } else {
- if (n->gro_list) {
- /* flush too old packets
- * If HZ < 1000, flush all packets.
- */
- napi_gro_flush(n, HZ >= 1000);
- }
- list_add_tail(&n->poll_list, &repoll);
- }
+ if (unlikely(budget <= 0 ||
+ time_after_eq(jiffies, time_limit))) {
+ sd->time_squeeze++;
+ break;
}
-
- netpoll_poll_unlock(have);
}
- if (!sd_has_rps_ipi_waiting(sd) &&
- list_empty(&list) &&
- list_empty(&repoll))
- return;
-out:
local_irq_disable();
list_splice_tail_init(&sd->poll_list, &list);
@@ -4641,12 +4704,6 @@ out:
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
-
- return;
-
-softnet_break:
- sd->time_squeeze++;
- goto out;
}
struct netdev_adjacent {
@@ -6147,13 +6204,16 @@ static int netif_alloc_rx_queues(struct net_device *dev)
{
unsigned int i, count = dev->num_rx_queues;
struct netdev_rx_queue *rx;
+ size_t sz = count * sizeof(*rx);
BUG_ON(count < 1);
- rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
- if (!rx)
- return -ENOMEM;
-
+ rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ if (!rx) {
+ rx = vzalloc(sz);
+ if (!rx)
+ return -ENOMEM;
+ }
dev->_rx = rx;
for (i = 0; i < count; i++)
@@ -6551,6 +6611,8 @@ void netdev_run_todo(void)
/* paranoia */
BUG_ON(netdev_refcnt_read(dev));
+ BUG_ON(!list_empty(&dev->ptype_all));
+ BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
@@ -6733,6 +6795,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->adj_list.lower);
INIT_LIST_HEAD(&dev->all_adj_list.upper);
INIT_LIST_HEAD(&dev->all_adj_list.lower);
+ INIT_LIST_HEAD(&dev->ptype_all);
+ INIT_LIST_HEAD(&dev->ptype_specific);
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -6783,7 +6847,7 @@ void free_netdev(struct net_device *dev)
netif_free_tx_queues(dev);
#ifdef CONFIG_SYSFS
- kfree(dev->_rx);
+ kvfree(dev->_rx);
#endif
kfree(rcu_dereference_protected(dev->ingress_queue, 1));
@@ -7047,10 +7111,20 @@ static int dev_cpu_callback(struct notifier_block *nfb,
oldsd->output_queue = NULL;
oldsd->output_queue_tailp = &oldsd->output_queue;
}
- /* Append NAPI poll list from offline CPU. */
- if (!list_empty(&oldsd->poll_list)) {
- list_splice_init(&oldsd->poll_list, &sd->poll_list);
- raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ /* Append NAPI poll list from offline CPU, with one exception :
+ * process_backlog() must be called by cpu owning percpu backlog.
+ * We properly handle process_queue & input_pkt_queue later.
+ */
+ while (!list_empty(&oldsd->poll_list)) {
+ struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
+ struct napi_struct,
+ poll_list);
+
+ list_del_init(&napi->poll_list);
+ if (napi->poll == process_backlog)
+ napi->state = 0;
+ else
+ ____napi_schedule(sd, napi);
}
raise_softirq_irqoff(NET_TX_SOFTIRQ);
@@ -7061,7 +7135,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
netif_rx_internal(skb);
input_queue_head_incr(oldsd);
}
- while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
+ while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
netif_rx_internal(skb);
input_queue_head_incr(oldsd);
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 550892cd6b3f..91f74f3eb204 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1597,20 +1597,31 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
return err;
}
+static int __ethtool_get_module_info(struct net_device *dev,
+ struct ethtool_modinfo *modinfo)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct phy_device *phydev = dev->phydev;
+
+ if (phydev && phydev->drv && phydev->drv->module_info)
+ return phydev->drv->module_info(phydev, modinfo);
+
+ if (ops->get_module_info)
+ return ops->get_module_info(dev, modinfo);
+
+ return -EOPNOTSUPP;
+}
+
static int ethtool_get_module_info(struct net_device *dev,
void __user *useraddr)
{
int ret;
struct ethtool_modinfo modinfo;
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
- if (!ops->get_module_info)
- return -EOPNOTSUPP;
if (copy_from_user(&modinfo, useraddr, sizeof(modinfo)))
return -EFAULT;
- ret = ops->get_module_info(dev, &modinfo);
+ ret = __ethtool_get_module_info(dev, &modinfo);
if (ret)
return ret;
@@ -1620,21 +1631,33 @@ static int ethtool_get_module_info(struct net_device *dev,
return 0;
}
+static int __ethtool_get_module_eeprom(struct net_device *dev,
+ struct ethtool_eeprom *ee, u8 *data)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct phy_device *phydev = dev->phydev;
+
+ if (phydev && phydev->drv && phydev->drv->module_eeprom)
+ return phydev->drv->module_eeprom(phydev, ee, data);
+
+ if (ops->get_module_eeprom)
+ return ops->get_module_eeprom(dev, ee, data);
+
+ return -EOPNOTSUPP;
+}
+
static int ethtool_get_module_eeprom(struct net_device *dev,
void __user *useraddr)
{
int ret;
struct ethtool_modinfo modinfo;
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
- if (!ops->get_module_info || !ops->get_module_eeprom)
- return -EOPNOTSUPP;
- ret = ops->get_module_info(dev, &modinfo);
+ ret = __ethtool_get_module_info(dev, &modinfo);
if (ret)
return ret;
- return ethtool_get_any_eeprom(dev, useraddr, ops->get_module_eeprom,
+ return ethtool_get_any_eeprom(dev, useraddr,
+ __ethtool_get_module_eeprom,
modinfo.eeprom_len);
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 185c341fafbd..44706e81b2e0 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -609,7 +609,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
if (ops->fill(rule, skb, frh) < 0)
goto nla_put_failure;
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 45084938c403..2c35c02a931e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -178,6 +178,20 @@ ipv6:
return false;
}
}
+ case htons(ETH_P_TIPC): {
+ struct {
+ __be32 pre[3];
+ __be32 srcnode;
+ } *hdr, _hdr;
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ if (!hdr)
+ return false;
+ flow->src = hdr->srcnode;
+ flow->dst = 0;
+ flow->n_proto = proto;
+ flow->thoff = (u16)nhoff;
+ return true;
+ }
case htons(ETH_P_FCOE):
flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
/* fall through */
@@ -408,7 +422,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
dev_maps = rcu_dereference(dev->xps_maps);
if (dev_maps) {
map = rcu_dereference(
- dev_maps->cpu_map[raw_smp_processor_id()]);
+ dev_maps->cpu_map[skb->sender_cpu - 1]);
if (map) {
if (map->len == 1)
queue_index = map->queues[0];
@@ -454,6 +468,11 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
{
int queue_index = 0;
+#ifdef CONFIG_XPS
+ if (skb->sender_cpu == 0)
+ skb->sender_cpu = raw_smp_processor_id() + 1;
+#endif
+
if (dev->real_num_tx_queues != 1) {
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_select_queue)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8e38f17288d3..70fe9e10ac86 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1884,7 +1884,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
goto nla_put_failure;
read_unlock_bh(&tbl->lock);
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
read_unlock_bh(&tbl->lock);
@@ -1917,7 +1918,8 @@ static int neightbl_fill_param_info(struct sk_buff *skb,
goto errout;
read_unlock_bh(&tbl->lock);
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
errout:
read_unlock_bh(&tbl->lock);
nlmsg_cancel(skb, nlh);
@@ -2043,6 +2045,12 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
case NDTPA_BASE_REACHABLE_TIME:
NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
nla_get_msecs(tbp[i]));
+ /* update reachable_time as well, otherwise, the change will
+ * only be effective after the next time neigh_periodic_work
+ * decides to recompute it (can be multiple minutes)
+ */
+ p->reachable_time =
+ neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
break;
case NDTPA_GC_STALETIME:
NEIGH_VAR_SET(p, GC_STALETIME,
@@ -2120,7 +2128,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
- NLM_F_MULTI) <= 0)
+ NLM_F_MULTI) < 0)
break;
nidx = 0;
@@ -2136,7 +2144,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGHTBL,
- NLM_F_MULTI) <= 0)
+ NLM_F_MULTI) < 0)
goto out;
next:
nidx++;
@@ -2196,7 +2204,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
@@ -2226,7 +2235,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
goto nla_put_failure;
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
@@ -2264,7 +2274,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
- NLM_F_MULTI) <= 0) {
+ NLM_F_MULTI) < 0) {
rc = -1;
goto out;
}
@@ -2301,7 +2311,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
- NLM_F_MULTI, tbl) <= 0) {
+ NLM_F_MULTI, tbl) < 0) {
read_unlock_bh(&tbl->lock);
rc = -1;
goto out;
@@ -2921,6 +2931,31 @@ static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
return ret;
}
+static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ struct neigh_parms *p = ctl->extra2;
+ int ret;
+
+ if (strcmp(ctl->procname, "base_reachable_time") == 0)
+ ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
+ else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
+ ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
+ else
+ ret = -1;
+
+ if (write && ret == 0) {
+ /* update reachable_time as well, otherwise, the change will
+ * only be effective after the next time neigh_periodic_work
+ * decides to recompute it
+ */
+ p->reachable_time =
+ neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
+ }
+ return ret;
+}
+
#define NEIGH_PARMS_DATA_OFFSET(index) \
(&((struct neigh_parms *) 0)->data[index])
@@ -3047,6 +3082,19 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
/* ReachableTime (in milliseconds) */
t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
+ } else {
+ /* Those handlers will update p->reachable_time after
+ * base_reachable_time(_ms) is set to ensure the new timer starts being
+ * applied after the next neighbour update instead of waiting for
+ * neigh_periodic_work to update its value (can be multiple minutes)
+ * So any handler that replaces them should do this as well
+ */
+ /* ReachableTime */
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
+ neigh_proc_base_reachable_time;
+ /* ReachableTime (in milliseconds) */
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
+ neigh_proc_base_reachable_time;
}
/* Don't export sysctls to unprivileged users */
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 5d5ee8f3e4ff..cb5290b8c428 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -15,6 +15,10 @@
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
+#include <linux/net_namespace.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -144,6 +148,78 @@ static void ops_free_list(const struct pernet_operations *ops,
}
}
+static int alloc_netid(struct net *net, struct net *peer, int reqid)
+{
+ int min = 0, max = 0;
+
+ ASSERT_RTNL();
+
+ if (reqid >= 0) {
+ min = reqid;
+ max = reqid + 1;
+ }
+
+ return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+}
+
+/* This function is used by idr_for_each(). If net is equal to peer, the
+ * function returns the id so that idr_for_each() stops. Because we cannot
+ * returns the id 0 (idr_for_each() will not stop), we return the magic value
+ * NET_ID_ZERO (-1) for it.
+ */
+#define NET_ID_ZERO -1
+static int net_eq_idr(int id, void *net, void *peer)
+{
+ if (net_eq(net, peer))
+ return id ? : NET_ID_ZERO;
+ return 0;
+}
+
+static int __peernet2id(struct net *net, struct net *peer, bool alloc)
+{
+ int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
+
+ ASSERT_RTNL();
+
+ /* Magic value for id 0. */
+ if (id == NET_ID_ZERO)
+ return 0;
+ if (id > 0)
+ return id;
+
+ if (alloc)
+ return alloc_netid(net, peer, -1);
+
+ return -ENOENT;
+}
+
+/* This function returns the id of a peer netns. If no id is assigned, one will
+ * be allocated and returned.
+ */
+int peernet2id(struct net *net, struct net *peer)
+{
+ int id = __peernet2id(net, peer, true);
+
+ return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
+}
+EXPORT_SYMBOL(peernet2id);
+
+struct net *get_net_ns_by_id(struct net *net, int id)
+{
+ struct net *peer;
+
+ if (id < 0)
+ return NULL;
+
+ rcu_read_lock();
+ peer = idr_find(&net->netns_ids, id);
+ if (peer)
+ get_net(peer);
+ rcu_read_unlock();
+
+ return peer;
+}
+
/*
* setup_net runs the initializers for the network namespace object.
*/
@@ -158,6 +234,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
atomic_set(&net->passive, 1);
net->dev_base_seq = 1;
net->user_ns = user_ns;
+ idr_init(&net->netns_ids);
#ifdef NETNS_REFCNT_DEBUG
atomic_set(&net->use_count, 0);
@@ -288,6 +365,14 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry(net, &net_kill_list, cleanup_list) {
list_del_rcu(&net->list);
list_add_tail(&net->exit_list, &net_exit_list);
+ for_each_net(tmp) {
+ int id = __peernet2id(tmp, net, false);
+
+ if (id >= 0)
+ idr_remove(&tmp->netns_ids, id);
+ }
+ idr_destroy(&net->netns_ids);
+
}
rtnl_unlock();
@@ -337,17 +422,17 @@ EXPORT_SYMBOL_GPL(__put_net);
struct net *get_net_ns_by_fd(int fd)
{
- struct proc_ns *ei;
struct file *file;
+ struct ns_common *ns;
struct net *net;
file = proc_ns_fget(fd);
if (IS_ERR(file))
return ERR_CAST(file);
- ei = get_proc_ns(file_inode(file));
- if (ei->ns_ops == &netns_operations)
- net = get_net(ei->ns);
+ ns = get_proc_ns(file_inode(file));
+ if (ns->ops == &netns_operations)
+ net = get_net(container_of(ns, struct net, ns));
else
net = ERR_PTR(-EINVAL);
@@ -387,12 +472,15 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
static __net_init int net_ns_net_init(struct net *net)
{
- return proc_alloc_inum(&net->proc_inum);
+#ifdef CONFIG_NET_NS
+ net->ns.ops = &netns_operations;
+#endif
+ return ns_alloc_inum(&net->ns);
}
static __net_exit void net_ns_net_exit(struct net *net)
{
- proc_free_inum(net->proc_inum);
+ ns_free_inum(&net->ns);
}
static struct pernet_operations __net_initdata net_ns_ops = {
@@ -400,6 +488,130 @@ static struct pernet_operations __net_initdata net_ns_ops = {
.exit = net_ns_net_exit,
};
+static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
+ [NETNSA_NONE] = { .type = NLA_UNSPEC },
+ [NETNSA_NSID] = { .type = NLA_S32 },
+ [NETNSA_PID] = { .type = NLA_U32 },
+ [NETNSA_FD] = { .type = NLA_U32 },
+};
+
+static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NETNSA_MAX + 1];
+ struct net *peer;
+ int nsid, err;
+
+ err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
+ rtnl_net_policy);
+ if (err < 0)
+ return err;
+ if (!tb[NETNSA_NSID])
+ return -EINVAL;
+ nsid = nla_get_s32(tb[NETNSA_NSID]);
+
+ if (tb[NETNSA_PID])
+ peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
+ else if (tb[NETNSA_FD])
+ peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
+ else
+ return -EINVAL;
+ if (IS_ERR(peer))
+ return PTR_ERR(peer);
+
+ if (__peernet2id(net, peer, false) >= 0) {
+ err = -EEXIST;
+ goto out;
+ }
+
+ err = alloc_netid(net, peer, nsid);
+ if (err > 0)
+ err = 0;
+out:
+ put_net(peer);
+ return err;
+}
+
+static int rtnl_net_get_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct rtgenmsg))
+ + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
+ ;
+}
+
+static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
+ int cmd, struct net *net, struct net *peer)
+{
+ struct nlmsghdr *nlh;
+ struct rtgenmsg *rth;
+ int id;
+
+ ASSERT_RTNL();
+
+ nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rth = nlmsg_data(nlh);
+ rth->rtgen_family = AF_UNSPEC;
+
+ id = __peernet2id(net, peer, false);
+ if (id < 0)
+ id = NETNSA_NSID_NOT_ASSIGNED;
+ if (nla_put_s32(skb, NETNSA_NSID, id))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NETNSA_MAX + 1];
+ struct sk_buff *msg;
+ int err = -ENOBUFS;
+ struct net *peer;
+
+ err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
+ rtnl_net_policy);
+ if (err < 0)
+ return err;
+ if (tb[NETNSA_PID])
+ peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
+ else if (tb[NETNSA_FD])
+ peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
+ else
+ return -EINVAL;
+
+ if (IS_ERR(peer))
+ return PTR_ERR(peer);
+
+ msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ RTM_GETNSID, net, peer);
+ if (err < 0)
+ goto err_out;
+
+ err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
+ goto out;
+
+err_out:
+ nlmsg_free(msg);
+out:
+ put_net(peer);
+ return err;
+}
+
static int __init net_ns_init(void)
{
struct net_generic *ng;
@@ -433,6 +645,9 @@ static int __init net_ns_init(void)
register_pernet_subsys(&net_ns_ops);
+ rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL);
+
return 0;
}
@@ -630,7 +845,7 @@ void unregister_pernet_device(struct pernet_operations *ops)
EXPORT_SYMBOL_GPL(unregister_pernet_device);
#ifdef CONFIG_NET_NS
-static void *netns_get(struct task_struct *task)
+static struct ns_common *netns_get(struct task_struct *task)
{
struct net *net = NULL;
struct nsproxy *nsproxy;
@@ -641,17 +856,22 @@ static void *netns_get(struct task_struct *task)
net = get_net(nsproxy->net_ns);
task_unlock(task);
- return net;
+ return net ? &net->ns : NULL;
+}
+
+static inline struct net *to_net_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct net, ns);
}
-static void netns_put(void *ns)
+static void netns_put(struct ns_common *ns)
{
- put_net(ns);
+ put_net(to_net_ns(ns));
}
-static int netns_install(struct nsproxy *nsproxy, void *ns)
+static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
- struct net *net = ns;
+ struct net *net = to_net_ns(ns);
if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
@@ -662,18 +882,11 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
return 0;
}
-static unsigned int netns_inum(void *ns)
-{
- struct net *net = ns;
- return net->proc_inum;
-}
-
const struct proc_ns_operations netns_operations = {
.name = "net",
.type = CLONE_NEWNET,
.get = netns_get,
.put = netns_put,
.install = netns_install,
- .inum = netns_inum,
};
#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e0ad5d16c9c5..c126a878c47c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -77,7 +77,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
features = netif_skb_features(skb);
- if (vlan_tx_tag_present(skb) &&
+ if (skb_vlan_tag_present(skb) &&
!vlan_hw_offload_capable(features, skb->vlan_proto)) {
skb = __vlan_hwaccel_push_inside(skb);
if (unlikely(!skb)) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d06107d36ec8..673cb4c6f391 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -50,6 +50,7 @@
#include <net/arp.h>
#include <net/route.h>
#include <net/udp.h>
+#include <net/tcp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/fib_rules.h>
@@ -669,9 +670,19 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
for (i = 0; i < RTAX_MAX; i++) {
if (metrics[i]) {
+ if (i == RTAX_CC_ALGO - 1) {
+ char tmp[TCP_CA_NAME_MAX], *name;
+
+ name = tcp_ca_get_name_by_key(metrics[i], tmp);
+ if (!name)
+ continue;
+ if (nla_put_string(skb, i + 1, name))
+ goto nla_put_failure;
+ } else {
+ if (nla_put_u32(skb, i + 1, metrics[i]))
+ goto nla_put_failure;
+ }
valid++;
- if (nla_put_u32(skb, i+1, metrics[i]))
- goto nla_put_failure;
}
}
@@ -864,6 +875,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */
+ + nla_total_size(4) /* IFLA_LINK_NETNSID */
+ nla_total_size(ext_filter_mask
& RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
@@ -1158,6 +1170,18 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
+ if (dev->rtnl_link_ops &&
+ dev->rtnl_link_ops->get_link_net) {
+ struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+
+ if (!net_eq(dev_net(dev), link_net)) {
+ int id = peernet2id(dev_net(dev), link_net);
+
+ if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+ goto nla_put_failure;
+ }
+ }
+
if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
goto nla_put_failure;
@@ -1188,7 +1212,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_nest_end(skb, af_spec);
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
@@ -1223,6 +1248,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
+ [IFLA_LINK_NETNSID] = { .type = NLA_S32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1315,7 +1341,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
*/
WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
- if (err <= 0)
+ if (err < 0)
goto out;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -1996,7 +2022,7 @@ replay:
struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0];
struct nlattr **data = NULL;
struct nlattr **slave_data = NULL;
- struct net *dest_net;
+ struct net *dest_net, *link_net = NULL;
if (ops) {
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
@@ -2102,7 +2128,18 @@ replay:
if (IS_ERR(dest_net))
return PTR_ERR(dest_net);
- dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb);
+ if (tb[IFLA_LINK_NETNSID]) {
+ int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
+
+ link_net = get_net_ns_by_id(dest_net, id);
+ if (!link_net) {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ dev = rtnl_create_link(link_net ? : dest_net, ifname,
+ name_assign_type, ops, tb);
if (IS_ERR(dev)) {
err = PTR_ERR(dev);
goto out;
@@ -2111,7 +2148,7 @@ replay:
dev->ifindex = ifm->ifi_index;
if (ops->newlink) {
- err = ops->newlink(net, dev, tb, data);
+ err = ops->newlink(link_net ? : net, dev, tb, data);
/* Drivers should call free_netdev() in ->destructor
* and unregister it on failure after registration
* so that device could be finally freed in rtnl_unlock.
@@ -2130,9 +2167,19 @@ replay:
}
}
err = rtnl_configure_link(dev, ifm);
- if (err < 0)
+ if (err < 0) {
unregister_netdevice(dev);
+ goto out;
+ }
+
+ if (link_net) {
+ err = dev_change_net_namespace(dev, dest_net, ifname);
+ if (err < 0)
+ unregister_netdevice(dev);
+ }
out:
+ if (link_net)
+ put_net(link_net);
put_net(dest_net);
return err;
}
@@ -2315,7 +2362,8 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
goto nla_put_failure;
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
@@ -2368,6 +2416,11 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
return err;
}
+ if (vid) {
+ pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
+ return err;
+ }
+
if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
err = dev_uc_add_excl(dev, addr);
else if (is_multicast_ether_addr(addr))
@@ -2693,10 +2746,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
idx);
}
- idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
if (dev->netdev_ops->ndo_fdb_dump)
- idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev,
+ idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
idx);
+ else
+ idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
cops = NULL;
}
@@ -2792,7 +2846,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
nla_nest_end(skb, protinfo);
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -2863,32 +2918,24 @@ static inline size_t bridge_nlmsg_size(void)
+ nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */
}
-static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
+static int rtnl_bridge_notify(struct net_device *dev)
{
struct net *net = dev_net(dev);
- struct net_device *br_dev = netdev_master_upper_dev_get(dev);
struct sk_buff *skb;
int err = -EOPNOTSUPP;
+ if (!dev->netdev_ops->ndo_bridge_getlink)
+ return 0;
+
skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC);
if (!skb) {
err = -ENOMEM;
goto errout;
}
- if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
- br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
- err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
- if (err < 0)
- goto errout;
- }
-
- if ((flags & BRIDGE_FLAGS_SELF) &&
- dev->netdev_ops->ndo_bridge_getlink) {
- err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
- if (err < 0)
- goto errout;
- }
+ err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
+ if (err < 0)
+ goto errout;
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
return 0;
@@ -2906,7 +2953,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net_device *dev;
struct nlattr *br_spec, *attr = NULL;
int rem, err = -EOPNOTSUPP;
- u16 oflags, flags = 0;
+ u16 flags = 0;
bool have_flags = false;
if (nlmsg_len(nlh) < sizeof(*ifm))
@@ -2936,8 +2983,6 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
}
}
- oflags = flags;
-
if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
@@ -2946,7 +2991,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
goto out;
}
- err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
+ err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags);
if (err)
goto out;
@@ -2957,17 +3002,20 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!dev->netdev_ops->ndo_bridge_setlink)
err = -EOPNOTSUPP;
else
- err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
-
- if (!err)
+ err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh,
+ flags);
+ if (!err) {
flags &= ~BRIDGE_FLAGS_SELF;
+
+ /* Generate event to notify upper layer of bridge
+ * change
+ */
+ err = rtnl_bridge_notify(dev);
+ }
}
if (have_flags)
memcpy(nla_data(attr), &flags, sizeof(flags));
- /* Generate event to notify upper layer of bridge change */
- if (!err)
- err = rtnl_bridge_notify(dev, oflags);
out:
return err;
}
@@ -2979,7 +3027,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net_device *dev;
struct nlattr *br_spec, *attr = NULL;
int rem, err = -EOPNOTSUPP;
- u16 oflags, flags = 0;
+ u16 flags = 0;
bool have_flags = false;
if (nlmsg_len(nlh) < sizeof(*ifm))
@@ -3009,8 +3057,6 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
}
}
- oflags = flags;
-
if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
@@ -3019,7 +3065,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
goto out;
}
- err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
+ err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags);
if (err)
goto out;
@@ -3030,17 +3076,21 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!dev->netdev_ops->ndo_bridge_dellink)
err = -EOPNOTSUPP;
else
- err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
+ err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh,
+ flags);
- if (!err)
+ if (!err) {
flags &= ~BRIDGE_FLAGS_SELF;
+
+ /* Generate event to notify upper layer of bridge
+ * change
+ */
+ err = rtnl_bridge_notify(dev);
+ }
}
if (have_flags)
memcpy(nla_data(attr), &flags, sizeof(flags));
- /* Generate event to notify upper layer of bridge change */
- if (!err)
- err = rtnl_bridge_notify(dev, oflags);
out:
return err;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ae13ef6b3ea7..88c613eab142 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -74,6 +74,8 @@
#include <asm/uaccess.h>
#include <trace/events/skb.h>
#include <linux/highmem.h>
+#include <linux/capability.h>
+#include <linux/user_namespace.h>
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
@@ -677,13 +679,6 @@ static void skb_release_head_state(struct sk_buff *skb)
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
#endif
-/* XXX: IS this still necessary? - JHS */
-#ifdef CONFIG_NET_SCHED
- skb->tc_index = 0;
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = 0;
-#endif
-#endif
}
/* Free everything but the sk_buff shell. */
@@ -830,6 +825,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#ifdef CONFIG_NET_RX_BUSY_POLL
CHECK_SKB_FIELD(napi_id);
#endif
+#ifdef CONFIG_XPS
+ CHECK_SKB_FIELD(sender_cpu);
+#endif
#ifdef CONFIG_NET_SCHED
CHECK_SKB_FIELD(tc_index);
#ifdef CONFIG_NET_CLS_ACT
@@ -3697,11 +3695,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
kfree_skb(skb);
}
+static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
+{
+ bool ret;
+
+ if (likely(sysctl_tstamp_allow_data || tsonly))
+ return true;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ ret = sk->sk_socket && sk->sk_socket->file &&
+ file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW);
+ read_unlock_bh(&sk->sk_callback_lock);
+ return ret;
+}
+
void skb_complete_tx_timestamp(struct sk_buff *skb,
struct skb_shared_hwtstamps *hwtstamps)
{
struct sock *sk = skb->sk;
+ if (!skb_may_tx_timestamp(sk, false))
+ return;
+
/* take a reference to prevent skb_orphan() from freeing the socket */
sock_hold(sk);
@@ -3717,19 +3732,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
struct sock *sk, int tstype)
{
struct sk_buff *skb;
+ bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
- if (!sk)
+ if (!sk || !skb_may_tx_timestamp(sk, tsonly))
return;
- if (hwtstamps)
- *skb_hwtstamps(orig_skb) = *hwtstamps;
+ if (tsonly)
+ skb = alloc_skb(0, GFP_ATOMIC);
else
- orig_skb->tstamp = ktime_get_real();
-
- skb = skb_clone(orig_skb, GFP_ATOMIC);
+ skb = skb_clone(orig_skb, GFP_ATOMIC);
if (!skb)
return;
+ if (tsonly) {
+ skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
+ skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
+ }
+
+ if (hwtstamps)
+ *skb_hwtstamps(skb) = *hwtstamps;
+ else
+ skb->tstamp = ktime_get_real();
+
__skb_complete_tx_timestamp(skb, sk, tstype);
}
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
@@ -4148,6 +4172,8 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
skb->ignore_df = 0;
skb_dst_drop(skb);
skb->mark = 0;
+ skb->sender_cpu = 0;
+ skb_init_secmark(skb);
secpath_reset(skb);
nf_reset(skb);
nf_reset_trace(skb);
@@ -4203,7 +4229,7 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
struct vlan_hdr *vhdr;
u16 vlan_tci;
- if (unlikely(vlan_tx_tag_present(skb))) {
+ if (unlikely(skb_vlan_tag_present(skb))) {
/* vlan_tci is already set-up so leave this for another time */
return skb;
}
@@ -4289,7 +4315,7 @@ int skb_vlan_pop(struct sk_buff *skb)
__be16 vlan_proto;
int err;
- if (likely(vlan_tx_tag_present(skb))) {
+ if (likely(skb_vlan_tag_present(skb))) {
skb->vlan_tci = 0;
} else {
if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
@@ -4319,7 +4345,7 @@ EXPORT_SYMBOL(skb_vlan_pop);
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
{
- if (vlan_tx_tag_present(skb)) {
+ if (skb_vlan_tag_present(skb)) {
unsigned int offset = skb->data - skb_mac_header(skb);
int err;
@@ -4329,7 +4355,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
*/
__skb_push(skb, offset);
err = __vlan_insert_tag(skb, skb->vlan_proto,
- vlan_tx_tag_get(skb));
+ skb_vlan_tag_get(skb));
if (err)
return err;
skb->protocol = skb->vlan_proto;
diff --git a/net/core/sock.c b/net/core/sock.c
index 9a56b2000c3f..93c8b20c91e4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -325,6 +325,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);
+int sysctl_tstamp_allow_data __read_mostly = 1;
+
struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);
@@ -840,6 +842,7 @@ set_rcvbuf:
ret = -EINVAL;
break;
}
+
if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
if (sk->sk_protocol == IPPROTO_TCP) {
@@ -1731,18 +1734,34 @@ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
}
EXPORT_SYMBOL(sock_kmalloc);
-/*
- * Free an option memory block.
+/* Free an option memory block. Note, we actually want the inline
+ * here as this allows gcc to detect the nullify and fold away the
+ * condition entirely.
*/
-void sock_kfree_s(struct sock *sk, void *mem, int size)
+static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
+ const bool nullify)
{
if (WARN_ON_ONCE(!mem))
return;
- kfree(mem);
+ if (nullify)
+ kzfree(mem);
+ else
+ kfree(mem);
atomic_sub(size, &sk->sk_omem_alloc);
}
+
+void sock_kfree_s(struct sock *sk, void *mem, int size)
+{
+ __sock_kfree_s(sk, mem, size, false);
+}
EXPORT_SYMBOL(sock_kfree_s);
+void sock_kzfree_s(struct sock *sk, void *mem, int size)
+{
+ __sock_kfree_s(sk, mem, size, true);
+}
+EXPORT_SYMBOL(sock_kzfree_s);
+
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
I think, these locks should be removed for datagram sockets.
*/
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 31baba2a71ce..fde21d19e61b 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -321,6 +321,15 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tstamp_allow_data",
+ .data = &sysctl_tstamp_allow_data,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one
+ },
#ifdef CONFIG_RPS
{
.procname = "rps_sock_flow_entries",