From 6ad3122a08e3a9c2148873665752e87cf4f393cc Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 22 Feb 2016 10:40:07 +0100 Subject: flowcache: Avoid OOM condition under preasure We can hit an OOM condition if we are under presure because we can not free the entries in gc_list fast enough. So add a counter for the not yet freed entries in the gc_list and refuse new allocations if the value is too high. Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 1 + net/core/flow.c | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 730d82ad6ee5..24cd3949a9a4 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -80,6 +80,7 @@ struct netns_xfrm { struct flow_cache flow_cache_global; atomic_t flow_cache_genid; struct list_head flow_cache_gc_list; + atomic_t flow_cache_gc_count; spinlock_t flow_cache_gc_lock; struct work_struct flow_cache_gc_work; struct work_struct flow_cache_flush_work; diff --git a/net/core/flow.c b/net/core/flow.c index 1033725be40b..3937b1b68d5b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -92,8 +92,11 @@ static void flow_cache_gc_task(struct work_struct *work) list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); spin_unlock_bh(&xfrm->flow_cache_gc_lock); - list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) + list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) { flow_entry_kill(fce, xfrm); + atomic_dec(&xfrm->flow_cache_gc_count); + WARN_ON(atomic_read(&xfrm->flow_cache_gc_count) < 0); + } } static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, @@ -101,6 +104,7 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, struct netns_xfrm *xfrm) { if (deleted) { + atomic_add(deleted, &xfrm->flow_cache_gc_count); fcp->hash_count -= deleted; spin_lock_bh(&xfrm->flow_cache_gc_lock); list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); @@ -232,6 +236,13 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_count > fc->high_watermark) flow_cache_shrink(fc, fcp); + if (fcp->hash_count > 2 * fc->high_watermark || + atomic_read(&net->xfrm.flow_cache_gc_count) > fc->high_watermark) { + atomic_inc(&net->xfrm.flow_cache_genid); + flo = ERR_PTR(-ENOBUFS); + goto ret_object; + } + fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { fle->net = net; @@ -446,6 +457,7 @@ int flow_cache_init(struct net *net) INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); mutex_init(&net->xfrm.flow_flush_sem); + atomic_set(&net->xfrm.flow_cache_gc_count, 0); fc->hash_shift = 10; fc->low_watermark = 2 * flow_cache_hash_size(fc); -- cgit v1.2.3 From 215276c0147ef49bc07692ca68bae35a30a64b9a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 22 Feb 2016 10:56:45 +0100 Subject: xfrm: Reset encapsulation field of the skb before transformation The inner headers are invalid after a xfrm transformation. So reset the skb encapsulation field to ensure nobody tries to access the inner headers. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ff4a91fcab9f..637387bbaaea 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -99,6 +99,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err) skb_dst_force(skb); + /* Inner headers are invalid now. */ + skb->encapsulation = 0; + err = x->type->output(x, skb); if (err == -EINPROGRESS) goto out; -- cgit v1.2.3 From d6af1a31cc72fbd558c7eddbc36f61bf09d1cf6a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 16 Mar 2016 10:17:37 +0100 Subject: vti: Add pmtu handling to vti_xmit. We currently rely on the PMTU discovery of xfrm. However if a packet is locally sent, the PMTU mechanism of xfrm tries to do local socket notification what might not work for applications like ping that don't check for this. So add pmtu handling to vti_xmit to report MTU changes immediately. Reported-by: Mark McKinstry Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 5cf10b777b7e..a917903d5e97 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -156,6 +156,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; /* Device to other host */ int err; + int mtu; if (!dst) { dev->stats.tx_carrier_errors++; @@ -192,6 +193,23 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->err_count = 0; } + mtu = dst_mtu(dst); + if (skb->len > mtu) { + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + if (skb->protocol == htons(ETH_P_IP)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + } else { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + } + + dst_release(dst); + goto tx_error; + } + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; -- cgit v1.2.3 From 20decb7e486d7eefff3931f58d092d2d7c024a1c Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Tue, 3 May 2016 16:05:07 +0200 Subject: drivers: net: xgene: Fix error handling When probe bails out with an error, we try to unregister the netdev before we have even registered it. Fix the goto statements for that. Signed-off-by: Matthias Brugger Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 8d4c1ad2fc60..99d7e580e166 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1595,21 +1595,22 @@ static int xgene_enet_probe(struct platform_device *pdev) ret = xgene_enet_init_hw(pdata); if (ret) - goto err; + goto err_netdev; mac_ops = pdata->mac_ops; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { ret = xgene_enet_mdio_config(pdata); if (ret) - goto err; + goto err_netdev; } else { INIT_DELAYED_WORK(&pdata->link_work, mac_ops->link_state); } xgene_enet_napi_add(pdata); return 0; -err: +err_netdev: unregister_netdev(ndev); +err: free_netdev(ndev); return ret; } -- cgit v1.2.3 From 1c021bb717a70aaeaa4b25c91f43c2aeddd922de Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 3 May 2016 16:38:53 +0200 Subject: net: fec: only clear a queue's work bit if the queue was emptied MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the receive path a queue's work bit was cleared unconditionally even if fec_enet_rx_queue only read out a part of the available packets from the hardware. This resulted in not reading any packets in the next napi turn and so packets were delayed or lost. The obvious fix is to only clear a queue's bit when the queue was emptied. Fixes: 4d494cdc92b3 ("net: fec: change data structure to support multiqueue") Signed-off-by: Uwe Kleine-König Reviewed-by: Lucas Stach Tested-by: Fugang Duan Acked-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 08243c2ff4b4..2a03857cca18 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1521,9 +1521,15 @@ fec_enet_rx(struct net_device *ndev, int budget) struct fec_enet_private *fep = netdev_priv(ndev); for_each_set_bit(queue_id, &fep->work_rx, FEC_ENET_MAX_RX_QS) { - clear_bit(queue_id, &fep->work_rx); - pkt_received += fec_enet_rx_queue(ndev, + int ret; + + ret = fec_enet_rx_queue(ndev, budget - pkt_received, queue_id); + + if (ret < budget - pkt_received) + clear_bit(queue_id, &fep->work_rx); + + pkt_received += ret; } return pkt_received; } -- cgit v1.2.3 From b8670c09f37bdf2847cc44f36511a53afc6161fd Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:35:05 -0400 Subject: net: fix infoleak in llc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stack object “info” has a total size of 12 bytes. Its last byte is padding which is not initialized and leaked via “put_cmsg”. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/llc/af_llc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b3c52e3f689a..8ae3ed97d95c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { struct llc_pktinfo info; + memset(&info, 0, sizeof(info)); info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; llc_pdu_decode_dsap(skb, &info.lpi_sap); llc_pdu_decode_da(skb, info.lpi_mac); -- cgit v1.2.3 From 5f8e44741f9f216e33736ea4ec65ca9ac03036e6 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:46:24 -0400 Subject: net: fix infoleak in rtnetlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stack object “map” has a total size of 32 bytes. Its last 4 bytes are padding generated by compiler. These padding bytes are not initialized and sent out via “nla_put”. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a75f7e94b445..65763c29f845 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1180,14 +1180,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) { - struct rtnl_link_ifmap map = { - .mem_start = dev->mem_start, - .mem_end = dev->mem_end, - .base_addr = dev->base_addr, - .irq = dev->irq, - .dma = dev->dma, - .port = dev->if_port, - }; + struct rtnl_link_ifmap map; + + memset(&map, 0, sizeof(map)); + map.mem_start = dev->mem_start; + map.mem_end = dev->mem_end; + map.base_addr = dev->base_addr; + map.irq = dev->irq; + map.dma = dev->dma; + map.port = dev->if_port; + if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) return -EMSGSIZE; -- cgit v1.2.3 From 67a95e2022c7f0405408fb1f910283785ece354a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 4 May 2016 16:56:43 -0400 Subject: bnxt_en: Need memory barrier when processing the completion ring. The code determines if the next ring entry is valid before proceeding further to read the rest of the entry. The CPU can re-order and read the rest of the entry first, possibly reading a stale entry, if DMA of a new entry happens right after reading it. This issue can be readily seen on a ppc64 system, causing it to crash. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 72eb29ed0359..f33ff205a0ea 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1388,6 +1388,10 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) if (!TX_CMP_VALID(txcmp, raw_cons)) break; + /* The valid test of the entry must be done first before + * reading any further. + */ + rmb(); if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { tx_pkts++; /* return full budget so NAPI will complete. */ -- cgit v1.2.3 From 7d2837dd7a3239e8201d9bef75c1a708e451e123 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 4 May 2016 16:56:44 -0400 Subject: bnxt_en: Setup multicast properly after resetting device. The multicast/all-multicast internal flags are not properly restored after device reset. This could lead to unreliable multicast operations after an ethtool configuration change for example. Call bnxt_mc_list_updated() and setup the vnic->mask in bnxt_init_chip() to fix the issue. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f33ff205a0ea..9d4e8e113fe1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4042,9 +4042,11 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) } static int bnxt_cfg_rx_mode(struct bnxt *); +static bool bnxt_mc_list_updated(struct bnxt *, u32 *); static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) { + struct bnxt_vnic_info *vnic = &bp->vnic_info[0]; int rc = 0; if (irq_re_init) { @@ -4100,13 +4102,22 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) netdev_err(bp->dev, "HWRM vnic filter failure rc: %x\n", rc); goto err_out; } - bp->vnic_info[0].uc_filter_count = 1; + vnic->uc_filter_count = 1; - bp->vnic_info[0].rx_mask = CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; + vnic->rx_mask = CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; if ((bp->dev->flags & IFF_PROMISC) && BNXT_PF(bp)) - bp->vnic_info[0].rx_mask |= - CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; + + if (bp->dev->flags & IFF_ALLMULTI) { + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; + vnic->mc_list_count = 0; + } else { + u32 mask = 0; + + bnxt_mc_list_updated(bp, &mask); + vnic->rx_mask |= mask; + } rc = bnxt_cfg_rx_mode(bp); if (rc) -- cgit v1.2.3 From 82d69203df634b4dfa765c94f60ce9482bcc44d6 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 4 May 2016 15:00:33 +0300 Subject: net/mlx4_en: Fix endianness bug in IPV6 csum calculation Use htons instead of unconditionally byte swapping nexthdr. On a little endian systems shifting the byte is correct behavior, but it results in incorrect csums on big endian architectures. Fixes: f8c6455bb04b ('net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE') Signed-off-by: Daniel Jurgens Reviewed-by: Carol Soto Tested-by: Carol Soto Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b723e3bcab39..ca3a38421ee7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -707,7 +707,7 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) return -1; - hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8)); + hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); csum_pseudo_hdr = csum_partial(&ipv6h->saddr, sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0); -- cgit v1.2.3 From dedc58e067d8c379a15a8a183c5db318201295bb Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 4 May 2016 14:21:53 +0100 Subject: VSOCK: do not disconnect socket when peer has shutdown SEND only The peer may be expecting a reply having sent a request and then done a shutdown(SHUT_WR), so tearing down the whole socket at this point seems wrong and breaks for me with a client which does a SHUT_WR. Looking at other socket family's stream_recvmsg callbacks doing a shutdown here does not seem to be the norm and removing it does not seem to have had any adverse effects that I can see. I'm using Stefan's RFC virtio transport patches, I'm unsure of the impact on the vmci transport. Signed-off-by: Ian Campbell Cc: "David S. Miller" Cc: Stefan Hajnoczi Cc: Claudio Imbrenda Cc: Andy King Cc: Dmitry Torokhov Cc: Jorgen Hansen Cc: Adit Ranadive Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3dce53ebea92..b5f1221f48d4 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1808,27 +1808,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, else if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - if (copied > 0) { - /* We only do these additional bookkeeping/notification steps - * if we actually copied something out of the queue pair - * instead of just peeking ahead. - */ - - if (!(flags & MSG_PEEK)) { - /* If the other side has shutdown for sending and there - * is nothing more to read, then modify the socket - * state. - */ - if (vsk->peer_shutdown & SEND_SHUTDOWN) { - if (vsock_stream_has_data(vsk) <= 0) { - sk->sk_state = SS_UNCONNECTED; - sock_set_flag(sk, SOCK_DONE); - sk->sk_state_change(sk); - } - } - } + if (copied > 0) err = copied; - } out: release_sock(sk); -- cgit v1.2.3 From 31ca0458a61a502adb7ed192bf9716c6d05791a5 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 4 May 2016 16:18:45 +0200 Subject: net: bridge: fix old ioctl unlocked net device walk get_bridge_ifindices() is used from the old "deviceless" bridge ioctl calls which aren't called with rtnl held. The comment above says that it is called with rtnl but that is not really the case. Here's a sample output from a test ASSERT_RTNL() which I put in get_bridge_ifindices and executed "brctl show": [ 957.422726] RTNL: assertion failed at net/bridge//br_ioctl.c (30) [ 957.422925] CPU: 0 PID: 1862 Comm: brctl Tainted: G W O 4.6.0-rc4+ #157 [ 957.423009] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 [ 957.423009] 0000000000000000 ffff880058adfdf0 ffffffff8138dec5 0000000000000400 [ 957.423009] ffffffff81ce8380 ffff880058adfe58 ffffffffa05ead32 0000000000000001 [ 957.423009] 00007ffec1a444b0 0000000000000400 ffff880053c19130 0000000000008940 [ 957.423009] Call Trace: [ 957.423009] [] dump_stack+0x85/0xc0 [ 957.423009] [] br_ioctl_deviceless_stub+0x212/0x2e0 [bridge] [ 957.423009] [] sock_ioctl+0x22b/0x290 [ 957.423009] [] do_vfs_ioctl+0x95/0x700 [ 957.423009] [] SyS_ioctl+0x79/0x90 [ 957.423009] [] entry_SYSCALL_64_fastpath+0x23/0xc1 Since it only reads bridge ifindices, we can use rcu to safely walk the net device list. Also remove the wrong rtnl comment above. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 263b4de4de57..60a3dbfca8a1 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -21,18 +21,19 @@ #include #include "br_private.h" -/* called with RTNL */ static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } + rcu_read_unlock(); return i; } -- cgit v1.2.3 From 856ce5d083e14571d051301fe3c65b32b8cbe321 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Wed, 4 May 2016 17:25:02 +0200 Subject: bridge: fix igmp / mld query parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the newly introduced helper functions the skb pulling is hidden in the checksumming function - and undone before returning to the caller. The IGMP and MLD query parsing functions in the bridge still assumed that the skb is pointing to the beginning of the IGMP/MLD message while it is now kept at the beginning of the IPv4/6 header. If there is a querier somewhere else, then this either causes the multicast snooping to stay disabled even though it could be enabled. Or, if we have the querier enabled too, then this can create unnecessary IGMP / MLD query messages on the link. Fixing this by taking the offset between IP and IGMP/MLD header into account, too. Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code") Reported-by: Simon Wunderlich Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 191ea66e4d92..6852f3c7009c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1279,6 +1279,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); __be32 group; int err = 0; @@ -1289,14 +1290,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, group = ih->group; - if (skb->len == sizeof(*ih)) { + if (skb->len == offset + sizeof(*ih)) { max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); if (!max_delay) { max_delay = 10 * HZ; group = 0; } - } else if (skb->len >= sizeof(*ih3)) { + } else if (skb->len >= offset + sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; @@ -1357,6 +1358,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); const struct in6_addr *group = NULL; bool is_general_query; int err = 0; @@ -1366,8 +1368,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - if (skb->len == sizeof(*mld)) { - if (!pskb_may_pull(skb, sizeof(*mld))) { + if (skb->len == offset + sizeof(*mld)) { + if (!pskb_may_pull(skb, offset + sizeof(*mld))) { err = -EINVAL; goto out; } @@ -1376,7 +1378,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (max_delay) group = &mld->mld_mca; } else { - if (!pskb_may_pull(skb, sizeof(*mld2q))) { + if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) { err = -EINVAL; goto out; } -- cgit v1.2.3 From 1d2f7b2d956e242179aaf4a08f3545f99c81f9a3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 May 2016 21:26:08 -0700 Subject: net: ipv6: tcp reset, icmp need to consider L3 domain Responses for packets to unused ports are getting lost with L3 domains. IPv4 has ip_send_unicast_reply for sending TCP responses which accounts for L3 domains; update the IPv6 counterpart tcp_v6_send_response. For icmp the L3 master check needs to be moved up in icmp6_send to properly respond to UDP packets to a port with no listener. Fixes: ca254490c8df ("net: Add VRF support to IPv6 stack") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 5 ++--- net/ipv6/tcp_ipv6.c | 7 ++++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 0a37ddc7af51..0013cacf7164 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -445,6 +445,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; + else + iif = l3mdev_master_ifindex(skb->dev); /* * Must not send error if the source does not uniquely @@ -499,9 +501,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - if (!fl6.flowi6_oif) - fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev); - dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 711d209f9124..f443c6b0ce16 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -810,8 +810,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else + else { + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; + fl6.flowi6_oif = oif; + } + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; -- cgit v1.2.3 From 8e0ddc040a87a3b700bdf67394d24fe30a0f1eb9 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 5 May 2016 00:35:16 -0400 Subject: qede: prevent chip hang when increasing channels qede requires qed to provide enough resources to accommodate 16 combined channels, but that upper-bound isn't actually being enforced by it. Instead, qed inform back to qede how many channels can be opened based on available resources - but that calculation doesn't really take into account the resources requested by qede; Instead it considers other FW/HW available resources. As a result, if a user would increase the number of channels to more than 16 [e.g., using ethtool] the chip would hang. This change increments the resources requested by qede to 64 combined channels instead of 16; This value is an upper bound on the possible available channels [due to other FW/HW resources]. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 7869465435fa..8d5248c4087f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1938,8 +1938,6 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev, edev->q_num_rx_buffers = NUM_RX_BDS_DEF; edev->q_num_tx_buffers = NUM_TX_BDS_DEF; - DP_INFO(edev, "Allocated netdev with 64 tx queues and 64 rx queues\n"); - SET_NETDEV_DEV(ndev, &pdev->dev); memset(&edev->stats, 0, sizeof(edev->stats)); @@ -2090,9 +2088,9 @@ static void qede_update_pf_params(struct qed_dev *cdev) { struct qed_pf_params pf_params; - /* 16 rx + 16 tx */ + /* 64 rx + 64 tx */ memset(&pf_params, 0, sizeof(struct qed_pf_params)); - pf_params.eth_pf_params.num_cons = 32; + pf_params.eth_pf_params.num_cons = 128; qed_ops->common->update_pf_params(cdev, &pf_params); } -- cgit v1.2.3 From 43b8448cd7b42a4c39476c9a12c960c1408f1946 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 3 May 2016 16:10:20 -0700 Subject: udp_tunnel: Remove redundant udp_tunnel_gro_complete(). The setting of the UDP tunnel GSO type is already performed by udp[46]_gro_complete(). Signed-off-by: Jarno Rajahalme Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 -- drivers/net/vxlan.c | 2 -- include/net/udp_tunnel.h | 9 --------- net/ipv4/fou.c | 2 -- 4 files changed, 15 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index bc168894bda3..98f12244714f 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -504,8 +504,6 @@ static int geneve_gro_complete(struct sk_buff *skb, int nhoff, int gh_len; int err = -ENOSYS; - udp_tunnel_gro_complete(skb, nhoff); - gh = (struct genevehdr *)(skb->data + nhoff); gh_len = geneve_hlen(gh); type = gh->proto_type; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 1c0fa364323e..dd2d032fba5f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -616,8 +616,6 @@ out: static int vxlan_gro_complete(struct sk_buff *skb, int nhoff, struct udp_offload *uoff) { - udp_tunnel_gro_complete(skb, nhoff); - return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr)); } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index b83114077cee..a1140249ec25 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -106,15 +106,6 @@ static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, return iptunnel_handle_offloads(skb, type); } -static inline void udp_tunnel_gro_complete(struct sk_buff *skb, int nhoff) -{ - struct udphdr *uh; - - uh = (struct udphdr *)(skb->data + nhoff - sizeof(struct udphdr)); - skb_shinfo(skb)->gso_type |= uh->check ? - SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; -} - static inline void udp_tunnel_encap_enable(struct socket *sock) { #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index a39068b4a4d9..305d9ac68bd9 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -228,8 +228,6 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff, int err = -ENOSYS; const struct net_offload **offloads; - udp_tunnel_gro_complete(skb, nhoff); - rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); -- cgit v1.2.3 From 229740c63169462a838a8b8e16391ed000934631 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 3 May 2016 16:10:21 -0700 Subject: udp_offload: Set encapsulation before inner completes. UDP tunnel segmentation code relies on the inner offsets being set for an UDP tunnel GSO packet, but the inner *_complete() functions will set the inner offsets only if 'encapsulation' is set before calling them. Currently, udp_gro_complete() sets 'encapsulation' only after the inner *_complete() functions are done. This causes the inner offsets having invalid values after udp_gro_complete() returns, which in turn will make it impossible to properly segment the packet in case it needs to be forwarded, which would be visible to the user either as invalid packets being sent or as packet loss. This patch fixes this by setting skb's 'encapsulation' in udp_gro_complete() before calling into the inner complete functions, and by making each possible UDP tunnel gro_complete() callback set the inner_mac_header to the beginning of the tunnel payload. Signed-off-by: Jarno Rajahalme Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- drivers/net/geneve.c | 3 +++ drivers/net/vxlan.c | 3 +++ include/linux/netdevice.h | 3 +++ net/ipv4/fou.c | 4 ++++ net/ipv4/udp_offload.c | 8 +++++--- 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 98f12244714f..7b0a644122eb 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -514,6 +514,9 @@ static int geneve_gro_complete(struct sk_buff *skb, int nhoff, err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); rcu_read_unlock(); + + skb_set_inner_mac_header(skb, nhoff + gh_len); + return err; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index dd2d032fba5f..8ac261ab7d7d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -616,6 +616,9 @@ out: static int vxlan_gro_complete(struct sk_buff *skb, int nhoff, struct udp_offload *uoff) { + /* Sets 'skb->inner_mac_header' since we are always called with + * 'skb->encapsulation' set. + */ return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr)); } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b3c46b019ac1..78181a88903b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2164,6 +2164,9 @@ struct packet_offload { struct udp_offload; +/* 'skb->encapsulation' is set before gro_complete() is called. gro_complete() + * must set 'skb->inner_mac_header' to the beginning of tunnel payload. + */ struct udp_offload_callbacks { struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb, diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 305d9ac68bd9..a6962ccad98a 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -236,6 +236,8 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff, err = ops->callbacks.gro_complete(skb, nhoff); + skb_set_inner_mac_header(skb, nhoff); + out_unlock: rcu_read_unlock(); @@ -412,6 +414,8 @@ static int gue_gro_complete(struct sk_buff *skb, int nhoff, err = ops->callbacks.gro_complete(skb, nhoff + guehlen); + skb_set_inner_mac_header(skb, nhoff + guehlen); + out_unlock: rcu_read_unlock(); return err; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0ed2dafb7cc4..e330c0e56b11 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -399,6 +399,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) uh->len = newlen; + /* Set encapsulation before calling into inner gro_complete() functions + * to make them set up the inner offsets. + */ + skb->encapsulation = 1; + rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); @@ -421,9 +426,6 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) if (skb->remcsum_offload) skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; - skb->encapsulation = 1; - skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr)); - return err; } -- cgit v1.2.3 From 51554db2d23f71989e040df838613bbae554d4e0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 6 May 2016 22:18:39 +0200 Subject: mlxsw: spectrum: Fix rollback order in LAG join failure Make the leave procedure in the error path symmetric to the join procedure and first remove the port from the collector before potentially destroying the LAG. Fixes: 0d65fc13042f ("mlxsw: spectrum: Implement LAG port join/leave") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 4afbc3e9e381..668b2f465ca5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2541,11 +2541,11 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, lag->ref_count++; return 0; +err_col_port_enable: + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); err_col_port_add: if (!lag->ref_count) mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); -err_col_port_enable: - mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); return err; } -- cgit v1.2.3 From 288928658583a27256c6c295aeccd95a3ddcefce Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 6 May 2016 22:18:40 +0200 Subject: mlxsw: spectrum: Add missing rollback in flood configuration When we fail to set the flooding configuration for the broadcast and unregistered multicast traffic, we should revert the flooding configuration of the unknown unicast traffic. Fixes: 0293038e0c36 ("mlxsw: spectrum: Add support for flood control") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index e1c74efff51a..9cd6f472234a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -214,7 +214,15 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, idx_begin, table_type, range, local_port, set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + if (err) + goto err_flood_bm_set; + else + goto buffer_out; +err_flood_bm_set: + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin, + table_type, range, local_port, !set); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); buffer_out: kfree(sftr_pl); return err; -- cgit v1.2.3 From 545fea54916af5e88f02346d85301ba0ecc143f3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 May 2016 16:18:46 +0300 Subject: netxen: fix error handling in netxen_get_flash_block() My static checker complained that "v" can be used unintialized if netxen_rom_fast_read() returns -EIO. That function never actually returns -1. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c index db80eb1c6d4f..a3205410bb60 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c @@ -1015,20 +1015,24 @@ static int netxen_get_flash_block(struct netxen_adapter *adapter, int base, { int i, v, addr; __le32 *ptr32; + int ret; addr = base; ptr32 = buf; for (i = 0; i < size / sizeof(u32); i++) { - if (netxen_rom_fast_read(adapter, addr, &v) == -1) - return -1; + ret = netxen_rom_fast_read(adapter, addr, &v); + if (ret) + return ret; + *ptr32 = cpu_to_le32(v); ptr32++; addr += sizeof(u32); } if ((char *)buf + size > (char *)ptr32) { __le32 local; - if (netxen_rom_fast_read(adapter, addr, &v) == -1) - return -1; + ret = netxen_rom_fast_read(adapter, addr, &v); + if (ret) + return ret; local = cpu_to_le32(v); memcpy(ptr32, &local, (char *)buf + size - (char *)ptr32); } -- cgit v1.2.3 From 1c755ffa4fa3e2d4112cadd30142344789ad2fd2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 May 2016 16:19:44 +0300 Subject: netxen: reversed condition in netxen_nic_set_link_parameters() My static checker complains that we are using "autoneg" without initializing it. The problem is the ->phy_read() condition is reversed so we only set this on error instead of success. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c index a3205410bb60..2b10f1bcd151 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c @@ -1944,7 +1944,7 @@ void netxen_nic_set_link_parameters(struct netxen_adapter *adapter) if (adapter->phy_read && adapter->phy_read(adapter, NETXEN_NIU_GB_MII_MGMT_ADDR_AUTONEG, - &autoneg) != 0) + &autoneg) == 0) adapter->link_autoneg = autoneg; } else goto link_down; -- cgit v1.2.3 From 8c1f454625743d60763a23c58fa65681d3c98b6f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 May 2016 16:20:20 +0300 Subject: netxen: netxen_rom_fast_read() doesn't return -1 The error handling is broken here. netxen_rom_fast_read() returns zero on success and -EIO on error. It never returns -1. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index fd362b6923f4..9c6eed9b45f7 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -852,7 +852,8 @@ netxen_check_options(struct netxen_adapter *adapter) ptr32 = (__le32 *)&serial_num; offset = NX_FW_SERIAL_NUM_OFFSET; for (i = 0; i < 8; i++) { - if (netxen_rom_fast_read(adapter, offset, &val) == -1) { + err = netxen_rom_fast_read(adapter, offset, &val); + if (err) { dev_err(&pdev->dev, "error reading board info\n"); adapter->driver_mismatch = 1; return; -- cgit v1.2.3 From 810810ffb2f6d46365d0790bbe77698a5534393a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 May 2016 16:21:30 +0300 Subject: qede: uninitialized variable in qede_start_xmit() "data_split" was never set to false. It's just uninitialized. Fixes: 2950219d87b0 ('qede: Add basic network device support') Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 8d5248c4087f..12f6615797de 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -421,7 +421,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, u8 xmit_type; u16 idx; u16 hlen; - bool data_split; + bool data_split = false; /* Get tx-queue context and netdev index */ txq_index = skb_get_queue_mapping(skb); -- cgit v1.2.3 From 25a54342fde903b6abc2680594cf3e4864686339 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 5 May 2016 23:39:33 +0100 Subject: tools: bpf_jit_disasm: check for klogctl failure klogctl can fail and return -ve len, so check for this and return NULL to avoid passing a (size_t)-1 to malloc. Signed-off-by: Colin Ian King Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/net/bpf_jit_disasm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c index 5b3241340945..544b05a53b70 100644 --- a/tools/net/bpf_jit_disasm.c +++ b/tools/net/bpf_jit_disasm.c @@ -98,6 +98,9 @@ static char *get_klog_buff(unsigned int *klen) char *buff; len = klogctl(CMD_ACTION_SIZE_BUFFER, NULL, 0); + if (len < 0) + return NULL; + buff = malloc(len); if (!buff) return NULL; -- cgit v1.2.3 From be0bd3160165e42783d8215f426e41c07179c08a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 May 2016 05:58:21 -0700 Subject: macvtap: segmented packet is consumed If GSO packet is segmented and its segments are properly queued, we call consume_skb() instead of kfree_skb() to be drop monitor friendly. Fixes: 3e4f8b7873709 ("macvtap: Perform GSO on forwarding path.") Signed-off-by: Eric Dumazet Cc: Vlad Yasevich Reviewed-by: Shmulik Ladkani Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 95394edd1ed5..9a35aa462314 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -373,7 +373,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) goto wake_up; } - kfree_skb(skb); + consume_skb(skb); while (segs) { struct sk_buff *nskb = segs->next; -- cgit v1.2.3 From c81aa7979432aa10f23656ef6fa113764eab5e5c Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Fri, 6 May 2016 20:27:43 +0300 Subject: Documentation/networking: more accurate LCO explanation In few places the term "ones-complement sum" was used but the actual meaning is "the complement of the ones-complement sum". Also, avoid enclosing long statements with underscore, to ease readability. Signed-off-by: Shmulik Ladkani Acked-by: Edward Cree Signed-off-by: David S. Miller --- Documentation/networking/checksum-offloads.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/checksum-offloads.txt b/Documentation/networking/checksum-offloads.txt index de2a327766a7..56e36861245f 100644 --- a/Documentation/networking/checksum-offloads.txt +++ b/Documentation/networking/checksum-offloads.txt @@ -69,18 +69,18 @@ LCO: Local Checksum Offload LCO is a technique for efficiently computing the outer checksum of an encapsulated datagram when the inner checksum is due to be offloaded. The ones-complement sum of a correctly checksummed TCP or UDP packet is - equal to the sum of the pseudo header, because everything else gets - 'cancelled out' by the checksum field. This is because the sum was + equal to the complement of the sum of the pseudo header, because everything + else gets 'cancelled out' by the checksum field. This is because the sum was complemented before being written to the checksum field. More generally, this holds in any case where the 'IP-style' ones complement checksum is used, and thus any checksum that TX Checksum Offload supports. That is, if we have set up TX Checksum Offload with a start/offset pair, we - know that _after the device has filled in that checksum_, the ones + know that after the device has filled in that checksum, the ones complement sum from csum_start to the end of the packet will be equal to - _whatever value we put in the checksum field beforehand_. This allows us - to compute the outer checksum without looking at the payload: we simply - stop summing when we get to csum_start, then add the 16-bit word at - (csum_start + csum_offset). + the complement of whatever value we put in the checksum field beforehand. + This allows us to compute the outer checksum without looking at the payload: + we simply stop summing when we get to csum_start, then add the complement of + the 16-bit word at (csum_start + csum_offset). Then, when the true inner checksum is filled in (either by hardware or by skb_checksum_help()), the outer checksum will become correct by virtue of the arithmetic. -- cgit v1.2.3 From 8acca6acebd07b238af2e61e4f7d55e6232c7e3a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Sat, 7 May 2016 20:19:29 +0200 Subject: macsec: key identifier is 128 bits, not 64 The MACsec standard mentions a key identifier for each key, but doesn't specify anything about it, so I arbitrarily chose 64 bits. IEEE 802.1X-2010 specifies MKA (MACsec Key Agreement), and defines the key identifier to be 128 bits (96 bits "member identifier" + 32 bits "key number"). Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/macsec.c | 19 +++++++++++++------ include/uapi/linux/if_macsec.h | 4 +++- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index c6385617bfb2..92eaab95ae2b 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -85,7 +85,7 @@ struct gcm_iv { * @tfm: crypto struct, key storage */ struct macsec_key { - u64 id; + u8 id[MACSEC_KEYID_LEN]; struct crypto_aead *tfm; }; @@ -1529,7 +1529,8 @@ static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = { [MACSEC_SA_ATTR_AN] = { .type = NLA_U8 }, [MACSEC_SA_ATTR_ACTIVE] = { .type = NLA_U8 }, [MACSEC_SA_ATTR_PN] = { .type = NLA_U32 }, - [MACSEC_SA_ATTR_KEYID] = { .type = NLA_U64 }, + [MACSEC_SA_ATTR_KEYID] = { .type = NLA_BINARY, + .len = MACSEC_KEYID_LEN, }, [MACSEC_SA_ATTR_KEY] = { .type = NLA_BINARY, .len = MACSEC_MAX_KEY_LEN, }, }; @@ -1576,6 +1577,9 @@ static bool validate_add_rxsa(struct nlattr **attrs) return false; } + if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN) + return false; + return true; } @@ -1641,7 +1645,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) rx_sa->active = !!nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]); - rx_sa->key.id = nla_get_u64(tb_sa[MACSEC_SA_ATTR_KEYID]); + nla_memcpy(rx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEY], MACSEC_KEYID_LEN); rx_sa->sc = rx_sc; rcu_assign_pointer(rx_sc->sa[assoc_num], rx_sa); @@ -1722,6 +1726,9 @@ static bool validate_add_txsa(struct nlattr **attrs) return false; } + if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN) + return false; + return true; } @@ -1777,7 +1784,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; } - tx_sa->key.id = nla_get_u64(tb_sa[MACSEC_SA_ATTR_KEYID]); + nla_memcpy(tx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEY], MACSEC_KEYID_LEN); spin_lock_bh(&tx_sa->lock); tx_sa->next_pn = nla_get_u32(tb_sa[MACSEC_SA_ATTR_PN]); @@ -2318,7 +2325,7 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev, if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) || nla_put_u32(skb, MACSEC_SA_ATTR_PN, tx_sa->next_pn) || - nla_put_u64(skb, MACSEC_SA_ATTR_KEYID, tx_sa->key.id) || + nla_put(skb, MACSEC_SA_ATTR_KEYID, MACSEC_KEYID_LEN, tx_sa->key.id) || nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, tx_sa->active)) { nla_nest_cancel(skb, txsa_nest); nla_nest_cancel(skb, txsa_list); @@ -2419,7 +2426,7 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev, if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) || nla_put_u32(skb, MACSEC_SA_ATTR_PN, rx_sa->next_pn) || - nla_put_u64(skb, MACSEC_SA_ATTR_KEYID, rx_sa->key.id) || + nla_put(skb, MACSEC_SA_ATTR_KEYID, MACSEC_KEYID_LEN, rx_sa->key.id) || nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, rx_sa->active)) { nla_nest_cancel(skb, rxsa_nest); nla_nest_cancel(skb, rxsc_nest); diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 4c58d9917aa4..3411ed06b9c0 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -19,6 +19,8 @@ #define MACSEC_MAX_KEY_LEN 128 +#define MACSEC_KEYID_LEN 16 + #define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL #define MACSEC_DEFAULT_CIPHER_ALT 0x0080C20001000001ULL @@ -77,7 +79,7 @@ enum macsec_sa_attrs { MACSEC_SA_ATTR_ACTIVE, /* config/dump, u8 0..1 */ MACSEC_SA_ATTR_PN, /* config/dump, u32 */ MACSEC_SA_ATTR_KEY, /* config, data */ - MACSEC_SA_ATTR_KEYID, /* config/dump, u64 */ + MACSEC_SA_ATTR_KEYID, /* config/dump, 128-bit */ MACSEC_SA_ATTR_STATS, /* dump, nested, macsec_sa_stats_attr */ __MACSEC_SA_ATTR_END, NUM_MACSEC_SA_ATTR = __MACSEC_SA_ATTR_END, -- cgit v1.2.3 From 7fd7406d9c7e3c6f235aec224a811dc0fd29e049 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 8 May 2016 14:55:24 +0300 Subject: Revert "net/mlx5: Kconfig: Fix MLX5_EN/VXLAN build issue" This reverts commit 69976fb1045850a742deb9790ea49cbc6f497531. We cannot select VXLAN when IPv4 support is disabled, that just gives us additional build errors, including: warning: (MLX5_CORE_EN) selects VXLAN which has unmet direct dependencies (NETDEVICES && NET_CORE && INET) In file included from ../drivers/net/vxlan.c:36:0: include/net/udp_tunnel.h: In function 'udp_tunnel_handle_offloads': include/net/udp_tunnel.h:112:9: error: implicit declaration of function 'iptunnel_handle_offloads' [-Werror=implicit-function-declaration] return iptunnel_handle_offloads(skb, type); ^~~~~~~~~~~~~~~~~~~~~~~~ I'm sending a proper fix for the original bug in a separate patch. Signed-off-by: Arnd Bergmann Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 559d11a443bc..1cf722eba607 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -14,7 +14,6 @@ config MLX5_CORE_EN bool "Mellanox Technologies ConnectX-4 Ethernet support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE select PTP_1588_CLOCK - select VXLAN if MLX5_CORE=y default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. -- cgit v1.2.3 From 7dbb29172d415ccccad1166700d6be78dee9f2bc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 8 May 2016 14:55:25 +0300 Subject: net/mlx5e: make VXLAN support conditional VXLAN can be disabled at compile-time or it can be a loadable module while mlx5 is built-in, which leads to a link error: drivers/net/built-in.o: In function `mlx5e_create_netdev': ntb_netdev.c:(.text+0x106de4): undefined reference to `vxlan_get_rx_port' This avoids the link error and makes the vxlan code optional, like the other ethernet drivers do as well. Link: https://patchwork.ozlabs.org/patch/589296/ Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Arnd Bergmann Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 7 +++++++ drivers/net/ethernet/mellanox/mlx5/core/Makefile | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/vxlan.h | 11 +++++++++-- 5 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 1cf722eba607..f5c3b9465d8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -31,3 +31,10 @@ config MLX5_CORE_EN_DCB This flag is depended on the kernel's DCB support. If unsure, set to Y + +config MLX5_CORE_EN_VXLAN + bool "VXLAN offloads Support" + default y + depends on MLX5_CORE_EN && VXLAN && !(MLX5_CORE=y && VXLAN=m) + ---help--- + Say Y here if you want to use VXLAN offloads in the driver. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 4fc45ee0c5d1..bf65b71c7360 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -6,6 +6,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ - en_txrx.o en_clock.o vxlan.o en_tc.o + en_txrx.o en_clock.o en_tc.o +mlx5_core-$(CONFIG_MLX5_CORE_EN_VXLAN) += vxlan.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 3881dce0cc30..24344aafbd36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -564,7 +564,9 @@ struct mlx5e_priv { struct mlx5e_flow_tables fts; struct mlx5e_eth_addr_db eth_addr; struct mlx5e_vlan_db vlan; +#ifdef CONFIG_MLX5_CORE_EN_VXLAN struct mlx5e_vxlan_db vxlan; +#endif struct mlx5e_params params; struct workqueue_struct *wq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d4dfc5ce516a..94fef705890b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2149,6 +2149,7 @@ static int mlx5e_get_vf_stats(struct net_device *dev, vf_stats); } +#if IS_ENABLED(CONFIG_MLX5_CORE_EN_VXLAN) static void mlx5e_add_vxlan_port(struct net_device *netdev, sa_family_t sa_family, __be16 port) { @@ -2220,6 +2221,7 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } +#endif static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, @@ -2251,9 +2253,11 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_set_features = mlx5e_set_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, +#ifdef CONFIG_MLX5_CORE_EN_VXLAN .ndo_add_vxlan_port = mlx5e_add_vxlan_port, .ndo_del_vxlan_port = mlx5e_del_vxlan_port, .ndo_features_check = mlx5e_features_check, +#endif .ndo_set_vf_mac = mlx5e_set_vf_mac, .ndo_set_vf_vlan = mlx5e_set_vf_vlan, .ndo_get_vf_config = mlx5e_get_vf_config, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h index 129f3527aa14..217ac530a514 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h @@ -48,14 +48,21 @@ struct mlx5e_vxlan_work { static inline bool mlx5e_vxlan_allowed(struct mlx5_core_dev *mdev) { - return (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) && + return IS_ENABLED(CONFIG_MLX5_CORE_EN_VXLAN) && + (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) && mlx5_core_is_pf(mdev)); } +#ifdef CONFIG_MLX5_CORE_EN_VXLAN void mlx5e_vxlan_init(struct mlx5e_priv *priv); +void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv); +#else +static inline void mlx5e_vxlan_init(struct mlx5e_priv *priv) {} +static inline void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv) {} +#endif + void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, sa_family_t sa_family, u16 port, int add); struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port); -void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv); #endif /* __MLX5_VXLAN_H__ */ -- cgit v1.2.3