From e4961b0768852d9eb7383e1a5df178eacb714656 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 19 Oct 2016 16:57:08 +0300 Subject: net: core: Correctly iterate over lower adjacency list Tamir reported the following trace when processing ARP requests received via a vlan device on top of a VLAN-aware bridge: NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [swapper/1:0] [...] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W 4.8.0-rc7 #1 Hardware name: Mellanox Technologies Ltd. "MSN2100-CB2F"/"SA001017", BIOS 5.6.5 06/07/2016 task: ffff88017edfea40 task.stack: ffff88017ee10000 RIP: 0010:[] [] netdev_all_lower_get_next_rcu+0x33/0x60 [...] Call Trace: [] mlxsw_sp_port_lower_dev_hold+0x5a/0xa0 [mlxsw_spectrum] [] mlxsw_sp_router_netevent_event+0x80/0x150 [mlxsw_spectrum] [] notifier_call_chain+0x4a/0x70 [] atomic_notifier_call_chain+0x1a/0x20 [] call_netevent_notifiers+0x1b/0x20 [] neigh_update+0x306/0x740 [] neigh_event_ns+0x4e/0xb0 [] arp_process+0x66f/0x700 [] ? common_interrupt+0x8c/0x8c [] arp_rcv+0x139/0x1d0 [] ? vlan_do_receive+0xda/0x320 [] __netif_receive_skb_core+0x524/0xab0 [] ? dev_queue_xmit+0x10/0x20 [] ? br_forward_finish+0x3d/0xc0 [bridge] [] ? br_handle_vlan+0xf6/0x1b0 [bridge] [] __netif_receive_skb+0x18/0x60 [] netif_receive_skb_internal+0x40/0xb0 [] netif_receive_skb+0x1c/0x70 [] br_pass_frame_up+0xc6/0x160 [bridge] [] ? deliver_clone+0x37/0x50 [bridge] [] ? br_flood+0xcc/0x160 [bridge] [] br_handle_frame_finish+0x224/0x4f0 [bridge] [] br_handle_frame+0x174/0x300 [bridge] [] __netif_receive_skb_core+0x329/0xab0 [] ? find_next_bit+0x15/0x20 [] ? cpumask_next_and+0x32/0x50 [] ? load_balance+0x178/0x9b0 [] __netif_receive_skb+0x18/0x60 [] netif_receive_skb_internal+0x40/0xb0 [] netif_receive_skb+0x1c/0x70 [] mlxsw_sp_rx_listener_func+0x61/0xb0 [mlxsw_spectrum] [] mlxsw_core_skb_receive+0x187/0x200 [mlxsw_core] [] mlxsw_pci_cq_tasklet+0x63a/0x9b0 [mlxsw_pci] [] tasklet_action+0xf6/0x110 [] __do_softirq+0xf6/0x280 [] irq_exit+0xdf/0xf0 [] do_IRQ+0x54/0xd0 [] common_interrupt+0x8c/0x8c The problem is that netdev_all_lower_get_next_rcu() never advances the iterator, thereby causing the loop over the lower adjacency list to run forever. Fix this by advancing the iterator and avoid the infinite loop. Fixes: 7ce856aaaf13 ("mlxsw: spectrum: Add couple of lower device helper functions") Signed-off-by: Ido Schimmel Reported-by: Tamir Winetroub Reviewed-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/dev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index f1fe26f66458..b09ac57f4348 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5511,10 +5511,14 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, { struct netdev_adjacent *lower; - lower = list_first_or_null_rcu(&dev->all_adj_list.lower, - struct netdev_adjacent, list); + lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->all_adj_list.lower) + return NULL; + + *iter = &lower->list; - return lower ? lower->dev : NULL; + return lower->dev; } EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); -- cgit v1.2.3 From fcd91dd449867c6bfe56a81cabba76b829fd05cd Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 20 Oct 2016 15:58:02 +0200 Subject: net: add recursion limit to GRO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, GRO can do unlimited recursion through the gro_receive handlers. This was fixed for tunneling protocols by limiting tunnel GRO to one level with encap_mark, but both VLAN and TEB still have this problem. Thus, the kernel is vulnerable to a stack overflow, if we receive a packet composed entirely of VLAN headers. This patch adds a recursion counter to the GRO layer to prevent stack overflow. When a gro_receive function hits the recursion limit, GRO is aborted for this skb and it is processed normally. This recursion counter is put in the GRO CB, but could be turned into a percpu counter if we run out of space in the CB. Thanks to Vladimír Beneš for the initial bug report. Fixes: CVE-2016-7039 Fixes: 9b174d88c257 ("net: Add Transparent Ethernet Bridging GRO support.") Fixes: 66e5133f19e9 ("vlan: Add GRO support for non hardware accelerated vlan") Signed-off-by: Sabrina Dubroca Reviewed-by: Jiri Benc Acked-by: Hannes Frederic Sowa Acked-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- drivers/net/vxlan.c | 2 +- include/linux/netdevice.h | 39 ++++++++++++++++++++++++++++++++++++++- net/8021q/vlan.c | 2 +- net/core/dev.c | 1 + net/ethernet/eth.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/fou.c | 4 ++-- net/ipv4/gre_offload.c | 2 +- net/ipv4/udp_offload.c | 2 +- net/ipv6/ip6_offload.c | 2 +- 11 files changed, 49 insertions(+), 11 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 3c20e87bb761..16af1ce99233 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -453,7 +453,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk, skb_gro_pull(skb, gh_len); skb_gro_postpull_rcsum(skb, gh, gh_len); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); flush = 0; out_unlock: diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e7d16687538b..c1639a3e95a4 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -583,7 +583,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk, } } - pp = eth_gro_receive(head, skb); + pp = call_gro_receive(eth_gro_receive, head, skb); flush = 0; out: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 465e128699a1..91ee3643ccc8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2169,7 +2169,10 @@ struct napi_gro_cb { /* Used to determine if flush_id can be ignored */ u8 is_atomic:1; - /* 5 bit hole */ + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; + + /* 1 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2180,6 +2183,40 @@ struct napi_gro_cb { #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) +#define GRO_RECURSION_LIMIT 15 +static inline int gro_recursion_inc_test(struct sk_buff *skb) +{ + return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; +} + +typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *); +static inline struct sk_buff **call_gro_receive(gro_receive_t cb, + struct sk_buff **head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb); +} + +typedef struct sk_buff **(*gro_receive_sk_t)(struct sock *, struct sk_buff **, + struct sk_buff *); +static inline struct sk_buff **call_gro_receive_sk(gro_receive_sk_t cb, + struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(sk, head, skb); +} + struct packet_type { __be16 type; /* This is really htons(ether_type). */ struct net_device *dev; /* NULL is wildcarded here */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8de138d3306b..f2531ad66b68 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -664,7 +664,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*vhdr)); skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr)); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/core/dev.c b/net/core/dev.c index b09ac57f4348..dbc871306910 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4511,6 +4511,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; + NAPI_GRO_CB(skb)->recursion_counter = 0; NAPI_GRO_CB(skb)->is_fou = 0; NAPI_GRO_CB(skb)->is_atomic = 1; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 66dff5e3d772..02acfff36028 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -439,7 +439,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*eh)); skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1effc986739e..9648c97e541f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1391,7 +1391,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index cf50f7e2b012..030d1531e897 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -249,7 +249,7 @@ static struct sk_buff **fou_gro_receive(struct sock *sk, if (!ops || !ops->callbacks.gro_receive) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); @@ -441,7 +441,7 @@ next_proto: if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); flush = 0; out_unlock: diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 96e0efecefa6..d5cac99170b1 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -229,7 +229,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/ skb_gro_postpull_rcsum(skb, greh, grehlen); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); flush = 0; out_unlock: diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f9333c963607..b2be1d9757ef 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -295,7 +295,7 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); - pp = udp_sk(sk)->gro_receive(sk, head, skb); + pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index e7bfd55899a3..1fcf61f1cbc3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -246,7 +246,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, skb_gro_postpull_rcsum(skb, iph, nlen); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); -- cgit v1.2.3 From 104ba78c98808ae837d1f63aae58c183db5505df Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 26 Oct 2016 11:23:07 -0400 Subject: packet: on direct_xmit, limit tso and csum to supported devices When transmitting on a packet socket with PACKET_VNET_HDR and PACKET_QDISC_BYPASS, validate device support for features requested in vnet_hdr. Drop TSO packets sent to devices that do not support TSO or have the feature disabled. Note that the latter currently do process those packets correctly, regardless of not advertising the feature. Because of SKB_GSO_DODGY, it is not sufficient to test device features with netif_needs_gso. Full validate_xmit_skb is needed. Switch to software checksum for non-TSO packets that request checksum offload if that device feature is unsupported or disabled. Note that similar to the TSO case, device drivers may perform checksum offload correctly even when not advertising it. When switching to software checksum, packets hit skb_checksum_help, which has two BUG_ON checksum not in linear segment. Packet sockets always allocate at least up to csum_start + csum_off + 2 as linear. Tested by running github.com/wdebruij/kerneltools/psock_txring_vnet.c ethtool -K eth0 tso off tx on psock_txring_vnet -d $dst -s $src -i eth0 -l 2000 -n 1 -q -v psock_txring_vnet -d $dst -s $src -i eth0 -l 2000 -n 1 -q -v -N ethtool -K eth0 tx off psock_txring_vnet -d $dst -s $src -i eth0 -l 1000 -n 1 -q -v -G psock_txring_vnet -d $dst -s $src -i eth0 -l 1000 -n 1 -q -v -G -N v2: - add EXPORT_SYMBOL_GPL(validate_xmit_skb_list) Fixes: d346a3fae3ff ("packet: introduce PACKET_QDISC_BYPASS socket option") Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/dev.c | 1 + net/packet/af_packet.c | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index dbc871306910..f745112f7efa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3035,6 +3035,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d } return head; } +EXPORT_SYMBOL_GPL(validate_xmit_skb_list); static void qdisc_pkt_len_init(struct sk_buff *skb) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 11db0d619c00..d2238b204691 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -250,7 +250,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - netdev_features_t features; + struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; @@ -258,9 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb) !netif_carrier_ok(dev))) goto drop; - features = netif_skb_features(skb); - if (skb_needs_linearize(skb, features) && - __skb_linearize(skb)) + skb = validate_xmit_skb_list(skb, dev); + if (skb != orig_skb) goto drop; txq = skb_get_tx_queue(dev, skb); @@ -280,7 +279,7 @@ static int packet_direct_xmit(struct sk_buff *skb) return ret; drop: atomic_long_inc(&dev->tx_dropped); - kfree_skb(skb); + kfree_skb_list(skb); return NET_XMIT_DROP; } -- cgit v1.2.3