author		David S. Miller <davem@davemloft.net>	2014-01-21 18:05:12 -0800
committer	David S. Miller <davem@davemloft.net>	2014-01-21 18:05:12 -0800
commit		70fccb7e34f029755a5371b6b786f8425c2f27a1 (patch)
tree		78986427fd32829b74d9947612eb2e12913cc768 /net
parent		2618abb73c8953f0848511fc13f68da4d8337574 (diff)
parent		dc01e7d3447793fd9e4090aa9d50c549848b5a18 (diff)
download	linux-70fccb7e34f029755a5371b6b786f8425c2f27a1.tar.bz2
Merge branch 'gro_udp_encap'
Or Gerlitz says:

====================
net: Add GRO support for UDP encapsulating protocols

This series adds GRO handlers for protocols that do UDP encapsulation,
with the intent of being able to coalesce packets which encapsulate
packets belonging to the same TCP session.

For GRO purposes, the destination UDP port takes the role of the ether
type field in the ethernet header or the next protocol in the IP header.

The UDP GRO handler will only attempt to coalesce packets whose
destination port is registered to have a GRO handler.

The patches were done against net-next commit 75e4364f67 "net: stmmac:
fix NULL pointer dereference in stmmac_get_tx_hwtstamp".

Or.

v4 --> v5 changes:
  - followed Eric's directive to avoid using atomic get/put ops in the
    udp gro receive and complete callbacks, and instead keep the
    rcu_read_lock held when calling the next handler in the chain.

v3 --> v4 changes:
  - applied feedback from Tom on some micro-optimizations that save
    branches and goto directives in the udp gro logic.
  - applied feedback from Eric on correct RCU programming for the
    add/remove flow of the upper-protocol udp gro handlers.

v2 --> v3 changes:
  - moved to using a linked list to store the udp gro handlers; this
    solves the problem of consuming 512KB of memory for the handlers.
  - use a mark in the skb GRO CB data to disallow running udp
    gro_receive twice on a packet; this solves the problem of udp
    encapsulated packets whose inner VM packet is udp and happens to
    carry a port which has registered offloads - and flush it.
  - invoke the udp offload protocol registration and de-registration
    from the vxlan driver in a sleepable context.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
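[Editorial sketch] To make the registration flow concrete, here is a minimal
sketch of how a tunnel driver such as vxlan might register its UDP port with
this API. The my_tun_* names are hypothetical (the receive and complete
callbacks are sketched further below); struct udp_offload, udp_add_offload()
and udp_del_offload() are the ones added by this series:

	#include <net/protocol.h>

	static struct udp_offload my_tun_udp_offload;	/* hypothetical driver state */

	static int my_tun_offload_init(__be16 port)
	{
		my_tun_udp_offload.port = port;	/* e.g. htons(4789) for VXLAN */
		my_tun_udp_offload.callbacks.gro_receive  = my_tun_gro_receive;
		my_tun_udp_offload.callbacks.gro_complete = my_tun_gro_complete;

		/* Sleepable context required: udp_add_offload() allocates
		 * with GFP_KERNEL (see the v3 changelog note above). */
		return udp_add_offload(&my_tun_udp_offload);
	}

	static void my_tun_offload_exit(void)
	{
		udp_del_offload(&my_tun_udp_offload);
	}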
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c3
-rw-r--r--net/ipv4/udp_offload.c143
2 files changed, 146 insertions(+), 0 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index a578af589198..d89931bae25b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3893,6 +3893,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
+ NAPI_GRO_CB(skb)->udp_mark = 0;
pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
break;
@@ -3983,6 +3984,7 @@ struct packet_offload *gro_find_receive_by_type(__be16 type)
}
return NULL;
}
+EXPORT_SYMBOL(gro_find_receive_by_type);
struct packet_offload *gro_find_complete_by_type(__be16 type)
{
@@ -3996,6 +3998,7 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
return NULL;
}
+EXPORT_SYMBOL(gro_find_complete_by_type);
static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
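[Editorial sketch] The two EXPORT_SYMBOL additions above are what allow
modular tunnel drivers to dispatch the inner packet to the offload handler
for its protocol type. A simplified sketch of such a receive callback,
assuming a hypothetical my_tun_gro_receive for a tunnel carrying inner
Ethernet frames; the real vxlan callback in this series also validates and
pulls its encapsulation header and compares inner headers of held packets:

	static struct sk_buff **my_tun_gro_receive(struct sk_buff **head,
						   struct sk_buff *skb)
	{
		struct packet_offload *ptype;
		struct sk_buff **pp = NULL;
		struct ethhdr *eh;
		unsigned int off;

		/* Tunnel-header validation and skb_gro_pull() elided;
		 * assume the inner Ethernet header now starts at the
		 * GRO offset. */
		off = skb_gro_offset(skb);
		eh = skb_gro_header_fast(skb, off);
		if (skb_gro_header_hard(skb, off + sizeof(*eh))) {
			eh = skb_gro_header_slow(skb, off + sizeof(*eh), off);
			if (unlikely(!eh))
				return NULL;
		}

		/* udp_gro_receive() already holds rcu_read_lock() when it
		 * calls this (the v5 changelog note); read-side sections
		 * nest, so taking it again here is harmless. */
		rcu_read_lock();
		ptype = gro_find_receive_by_type(eh->h_proto);
		if (ptype)
			pp = ptype->callbacks.gro_receive(head, skb);
		rcu_read_unlock();

		return pp;
	}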
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 79c62bdcd3c5..ee853c55deea 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -14,6 +14,15 @@
#include <net/udp.h>
#include <net/protocol.h>
+static DEFINE_SPINLOCK(udp_offload_lock);
+static struct udp_offload_priv *udp_offload_base __read_mostly;
+
+struct udp_offload_priv {
+ struct udp_offload *offload;
+ struct rcu_head rcu;
+ struct udp_offload_priv __rcu *next;
+};
+
static int udp4_ufo_send_check(struct sk_buff *skb)
{
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -89,10 +98,144 @@ out:
return segs;
}
+int udp_add_offload(struct udp_offload *uo)
+{
+ struct udp_offload_priv **head = &udp_offload_base;
+ struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL);
+
+ if (!new_offload)
+ return -ENOMEM;
+
+ new_offload->offload = uo;
+
+ spin_lock(&udp_offload_lock);
+ rcu_assign_pointer(new_offload->next, rcu_dereference(*head));
+ rcu_assign_pointer(*head, rcu_dereference(new_offload));
+ spin_unlock(&udp_offload_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(udp_add_offload);
+
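[Editorial sketch] The insertion above is the standard RCU publish pattern:
new_offload is fully initialized before rcu_assign_pointer() makes it
reachable, the spinlock serializes writers only, and readers walk the list
locklessly. For comparison, the same pattern with the kernel's list_head
helpers; struct udp_offload_node and udp_add_offload_alt() are hypothetical,
not what this patch uses:

	struct udp_offload_node {		/* hypothetical */
		struct list_head list;
		struct udp_offload *offload;
		struct rcu_head rcu;
	};

	static LIST_HEAD(udp_offload_list);

	static int udp_add_offload_alt(struct udp_offload *uo)
	{
		struct udp_offload_node *node = kzalloc(sizeof(*node), GFP_KERNEL);

		if (!node)
			return -ENOMEM;
		node->offload = uo;

		spin_lock(&udp_offload_lock);
		list_add_rcu(&node->list, &udp_offload_list);	/* publish */
		spin_unlock(&udp_offload_lock);
		return 0;
	}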
+static void udp_offload_free_routine(struct rcu_head *head)
+{
+ struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu);
+ kfree(ou_priv);
+}
+
+void udp_del_offload(struct udp_offload *uo)
+{
+ struct udp_offload_priv __rcu **head = &udp_offload_base;
+ struct udp_offload_priv *uo_priv;
+
+ spin_lock(&udp_offload_lock);
+
+ uo_priv = rcu_dereference(*head);
+ for (; uo_priv != NULL;
+ uo_priv = rcu_dereference(*head)) {
+
+ if (uo_priv->offload == uo) {
+ rcu_assign_pointer(*head, rcu_dereference(uo_priv->next));
+ goto unlock;
+ }
+ head = &uo_priv->next;
+ }
+ pr_warn("udp_del_offload: didn't find offload for port %d\n", htons(uo->port));
+unlock:
+ spin_unlock(&udp_offload_lock);
+ if (uo_priv != NULL)
+ call_rcu(&uo_priv->rcu, udp_offload_free_routine);
+}
+EXPORT_SYMBOL(udp_del_offload);
+
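[Editorial sketch] Deletion is the mirror image: unlink under the writer
lock, then defer the kfree() through call_rcu() so it runs only after every
udp_gro_receive() walker that might still hold a pointer has left its RCU
read-side section. In the hypothetical list_head variant sketched above,
this collapses to:

	static void udp_del_offload_alt(struct udp_offload_node *node)
	{
		spin_lock(&udp_offload_lock);
		list_del_rcu(&node->list);
		spin_unlock(&udp_offload_lock);

		kfree_rcu(node, rcu);	/* freed after a grace period */
	}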
+static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+ struct udp_offload_priv *uo_priv;
+ struct sk_buff *p, **pp = NULL;
+ struct udphdr *uh, *uh2;
+ unsigned int hlen, off;
+ int flush = 1;
+
+ if (NAPI_GRO_CB(skb)->udp_mark ||
+ (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE))
+ goto out;
+
+ /* mark that this skb passed once through the udp gro layer */
+ NAPI_GRO_CB(skb)->udp_mark = 1;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*uh);
+ uh = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ uh = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!uh))
+ goto out;
+ }
+
+ rcu_read_lock();
+ uo_priv = rcu_dereference(udp_offload_base);
+ for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
+ if (uo_priv->offload->port == uh->dest &&
+ uo_priv->offload->callbacks.gro_receive)
+ goto unflush;
+ }
+ goto out_unlock;
+
+unflush:
+ flush = 0;
+
+ for (p = *head; p; p = p->next) {
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ uh2 = (struct udphdr *)(p->data + off);
+ if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+ }
+
+ skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
+ pp = uo_priv->offload->callbacks.gro_receive(head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+ return pp;
+}
+
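[Editorial note] The single 32-bit compare in the flow-matching loop above
checks both ports at once: source and dest are adjacent __be16 fields of
struct udphdr, so a u32 load at &uh->source covers the (source, dest) pair.
The IP-layer GRO handler has already matched addresses before calling down,
so the ports are all that remain to check. Spelled out, the test is
equivalent to:

	if (uh->source != uh2->source || uh->dest != uh2->dest)
		NAPI_GRO_CB(p)->same_flow = 0;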
+static int udp_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ struct udp_offload_priv *uo_priv;
+ __be16 newlen = htons(skb->len - nhoff);
+ struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
+ int err = -ENOSYS;
+
+ uh->len = newlen;
+
+ rcu_read_lock();
+
+ uo_priv = rcu_dereference(udp_offload_base);
+ for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
+ if (uo_priv->offload->port == uh->dest &&
+ uo_priv->offload->callbacks.gro_complete)
+ break;
+ }
+
+ if (uo_priv != NULL)
+ err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr));
+
+ rcu_read_unlock();
+ return err;
+}
+
static const struct net_offload udpv4_offload = {
.callbacks = {
.gso_send_check = udp4_ufo_send_check,
.gso_segment = udp4_ufo_fragment,
+ .gro_receive = udp_gro_receive,
+ .gro_complete = udp_gro_complete,
},
};
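[Editorial sketch] For completeness, a sketch of the driver-side completion
callback that udp_gro_complete() dispatches to once the coalesced packet is
handed up; nhoff already points just past the UDP header at that point. As
before, my_tun_gro_complete and MY_TUN_HLEN are hypothetical; the real vxlan
callback advances past its own 8-byte header and the inner Ethernet header
in the same way:

	#define MY_TUN_HLEN 8	/* hypothetical tunnel header length */

	static int my_tun_gro_complete(struct sk_buff *skb, int nhoff)
	{
		struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff + MY_TUN_HLEN);
		struct packet_offload *ptype;
		int err = -ENOSYS;

		rcu_read_lock();
		ptype = gro_find_complete_by_type(eh->h_proto);
		if (ptype)
			err = ptype->callbacks.gro_complete(skb,
					nhoff + MY_TUN_HLEN + sizeof(struct ethhdr));
		rcu_read_unlock();

		return err;
	}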