summaryrefslogtreecommitdiffstats
path: root/net/ipv6/ip6_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/ip6_output.c')
-rw-r--r--net/ipv6/ip6_output.c158
1 files changed, 96 insertions, 62 deletions
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 36cf0ab685a0..d5f7716662db 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,7 +56,7 @@
#include <net/checksum.h>
#include <linux/mroute6.h>
-static int ip6_finish_output2(struct sk_buff *skb)
+static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
@@ -70,7 +70,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
- if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
+ if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
((mroute6_socket(dev_net(dev), skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
@@ -82,7 +82,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
*/
if (newskb)
NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- newskb, NULL, newskb->dev,
+ sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
if (ipv6_hdr(skb)->hop_limit == 0) {
@@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst);
+ nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
@@ -122,14 +122,14 @@ static int ip6_finish_output2(struct sk_buff *skb)
return -EINVAL;
}
-static int ip6_finish_output(struct sk_buff *skb)
+static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
{
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
- return ip6_fragment(skb, ip6_finish_output2);
+ return ip6_fragment(sk, skb, ip6_finish_output2);
else
- return ip6_finish_output2(skb);
+ return ip6_finish_output2(sk, skb);
}
int ip6_output(struct sock *sk, struct sk_buff *skb)
@@ -143,7 +143,8 @@ int ip6_output(struct sock *sk, struct sk_buff *skb)
return 0;
}
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
+ NULL, dev,
ip6_finish_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
@@ -177,7 +178,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if (skb_headroom(skb) < head_room) {
struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
- if (skb2 == NULL) {
+ if (!skb2) {
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
@@ -223,8 +224,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
- dst->dev, dst_output);
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
+ NULL, dst->dev, dst_output_sk);
}
skb->dev = dst->dev;
@@ -316,10 +317,10 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
return 0;
}
-static inline int ip6_forward_finish(struct sk_buff *skb)
+static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
{
skb_sender_cpu_clear(skb);
- return dst_output(skb);
+ return dst_output_sk(sk, skb);
}
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
@@ -458,7 +459,7 @@ int ip6_forward(struct sk_buff *skb)
else
target = &hdr->daddr;
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
@@ -511,7 +512,8 @@ int ip6_forward(struct sk_buff *skb)
IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
+ skb->dev, dst->dev,
ip6_forward_finish);
error:
@@ -538,7 +540,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
-int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip6_fragment(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *))
{
struct sk_buff *frag;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -548,7 +551,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
struct frag_hdr *fh;
unsigned int mtu, hlen, left, len;
int hroom, troom;
- __be32 frag_id = 0;
+ __be32 frag_id;
int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -561,18 +564,17 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
/* We must not fragment if the socket is set to force MTU discovery
* or if the skb it not generated by a local socket.
*/
- if (unlikely(!skb->ignore_df && skb->len > mtu) ||
- (IP6CB(skb)->frag_max_size &&
- IP6CB(skb)->frag_max_size > mtu)) {
- if (skb->sk && dst_allfrag(skb_dst(skb)))
- sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
+ if (unlikely(!skb->ignore_df && skb->len > mtu))
+ goto fail_toobig;
- skb->dev = skb_dst(skb)->dev;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGFAILS);
- kfree_skb(skb);
- return -EMSGSIZE;
+ if (IP6CB(skb)->frag_max_size) {
+ if (IP6CB(skb)->frag_max_size > mtu)
+ goto fail_toobig;
+
+ /* don't send fragments larger than what we received */
+ mtu = IP6CB(skb)->frag_max_size;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
}
if (np && np->frag_size < mtu) {
@@ -581,6 +583,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
}
mtu -= hlen + sizeof(struct frag_hdr);
+ frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr);
+
if (skb_has_frag_list(skb)) {
int first_len = skb_pagelen(skb);
struct sk_buff *frag2;
@@ -629,11 +634,10 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
skb_reset_network_header(skb);
memcpy(skb_network_header(skb), tmp_hdr, hlen);
- ipv6_select_ident(fh, rt);
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(IP6_MF);
- frag_id = fh->identification;
+ fh->identification = frag_id;
first_len = skb_pagelen(skb);
skb->data_len = first_len - skb_headlen(skb);
@@ -658,7 +662,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(offset);
- if (frag->next != NULL)
+ if (frag->next)
fh->frag_off |= htons(IP6_MF);
fh->identification = frag_id;
ipv6_hdr(frag)->payload_len =
@@ -667,7 +671,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
ip6_copy_metadata(frag, skb);
}
- err = output(skb);
+ err = output(sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
@@ -775,11 +779,7 @@ slow_path:
*/
fh->nexthdr = nexthdr;
fh->reserved = 0;
- if (!frag_id) {
- ipv6_select_ident(fh, rt);
- frag_id = fh->identification;
- } else
- fh->identification = frag_id;
+ fh->identification = frag_id;
/*
* Copy a block of the IP datagram.
@@ -800,7 +800,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- err = output(frag);
+ err = output(sk, frag);
if (err)
goto fail;
@@ -812,6 +812,14 @@ slow_path:
consume_skb(skb);
return err;
+fail_toobig:
+ if (skb->sk && dst_allfrag(skb_dst(skb)))
+ sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
+
+ skb->dev = skb_dst(skb)->dev;
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ err = -EMSGSIZE;
+
fail:
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
@@ -824,7 +832,7 @@ static inline int ip6_rt_check(const struct rt6key *rt_key,
const struct in6_addr *addr_cache)
{
return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
- (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
+ (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
@@ -883,22 +891,45 @@ static int ip6_dst_lookup_tail(struct sock *sk,
#endif
int err;
- if (*dst == NULL)
- *dst = ip6_route_output(net, sk, fl6);
-
- err = (*dst)->error;
- if (err)
- goto out_err_release;
+ /* The correct way to handle this would be to do
+ * ip6_route_get_saddr, and then ip6_route_output; however,
+ * the route-specific preferred source forces the
+ * ip6_route_output call _before_ ip6_route_get_saddr.
+ *
+ * In source specific routing (no src=any default route),
+ * ip6_route_output will fail given src=any saddr, though, so
+ * that's why we try it again later.
+ */
+ if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
+ struct rt6_info *rt;
+ bool had_dst = *dst != NULL;
- if (ipv6_addr_any(&fl6->saddr)) {
- struct rt6_info *rt = (struct rt6_info *) *dst;
+ if (!had_dst)
+ *dst = ip6_route_output(net, sk, fl6);
+ rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
err = ip6_route_get_saddr(net, rt, &fl6->daddr,
sk ? inet6_sk(sk)->srcprefs : 0,
&fl6->saddr);
if (err)
goto out_err_release;
+
+ /* If we had an erroneous initial result, pretend it
+ * never existed and let the SA-enabled version take
+ * over.
+ */
+ if (!had_dst && (*dst)->error) {
+ dst_release(*dst);
+ *dst = NULL;
+ }
}
+ if (!*dst)
+ *dst = ip6_route_output(net, sk, fl6);
+
+ err = (*dst)->error;
+ if (err)
+ goto out_err_release;
+
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
/*
* Here if the dst entry we've looked up
@@ -910,7 +941,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
*/
rt = (struct rt6_info *) *dst;
rcu_read_lock_bh();
- n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
+ n = __ipv6_neigh_lookup_noref(rt->dst.dev,
+ rt6_nexthop(rt, &fl6->daddr));
err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
rcu_read_unlock_bh();
@@ -1034,11 +1066,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
int odd, struct sk_buff *skb),
void *from, int length, int hh_len, int fragheaderlen,
int transhdrlen, int mtu, unsigned int flags,
- struct rt6_info *rt)
+ const struct flowi6 *fl6)
{
struct sk_buff *skb;
- struct frag_hdr fhdr;
int err;
/* There is support for UDP large send offload by network
@@ -1046,11 +1077,11 @@ static inline int ip6_ufo_append_data(struct sock *sk,
* udp datagram
*/
skb = skb_peek_tail(queue);
- if (skb == NULL) {
+ if (!skb) {
skb = sock_alloc_send_skb(sk,
hh_len + fragheaderlen + transhdrlen + 20,
(flags & MSG_DONTWAIT), &err);
- if (skb == NULL)
+ if (!skb)
return err;
/* reserve space for Hardware header */
@@ -1080,8 +1111,9 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
sizeof(struct frag_hdr)) & ~7;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
- ipv6_select_ident(&fhdr, rt);
- skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+ skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
+ &fl6->daddr,
+ &fl6->saddr);
append:
return skb_append_datato_frags(sk, skb, getfrag, from,
@@ -1108,7 +1140,7 @@ static void ip6_append_data_mtu(unsigned int *mtu,
unsigned int orig_mtu)
{
if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
- if (skb == NULL) {
+ if (!skb) {
/* first fragment, reserve header_len */
*mtu = orig_mtu - rt->dst.header_len;
@@ -1140,7 +1172,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
return -EINVAL;
v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
- if (unlikely(v6_cork->opt == NULL))
+ if (unlikely(!v6_cork->opt))
return -ENOBUFS;
v6_cork->opt->tot_len = opt->tot_len;
@@ -1274,8 +1306,10 @@ emsgsize:
/* If this is the first and only packet and device
* supports checksum offloading, let's use it.
+ * Use transhdrlen, same as IPv4, because partial
+ * sums only work when transhdrlen is set.
*/
- if (!skb && sk->sk_protocol == IPPROTO_UDP &&
+ if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
length + fragheaderlen < mtu &&
rt->dst.dev->features & NETIF_F_V6_CSUM &&
!exthdrlen)
@@ -1304,7 +1338,7 @@ emsgsize:
(sk->sk_type == SOCK_DGRAM)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen,
- transhdrlen, mtu, flags, rt);
+ transhdrlen, mtu, flags, fl6);
if (err)
goto error;
return 0;
@@ -1332,7 +1366,7 @@ alloc_new_skb:
else
fraggap = 0;
/* update mtu and maxfraglen if necessary */
- if (skb == NULL || skb_prev == NULL)
+ if (!skb || !skb_prev)
ip6_append_data_mtu(&mtu, &maxfraglen,
fragheaderlen, skb, rt,
orig_mtu);
@@ -1384,10 +1418,10 @@ alloc_new_skb:
skb = sock_wmalloc(sk,
alloclen + hh_len, 1,
sk->sk_allocation);
- if (unlikely(skb == NULL))
+ if (unlikely(!skb))
err = -ENOBUFS;
}
- if (skb == NULL)
+ if (!skb)
goto error;
/*
* Fill in the control structures
@@ -1579,7 +1613,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
unsigned char proto = fl6->flowi6_proto;
skb = __skb_dequeue(queue);
- if (skb == NULL)
+ if (!skb)
goto out;
tail_skb = &(skb_shinfo(skb)->frag_list);