summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c148
-rw-r--r--net/ipv6/af_inet6.c7
-rw-r--r--net/ipv6/datagram.c1
-rw-r--r--net/ipv6/icmp.c13
-rw-r--r--net/ipv6/ip6_gre.c13
-rw-r--r--net/ipv6/ip6_output.c32
-rw-r--r--net/ipv6/ip6_tunnel.c8
-rw-r--r--net/ipv6/ip6_vti.c55
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c13
-rw-r--r--net/ipv6/ndisc.c58
-rw-r--r--net/ipv6/netfilter/Kconfig5
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c49
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c417
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c4
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/raw.c14
-rw-r--r--net/ipv6/reassembly.c90
-rw-r--r--net/ipv6/sit.c4
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/sysctl_net_ipv6.c9
-rw-r--r--net/ipv6/tcp_ipv6.c209
-rw-r--r--net/ipv6/udp.c131
25 files changed, 871 insertions, 421 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5667b3003af9..0b239fc1816e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -108,11 +108,12 @@ static inline u32 cstamp_delta(unsigned long cstamp)
}
#ifdef CONFIG_SYSCTL
-static void addrconf_sysctl_register(struct inet6_dev *idev);
+static int addrconf_sysctl_register(struct inet6_dev *idev);
static void addrconf_sysctl_unregister(struct inet6_dev *idev);
#else
-static inline void addrconf_sysctl_register(struct inet6_dev *idev)
+static inline int addrconf_sysctl_register(struct inet6_dev *idev)
{
+ return 0;
}
static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
@@ -186,6 +187,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
+ .accept_ra_from_local = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -222,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
+ .accept_ra_from_local = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -308,16 +311,16 @@ err_ip:
static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
{
struct inet6_dev *ndev;
+ int err = -ENOMEM;
ASSERT_RTNL();
if (dev->mtu < IPV6_MIN_MTU)
- return NULL;
+ return ERR_PTR(-EINVAL);
ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
-
if (ndev == NULL)
- return NULL;
+ return ERR_PTR(err);
rwlock_init(&ndev->lock);
ndev->dev = dev;
@@ -330,7 +333,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
if (ndev->nd_parms == NULL) {
kfree(ndev);
- return NULL;
+ return ERR_PTR(err);
}
if (ndev->cnf.forwarding)
dev_disable_lro(dev);
@@ -344,17 +347,14 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
- return NULL;
+ return ERR_PTR(err);
}
if (snmp6_register_dev(ndev) < 0) {
ADBG(KERN_WARNING
"%s: cannot create /proc/net/dev_snmp6/%s\n",
__func__, dev->name);
- neigh_parms_release(&nd_tbl, ndev->nd_parms);
- ndev->dead = 1;
- in6_dev_finish_destroy(ndev);
- return NULL;
+ goto err_release;
}
/* One reference from device. We must do this before
@@ -392,7 +392,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ipv6_mc_init_dev(ndev);
ndev->tstamp = jiffies;
- addrconf_sysctl_register(ndev);
+ err = addrconf_sysctl_register(ndev);
+ if (err) {
+ ipv6_mc_destroy_dev(ndev);
+ del_timer(&ndev->regen_timer);
+ goto err_release;
+ }
/* protected by rtnl_lock */
rcu_assign_pointer(dev->ip6_ptr, ndev);
@@ -407,6 +412,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
return ndev;
+
+err_release:
+ neigh_parms_release(&nd_tbl, ndev->nd_parms);
+ ndev->dead = 1;
+ in6_dev_finish_destroy(ndev);
+ return ERR_PTR(err);
}
static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
@@ -418,7 +429,7 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
idev = __in6_dev_get(dev);
if (!idev) {
idev = ipv6_add_dev(dev);
- if (!idev)
+ if (IS_ERR(idev))
return NULL;
}
@@ -2728,9 +2739,25 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
}
}
+static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
+{
+ if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
+ struct in6_addr addr;
+
+ ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
+ /* addrconf_add_linklocal also adds a prefix_route and we
+ * only need to care about prefix routes if ipv6_generate_eui64
+ * couldn't generate one.
+ */
+ if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0)
+ addrconf_add_linklocal(idev, &addr);
+ else if (prefix_route)
+ addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+ }
+}
+
static void addrconf_dev_config(struct net_device *dev)
{
- struct in6_addr addr;
struct inet6_dev *idev;
ASSERT_RTNL();
@@ -2751,11 +2778,7 @@ static void addrconf_dev_config(struct net_device *dev)
if (IS_ERR(idev))
return;
- memset(&addr, 0, sizeof(struct in6_addr));
- addr.s6_addr32[0] = htonl(0xFE800000);
-
- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0)
- addrconf_add_linklocal(idev, &addr);
+ addrconf_addr_gen(idev, false);
}
#if IS_ENABLED(CONFIG_IPV6_SIT)
@@ -2777,11 +2800,7 @@ static void addrconf_sit_config(struct net_device *dev)
}
if (dev->priv_flags & IFF_ISATAP) {
- struct in6_addr addr;
-
- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
- addrconf_add_linklocal(idev, &addr);
+ addrconf_addr_gen(idev, false);
return;
}
@@ -2796,7 +2815,6 @@ static void addrconf_sit_config(struct net_device *dev)
static void addrconf_gre_config(struct net_device *dev)
{
struct inet6_dev *idev;
- struct in6_addr addr;
ASSERT_RTNL();
@@ -2805,11 +2823,7 @@ static void addrconf_gre_config(struct net_device *dev)
return;
}
- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
- addrconf_add_linklocal(idev, &addr);
- else
- addrconf_prefix_route(&addr, 64, dev, 0, 0);
+ addrconf_addr_gen(idev, true);
}
#endif
@@ -2825,8 +2839,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
case NETDEV_REGISTER:
if (!idev && dev->mtu >= IPV6_MIN_MTU) {
idev = ipv6_add_dev(dev);
- if (!idev)
- return notifier_from_errno(-ENOMEM);
+ if (IS_ERR(idev))
+ return notifier_from_errno(PTR_ERR(idev));
}
break;
@@ -2846,7 +2860,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!idev && dev->mtu >= IPV6_MIN_MTU)
idev = ipv6_add_dev(dev);
- if (idev) {
+ if (!IS_ERR_OR_NULL(idev)) {
idev->if_flags |= IF_READY;
run_pending = 1;
}
@@ -2889,7 +2903,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
}
- if (idev) {
+ if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
addrconf_dad_run(idev);
@@ -2924,7 +2938,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!idev && dev->mtu >= IPV6_MIN_MTU) {
idev = ipv6_add_dev(dev);
- if (idev)
+ if (!IS_ERR(idev))
break;
}
@@ -2945,10 +2959,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (idev) {
snmp6_unregister_dev(idev);
addrconf_sysctl_unregister(idev);
- addrconf_sysctl_register(idev);
- err = snmp6_register_dev(idev);
+ err = addrconf_sysctl_register(idev);
if (err)
return notifier_from_errno(err);
+ err = snmp6_register_dev(idev);
+ if (err) {
+ addrconf_sysctl_unregister(idev);
+ return notifier_from_errno(err);
+ }
}
break;
@@ -4321,6 +4339,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
+ array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
}
static inline size_t inet6_ifla6_size(void)
@@ -4420,6 +4439,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
if (nla == NULL)
goto nla_put_failure;
+
+ if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode))
+ goto nla_put_failure;
+
read_lock_bh(&idev->lock);
memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
read_unlock_bh(&idev->lock);
@@ -4524,8 +4547,21 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
BUG();
- if (tb[IFLA_INET6_TOKEN])
+ if (tb[IFLA_INET6_TOKEN]) {
err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
+ u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
+
+ if (mode != IN6_ADDR_GEN_MODE_EUI64 &&
+ mode != IN6_ADDR_GEN_MODE_NONE)
+ return -EINVAL;
+ idev->addr_gen_mode = mode;
+ err = 0;
+ }
return err;
}
@@ -5168,6 +5204,13 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec
},
{
+ .procname = "accept_ra_from_local",
+ .data = &ipv6_devconf.accept_ra_from_local,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
/* sentinel */
}
},
@@ -5218,12 +5261,23 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
kfree(t);
}
-static void addrconf_sysctl_register(struct inet6_dev *idev)
+static int addrconf_sysctl_register(struct inet6_dev *idev)
{
- neigh_sysctl_register(idev->dev, idev->nd_parms,
- &ndisc_ifinfo_sysctl_change);
- __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
- idev, &idev->cnf);
+ int err;
+
+ if (!sysctl_dev_name_is_allowed(idev->dev->name))
+ return -EINVAL;
+
+ err = neigh_sysctl_register(idev->dev, idev->nd_parms,
+ &ndisc_ifinfo_sysctl_change);
+ if (err)
+ return err;
+ err = __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
+ idev, &idev->cnf);
+ if (err)
+ neigh_sysctl_unregister(idev->nd_parms);
+
+ return err;
}
static void addrconf_sysctl_unregister(struct inet6_dev *idev)
@@ -5308,6 +5362,7 @@ static struct rtnl_af_ops inet6_ops = {
int __init addrconf_init(void)
{
+ struct inet6_dev *idev;
int i, err;
err = ipv6_addr_label_init();
@@ -5346,11 +5401,12 @@ int __init addrconf_init(void)
* device and it being up should be removed.
*/
rtnl_lock();
- if (!ipv6_add_dev(init_net.loopback_dev))
- err = -ENOMEM;
+ idev = ipv6_add_dev(init_net.loopback_dev);
rtnl_unlock();
- if (err)
+ if (IS_ERR(idev)) {
+ err = PTR_ERR(idev);
goto errlo;
+ }
for (i = 0; i < IN6_ADDR_HSIZE; i++)
INIT_HLIST_HEAD(&inet6_addr_lst[i]);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7cb4392690dd..2daa3a133e49 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -197,7 +197,7 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->ipv6only = net->ipv6.sysctl.bindv6only;
+ sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
* using v6 API for ipv4.
@@ -294,7 +294,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* Binding to v4-mapped address on a v6-only socket
* makes no sense
*/
- if (np->ipv6only) {
+ if (sk->sk_ipv6only) {
err = -EINVAL;
goto out;
}
@@ -371,7 +371,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_type != IPV6_ADDR_ANY) {
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
if (addr_type != IPV6_ADDR_MAPPED)
- np->ipv6only = 1;
+ sk->sk_ipv6only = 1;
}
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
@@ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
net->ipv6.sysctl.flowlabel_consistency = 1;
+ net->ipv6.sysctl.auto_flowlabels = 0;
atomic_set(&net->ipv6.rt_genid, 0);
err = ipv6_init_mibs(net);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c3bf2d2e519e..2753319524f1 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -199,6 +199,7 @@ ipv4_connected:
NULL);
sk->sk_state = TCP_ESTABLISHED;
+ ip6_set_txhash(sk);
out:
fl6_sock_release(flowlabel);
return err;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f6c84a6eb238..06ba3e58320b 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -626,9 +626,10 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
int inner_offset;
__be16 frag_off;
u8 nexthdr;
+ struct net *net = dev_net(skb->dev);
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- return;
+ goto out;
nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
if (ipv6_ext_hdr(nexthdr)) {
@@ -636,14 +637,14 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
&nexthdr, &frag_off);
if (inner_offset<0)
- return;
+ goto out;
} else {
inner_offset = sizeof(struct ipv6hdr);
}
/* Checkin header including 8 bytes of inner protocol header. */
if (!pskb_may_pull(skb, inner_offset+8))
- return;
+ goto out;
/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
Without this we will not able f.e. to make source routed
@@ -652,13 +653,15 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
--ANK (980726)
*/
- rcu_read_lock();
ipprot = rcu_dereference(inet6_protos[nexthdr]);
if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
- rcu_read_unlock();
raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
+ return;
+
+out:
+ ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
/*
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3873181ed856..5f19dfbc4c6a 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -322,7 +322,8 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
else
strcpy(name, "ip6gre%d");
- dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
+ ip6gre_tunnel_setup);
if (!dev)
return NULL;
@@ -723,7 +724,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
* Push down and install the IP header.
*/
ipv6h = ipv6_hdr(skb);
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
ipv6h->hop_limit = tunnel->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
@@ -1174,7 +1176,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
__be16 *p = (__be16 *)(ipv6h+1);
- ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel);
+ ip6_flow_hdr(ipv6h, 0,
+ ip6_make_flowlabel(dev_net(dev), skb,
+ t->fl.u.ip6.flowlabel, false));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE;
ipv6h->saddr = t->parms.laddr;
@@ -1323,7 +1327,8 @@ static int __net_init ip6gre_init_net(struct net *net)
int err;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
- ip6gre_tunnel_setup);
+ NET_NAME_UNKNOWN,
+ ip6gre_tunnel_setup);
if (!ign->fb_tunnel_dev) {
err = -ENOMEM;
goto err_alloc_dev;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 45702b8cd141..315a55d66079 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
- ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
+ ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
+ np->autoflowlabel));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -802,8 +803,8 @@ slow_path:
/*
* Copy a block of the IP datagram.
*/
- if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
+ len));
left -= len;
fh->frag_off = htons(offset);
@@ -1156,6 +1157,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
int err;
int offset = 0;
__u8 tx_flags = 0;
+ u32 tskey = 0;
if (flags&MSG_PROBE)
return 0;
@@ -1271,9 +1273,12 @@ emsgsize:
}
}
- /* For UDP, check if TX timestamp is enabled */
- if (sk->sk_type == SOCK_DGRAM)
+ if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
sock_tx_timestamp(sk, &tx_flags);
+ if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
+ sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ tskey = sk->sk_tskey++;
+ }
/*
* Let's try using as much space as possible.
@@ -1381,12 +1386,6 @@ alloc_new_skb:
sk->sk_allocation);
if (unlikely(skb == NULL))
err = -ENOBUFS;
- else {
- /* Only the initial fragment
- * is time stamped.
- */
- tx_flags = 0;
- }
}
if (skb == NULL)
goto error;
@@ -1400,8 +1399,11 @@ alloc_new_skb:
skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
dst_exthdrlen);
- if (sk->sk_type == SOCK_DGRAM)
- skb_shinfo(skb)->tx_flags = tx_flags;
+ /* Only the initial fragment is time stamped */
+ skb_shinfo(skb)->tx_flags = tx_flags;
+ tx_flags = 0;
+ skb_shinfo(skb)->tskey = tskey;
+ tskey = 0;
/*
* Find where to start putting bytes
@@ -1571,7 +1573,9 @@ int ip6_push_pending_frames(struct sock *sk)
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
- ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
+ ip6_flow_hdr(hdr, np->cork.tclass,
+ ip6_make_flowlabel(net, skb, fl6->flowlabel,
+ np->autoflowlabel));
hdr->hop_limit = np->cork.hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index afa082458360..f9de5a695072 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -315,7 +315,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
else
sprintf(name, "ip6tnl%%d");
- dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
+ ip6_tnl_dev_setup);
if (dev == NULL)
goto failed;
@@ -1046,7 +1047,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
@@ -1772,7 +1774,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
- ip6_tnl_dev_setup);
+ NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 9aaa6bb229e4..7f52fd9fa7b0 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -204,7 +204,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
else
sprintf(name, "ip6_vti%%d");
- dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup);
if (dev == NULL)
goto failed;
@@ -1020,7 +1020,7 @@ static int __net_init vti6_init_net(struct net *net)
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0",
- vti6_dev_setup);
+ NET_NAME_UNKNOWN, vti6_dev_setup);
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
@@ -1089,36 +1089,26 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
**/
static int __init vti6_tunnel_init(void)
{
- int err;
+ const char *msg;
+ int err;
+ msg = "tunnel device";
err = register_pernet_device(&vti6_net_ops);
if (err < 0)
- goto out_pernet;
+ goto pernet_dev_failed;
+ msg = "tunnel protocols";
err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
- if (err < 0) {
- pr_err("%s: can't register vti6 protocol\n", __func__);
-
- goto out;
- }
-
+ if (err < 0)
+ goto xfrm_proto_esp_failed;
err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
- if (err < 0) {
- xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
- pr_err("%s: can't register vti6 protocol\n", __func__);
-
- goto out;
- }
-
+ if (err < 0)
+ goto xfrm_proto_ah_failed;
err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
- if (err < 0) {
- xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
- xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
- pr_err("%s: can't register vti6 protocol\n", __func__);
-
- goto out;
- }
+ if (err < 0)
+ goto xfrm_proto_comp_failed;
+ msg = "netlink interface";
err = rtnl_link_register(&vti6_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1127,11 +1117,14 @@ static int __init vti6_tunnel_init(void)
rtnl_link_failed:
xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+xfrm_proto_comp_failed:
xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
-out:
+xfrm_proto_esp_failed:
unregister_pernet_device(&vti6_net_ops);
-out_pernet:
+pernet_dev_failed:
+ pr_err("vti6 init: failed to register %s\n", msg);
return err;
}
@@ -1141,13 +1134,9 @@ out_pernet:
static void __exit vti6_tunnel_cleanup(void)
{
rtnl_link_unregister(&vti6_link_ops);
- if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP))
- pr_info("%s: can't deregister protocol\n", __func__);
- if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH))
- pr_info("%s: can't deregister protocol\n", __func__);
- if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP))
- pr_info("%s: can't deregister protocol\n", __func__);
-
+ xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti6_net_ops);
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8250474ab7dc..f9a3fd320d1d 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -744,7 +744,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
else
sprintf(name, "pim6reg%u", mrt->id);
- dev = alloc_netdev(0, name, reg_vif_setup);
+ dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
if (dev == NULL)
return NULL;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index edb58aff4ae7..0c289982796d 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -235,7 +235,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int) ||
inet_sk(sk)->inet_num)
goto e_inval;
- np->ipv6only = valbool;
+ sk->sk_ipv6only = valbool;
retv = 0;
break;
@@ -834,6 +834,10 @@ pref_skip_coa:
np->dontfrag = valbool;
retv = 0;
break;
+ case IPV6_AUTOFLOWLABEL:
+ np->autoflowlabel = valbool;
+ retv = 0;
+ break;
}
release_sock(sk);
@@ -1058,7 +1062,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_V6ONLY:
- val = np->ipv6only;
+ val = sk->sk_ipv6only;
break;
case IPV6_RECVPKTINFO:
@@ -1158,7 +1162,6 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
- break;
}
case IPV6_TRANSPARENT:
@@ -1273,6 +1276,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->dontfrag;
break;
+ case IPV6_AUTOFLOWLABEL:
+ val = np->autoflowlabel;
+ break;
+
default:
return -ENOPROTOOPT;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ca8d4ea48a5d..339078f95d1b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1070,6 +1070,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
sizeof(struct ra_msg);
+ ND_PRINTK(2, info,
+ "RA: %s, dev: %s\n",
+ __func__, skb->dev->name);
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK(2, warn, "RA: source address is not link-local\n");
return;
@@ -1102,13 +1105,21 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- if (!ipv6_accept_ra(in6_dev))
+ if (!ipv6_accept_ra(in6_dev)) {
+ ND_PRINTK(2, info,
+ "RA: %s, did not accept ra for dev: %s\n",
+ __func__, skb->dev->name);
goto skip_linkparms;
+ }
#ifdef CONFIG_IPV6_NDISC_NODETYPE
/* skip link-specific parameters from interior routers */
- if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
+ ND_PRINTK(2, info,
+ "RA: %s, nodetype is NODEFAULT, dev: %s\n",
+ __func__, skb->dev->name);
goto skip_linkparms;
+ }
#endif
if (in6_dev->if_flags & IF_RS_SENT) {
@@ -1130,11 +1141,24 @@ static void ndisc_router_discovery(struct sk_buff *skb)
(ra_msg->icmph.icmp6_addrconf_other ?
IF_RA_OTHERCONF : 0);
- if (!in6_dev->cnf.accept_ra_defrtr)
+ if (!in6_dev->cnf.accept_ra_defrtr) {
+ ND_PRINTK(2, info,
+ "RA: %s, defrtr is false for dev: %s\n",
+ __func__, skb->dev->name);
goto skip_defrtr;
+ }
- if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+ /* Do not accept RA with source-addr found on local machine unless
+ * accept_ra_from_local is set to true.
+ */
+ if (!in6_dev->cnf.accept_ra_from_local &&
+ ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
+ NULL, 0)) {
+ ND_PRINTK(2, info,
+ "RA from local address detected on dev: %s: default router ignored\n",
+ skb->dev->name);
goto skip_defrtr;
+ }
lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
@@ -1163,8 +1187,10 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = NULL;
}
+ ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n",
+ rt, lifetime, skb->dev->name);
if (rt == NULL && lifetime) {
- ND_PRINTK(3, dbg, "RA: adding default router\n");
+ ND_PRINTK(3, info, "RA: adding default router\n");
rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
if (rt == NULL) {
@@ -1260,12 +1286,22 @@ skip_linkparms:
NEIGH_UPDATE_F_ISROUTER);
}
- if (!ipv6_accept_ra(in6_dev))
+ if (!ipv6_accept_ra(in6_dev)) {
+ ND_PRINTK(2, info,
+ "RA: %s, accept_ra is false for dev: %s\n",
+ __func__, skb->dev->name);
goto out;
+ }
#ifdef CONFIG_IPV6_ROUTE_INFO
- if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+ if (!in6_dev->cnf.accept_ra_from_local &&
+ ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
+ NULL, 0)) {
+ ND_PRINTK(2, info,
+ "RA from local address detected on dev: %s: router info ignored.\n",
+ skb->dev->name);
goto skip_routeinfo;
+ }
if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
struct nd_opt_hdr *p;
@@ -1293,8 +1329,12 @@ skip_routeinfo:
#ifdef CONFIG_IPV6_NDISC_NODETYPE
/* skip link-specific ndopts from interior routers */
- if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
+ ND_PRINTK(2, info,
+ "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n",
+ __func__, skb->dev->name);
goto out;
+ }
#endif
if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
@@ -1728,7 +1768,7 @@ int __init ndisc_init(void)
#ifdef CONFIG_SYSCTL
err = neigh_sysctl_register(NULL, &nd_tbl.parms,
- &ndisc_ifinfo_sysctl_change);
+ ndisc_ifinfo_sysctl_change);
if (err)
goto out_unregister_pernet;
out:
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 4bff1f297e39..ac93df16f5af 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -55,6 +55,11 @@ config NFT_REJECT_IPV6
default NFT_REJECT
tristate
+config NF_LOG_IPV6
+ tristate "IPv6 packet logging"
+ depends on NETFILTER_ADVANCED
+ select NF_LOG_COMMON
+
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 70d3dd66f2cd..c0b263104ed2 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -23,6 +23,9 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+# logging
+obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
+
# nf_tables
obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 54bd9790603f..8b147440fbdc 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -94,7 +94,6 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
break;
default:
return false;
- break;
}
nexthdr = hp->nexthdr;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 0d5279fd852a..6f187c8d8a1b 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -50,6 +50,7 @@
#include <linux/module.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+static const char nf_frags_cache_name[] = "nf-frags";
struct nf_ct_frag6_skb_cb
{
@@ -63,6 +64,8 @@ struct nf_ct_frag6_skb_cb
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
+static int zero;
+
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
@@ -76,14 +79,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
.data = &init_net.nf_frag.frags.low_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
};
@@ -102,7 +108,10 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
table[0].data = &net->nf_frag.frags.timeout;
table[1].data = &net->nf_frag.frags.low_thresh;
+ table[1].extra2 = &net->nf_frag.frags.high_thresh;
table[2].data = &net->nf_frag.frags.high_thresh;
+ table[2].extra1 = &net->nf_frag.frags.low_thresh;
+ table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
}
hdr = register_net_sysctl(net, "net/netfilter", table);
@@ -147,16 +156,13 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
const struct in6_addr *daddr)
{
- u32 c;
-
net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
- c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, nf_frags.rnd);
- return c & (INETFRAGS_HASHSZ - 1);
+ return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+ (__force u32)id, nf_frags.rnd);
}
-static unsigned int nf_hashfn(struct inet_frag_queue *q)
+static unsigned int nf_hashfn(const struct inet_frag_queue *q)
{
const struct frag_queue *nq;
@@ -196,7 +202,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.dst = dst;
arg.ecn = ecn;
- read_lock_bh(&nf_frags.lock);
+ local_bh_disable();
hash = nf_hash_frag(id, src, dst);
q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
@@ -217,7 +223,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
int offset, end;
u8 ecn;
- if (fq->q.last_in & INET_FRAG_COMPLETE) {
+ if (fq->q.flags & INET_FRAG_COMPLETE) {
pr_debug("Already completed\n");
goto err;
}
@@ -248,11 +254,11 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* or have different end, the segment is corrupted.
*/
if (end < fq->q.len ||
- ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) {
+ ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) {
pr_debug("already received last fragment\n");
goto err;
}
- fq->q.last_in |= INET_FRAG_LAST_IN;
+ fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
@@ -267,7 +273,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->q.last_in & INET_FRAG_LAST_IN) {
+ if (fq->q.flags & INET_FRAG_LAST_IN) {
pr_debug("last packet already reached.\n");
goto err;
}
@@ -349,10 +355,9 @@ found:
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->q.last_in |= INET_FRAG_FIRST_IN;
+ fq->q.flags |= INET_FRAG_FIRST_IN;
}
- inet_frag_lru_move(&fq->q);
return 0;
discard_fq:
@@ -597,10 +602,6 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(clone);
fhdr = (struct frag_hdr *)skb_transport_header(clone);
- local_bh_disable();
- inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
- local_bh_enable();
-
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
ip6_frag_ecn(hdr));
if (fq == NULL) {
@@ -617,7 +618,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
goto ret_orig;
}
- if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
ret_skb = nf_ct_frag6_reasm(fq, dev);
if (ret_skb == NULL)
@@ -677,13 +678,15 @@ int nf_ct_frag6_init(void)
nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
- nf_frags.secret_interval = 10 * 60 * HZ;
- inet_frags_init(&nf_frags);
-
+ nf_frags.frags_cache_name = nf_frags_cache_name;
+ ret = inet_frags_init(&nf_frags);
+ if (ret)
+ goto out;
ret = register_pernet_subsys(&nf_ct_net_ops);
if (ret)
inet_frags_fini(&nf_frags);
+out:
return ret;
}
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
new file mode 100644
index 000000000000..7b17a0be93e7
--- /dev/null
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -0,0 +1,417 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+static struct nf_loginfo default_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 5,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+/* One level of recursion won't kill us */
+static void dump_ipv6_packet(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb, unsigned int ip6hoff,
+ int recurse)
+{
+ u_int8_t currenthdr;
+ int fragment;
+ struct ipv6hdr _ip6h;
+ const struct ipv6hdr *ih;
+ unsigned int ptr;
+ unsigned int hdrlen = 0;
+ unsigned int logflags;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_MASK;
+
+ ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
+ if (ih == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
+ nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
+
+ /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
+ nf_log_buf_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
+ ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
+ (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
+ ih->hop_limit,
+ (ntohl(*(__be32 *)ih) & 0x000fffff));
+
+ fragment = 0;
+ ptr = ip6hoff + sizeof(struct ipv6hdr);
+ currenthdr = ih->nexthdr;
+ while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
+ struct ipv6_opt_hdr _hdr;
+ const struct ipv6_opt_hdr *hp;
+
+ hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+ if (hp == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 48 "OPT (...) " */
+ if (logflags & XT_LOG_IPOPT)
+ nf_log_buf_add(m, "OPT ( ");
+
+ switch (currenthdr) {
+ case IPPROTO_FRAGMENT: {
+ struct frag_hdr _fhdr;
+ const struct frag_hdr *fh;
+
+ nf_log_buf_add(m, "FRAG:");
+ fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
+ &_fhdr);
+ if (fh == NULL) {
+ nf_log_buf_add(m, "TRUNCATED ");
+ return;
+ }
+
+ /* Max length: 6 "65535 " */
+ nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
+
+ /* Max length: 11 "INCOMPLETE " */
+ if (fh->frag_off & htons(0x0001))
+ nf_log_buf_add(m, "INCOMPLETE ");
+
+ nf_log_buf_add(m, "ID:%08x ",
+ ntohl(fh->identification));
+
+ if (ntohs(fh->frag_off) & 0xFFF8)
+ fragment = 1;
+
+ hdrlen = 8;
+
+ break;
+ }
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_HOPOPTS:
+ if (fragment) {
+ if (logflags & XT_LOG_IPOPT)
+ nf_log_buf_add(m, ")");
+ return;
+ }
+ hdrlen = ipv6_optlen(hp);
+ break;
+ /* Max Length */
+ case IPPROTO_AH:
+ if (logflags & XT_LOG_IPOPT) {
+ struct ip_auth_hdr _ahdr;
+ const struct ip_auth_hdr *ah;
+
+ /* Max length: 3 "AH " */
+ nf_log_buf_add(m, "AH ");
+
+ if (fragment) {
+ nf_log_buf_add(m, ")");
+ return;
+ }
+
+ ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
+ &_ahdr);
+ if (ah == NULL) {
+ /*
+ * Max length: 26 "INCOMPLETE [65535
+ * bytes] )"
+ */
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
+ skb->len - ptr);
+ return;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 */
+ nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
+
+ }
+
+ hdrlen = (hp->hdrlen+2)<<2;
+ break;
+ case IPPROTO_ESP:
+ if (logflags & XT_LOG_IPOPT) {
+ struct ip_esp_hdr _esph;
+ const struct ip_esp_hdr *eh;
+
+ /* Max length: 4 "ESP " */
+ nf_log_buf_add(m, "ESP ");
+
+ if (fragment) {
+ nf_log_buf_add(m, ")");
+ return;
+ }
+
+ /*
+ * Max length: 26 "INCOMPLETE [65535 bytes] )"
+ */
+ eh = skb_header_pointer(skb, ptr, sizeof(_esph),
+ &_esph);
+ if (eh == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
+ skb->len - ptr);
+ return;
+ }
+
+ /* Length: 16 "SPI=0xF1234567 )" */
+ nf_log_buf_add(m, "SPI=0x%x )",
+ ntohl(eh->spi));
+ }
+ return;
+ default:
+ /* Max length: 20 "Unknown Ext Hdr 255" */
+ nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr);
+ return;
+ }
+ if (logflags & XT_LOG_IPOPT)
+ nf_log_buf_add(m, ") ");
+
+ currenthdr = hp->nexthdr;
+ ptr += hdrlen;
+ }
+
+ switch (currenthdr) {
+ case IPPROTO_TCP:
+ if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment,
+ ptr, logflags))
+ return;
+ break;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr))
+ return;
+ break;
+ case IPPROTO_ICMPV6: {
+ struct icmp6hdr _icmp6h;
+ const struct icmp6hdr *ic;
+
+ /* Max length: 13 "PROTO=ICMPv6 " */
+ nf_log_buf_add(m, "PROTO=ICMPv6 ");
+
+ if (fragment)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
+ if (ic == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - ptr);
+ return;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+ nf_log_buf_add(m, "TYPE=%u CODE=%u ",
+ ic->icmp6_type, ic->icmp6_code);
+
+ switch (ic->icmp6_type) {
+ case ICMPV6_ECHO_REQUEST:
+ case ICMPV6_ECHO_REPLY:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+ nf_log_buf_add(m, "ID=%u SEQ=%u ",
+ ntohs(ic->icmp6_identifier),
+ ntohs(ic->icmp6_sequence));
+ break;
+ case ICMPV6_MGM_QUERY:
+ case ICMPV6_MGM_REPORT:
+ case ICMPV6_MGM_REDUCTION:
+ break;
+
+ case ICMPV6_PARAMPROB:
+ /* Max length: 17 "POINTER=ffffffff " */
+ nf_log_buf_add(m, "POINTER=%08x ",
+ ntohl(ic->icmp6_pointer));
+ /* Fall through */
+ case ICMPV6_DEST_UNREACH:
+ case ICMPV6_PKT_TOOBIG:
+ case ICMPV6_TIME_EXCEED:
+ /* Max length: 3+maxlen */
+ if (recurse) {
+ nf_log_buf_add(m, "[");
+ dump_ipv6_packet(m, info, skb,
+ ptr + sizeof(_icmp6h), 0);
+ nf_log_buf_add(m, "] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) {
+ nf_log_buf_add(m, "MTU=%u ",
+ ntohl(ic->icmp6_mtu));
+ }
+ }
+ break;
+ }
+ /* Max length: 10 "PROTO=255 " */
+ default:
+ nf_log_buf_add(m, "PROTO=%u ", currenthdr);
+ }
+
+ /* Max length: 15 "UID=4294967295 " */
+ if ((logflags & XT_LOG_UID) && recurse)
+ nf_log_dump_sk_uid_gid(m, skb->sk);
+
+ /* Max length: 16 "MARK=0xFFFFFFFF " */
+ if (recurse && skb->mark)
+ nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
+}
+
+static void dump_ipv6_mac_header(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ unsigned int logflags = 0;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+
+ if (!(logflags & XT_LOG_MACDECODE))
+ goto fallback;
+
+ switch (dev->type) {
+ case ARPHRD_ETHER:
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+ ntohs(eth_hdr(skb)->h_proto));
+ return;
+ default:
+ break;
+ }
+
+fallback:
+ nf_log_buf_add(m, "MAC=");
+ if (dev->hard_header_len &&
+ skb->mac_header != skb->network_header) {
+ const unsigned char *p = skb_mac_header(skb);
+ unsigned int len = dev->hard_header_len;
+ unsigned int i;
+
+ if (dev->type == ARPHRD_SIT) {
+ p -= ETH_HLEN;
+
+ if (p < skb->head)
+ p = NULL;
+ }
+
+ if (p != NULL) {
+ nf_log_buf_add(m, "%02x", *p++);
+ for (i = 1; i < len; i++)
+ nf_log_buf_add(m, ":%02x", *p++);
+ }
+ nf_log_buf_add(m, " ");
+
+ if (dev->type == ARPHRD_SIT) {
+ const struct iphdr *iph =
+ (struct iphdr *)skb_mac_header(skb);
+ nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
+ &iph->daddr);
+ }
+ } else {
+ nf_log_buf_add(m, " ");
+ }
+}
+
+static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out,
+ loginfo, prefix);
+
+ if (in != NULL)
+ dump_ipv6_mac_header(m, loginfo, skb);
+
+ dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1);
+
+ nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_ip6_logger __read_mostly = {
+ .name = "nf_log_ipv6",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_ip6_packet,
+ .me = THIS_MODULE,
+};
+
+static int __net_init nf_log_ipv6_net_init(struct net *net)
+{
+ nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
+ return 0;
+}
+
+static void __net_exit nf_log_ipv6_net_exit(struct net *net)
+{
+ nf_log_unset(net, &nf_ip6_logger);
+}
+
+static struct pernet_operations nf_log_ipv6_net_ops = {
+ .init = nf_log_ipv6_net_init,
+ .exit = nf_log_ipv6_net_exit,
+};
+
+static int __init nf_log_ipv6_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&nf_log_ipv6_net_ops);
+ if (ret < 0)
+ return ret;
+
+ nf_log_register(NFPROTO_IPV6, &nf_ip6_logger);
+ return 0;
+}
+
+static void __exit nf_log_ipv6_exit(void)
+{
+ unregister_pernet_subsys(&nf_log_ipv6_net_ops);
+ nf_log_unregister(&nf_ip6_logger);
+}
+
+module_init(nf_log_ipv6_init);
+module_exit(nf_log_ipv6_exit);
+
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter IPv4 packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_LOGGER(AF_INET6, 0);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index abfe75a2e316..fc8e49b2ff3e 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -158,6 +158,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
htons(oldlen), htons(datalen), 1);
}
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range *range)
{
@@ -175,6 +176,7 @@ static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
return 0;
}
+#endif
static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
.l3proto = NFPROTO_IPV6,
@@ -183,7 +185,9 @@ static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
.manip_pkt = nf_nat_ipv6_manip_pkt,
.csum_update = nf_nat_ipv6_csum_update,
.csum_recalc = nf_nat_ipv6_csum_recalc,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_ipv6_nlattr_to_range,
+#endif
#ifdef CONFIG_XFRM
.decode_session = nf_nat_ipv6_decode_session,
#endif
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 3317440ea341..2d6f860e5c1e 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -33,6 +33,7 @@
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
+ unsigned int frag_mem = ip6_frag_mem(net);
seq_printf(seq, "TCP6: inuse %d\n",
sock_prot_inuse_get(net, &tcpv6_prot));
@@ -42,8 +43,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %d memory %d\n",
- ip6_frag_nqueues(net), ip6_frag_mem(net));
+ seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
return 0;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index b2dc60b0c764..39d44226e402 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -176,7 +176,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
goto out;
net = dev_net(skb->dev);
- sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
+ sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb));
while (sk) {
int filtered;
@@ -220,7 +220,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
}
}
sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
- IP6CB(skb)->iif);
+ inet6_iif(skb));
}
out:
read_unlock(&raw_v6_hashinfo.lock);
@@ -375,7 +375,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
net = dev_net(skb->dev);
while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
- IP6CB(skb)->iif))) {
+ inet6_iif(skb)))) {
rawv6_err(sk, skb, NULL, type, code,
inner_offset, info);
sk = sk_next(sk);
@@ -506,7 +506,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
sin6->sin6_addr = ipv6_hdr(skb)->saddr;
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
- IP6CB(skb)->iif);
+ inet6_iif(skb));
*addr_len = sizeof(*sin6);
}
@@ -588,8 +588,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
}
offset += skb_transport_offset(skb);
- if (skb_copy_bits(skb, offset, &csum, 2))
- BUG();
+ BUG_ON(skb_copy_bits(skb, offset, &csum, 2));
/* in case cksum was not initialized */
if (unlikely(csum))
@@ -601,8 +600,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP)
csum = CSUM_MANGLED_0;
- if (skb_store_bits(skb, offset, &csum, 2))
- BUG();
+ BUG_ON(skb_store_bits(skb, offset, &csum, 2));
send:
err = ip6_push_pending_frames(sk);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index cc85a9ba5010..c6557d9f7808 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -60,6 +60,8 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
+static const char ip6_frag_cache_name[] = "ip6-frags";
+
struct ip6frag_skb_cb
{
struct inet6_skb_parm h;
@@ -85,27 +87,23 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
const struct in6_addr *daddr)
{
- u32 c;
-
net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
- c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, ip6_frags.rnd);
-
- return c & (INETFRAGS_HASHSZ - 1);
+ return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+ (__force u32)id, ip6_frags.rnd);
}
-static unsigned int ip6_hashfn(struct inet_frag_queue *q)
+static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
{
- struct frag_queue *fq;
+ const struct frag_queue *fq;
fq = container_of(q, struct frag_queue, q);
return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
}
-bool ip6_frag_match(struct inet_frag_queue *q, void *a)
+bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
{
- struct frag_queue *fq;
- struct ip6_create_arg *arg = a;
+ const struct frag_queue *fq;
+ const struct ip6_create_arg *arg = a;
fq = container_of(q, struct frag_queue, q);
return fq->id == arg->id &&
@@ -115,10 +113,10 @@ bool ip6_frag_match(struct inet_frag_queue *q, void *a)
}
EXPORT_SYMBOL(ip6_frag_match);
-void ip6_frag_init(struct inet_frag_queue *q, void *a)
+void ip6_frag_init(struct inet_frag_queue *q, const void *a)
{
struct frag_queue *fq = container_of(q, struct frag_queue, q);
- struct ip6_create_arg *arg = a;
+ const struct ip6_create_arg *arg = a;
fq->id = arg->id;
fq->user = arg->user;
@@ -135,7 +133,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
spin_lock(&fq->q.lock);
- if (fq->q.last_in & INET_FRAG_COMPLETE)
+ if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
inet_frag_kill(&fq->q, frags);
@@ -145,17 +143,20 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
if (!dev)
goto out_rcu_unlock;
- IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ if (fq->q.flags & INET_FRAG_EVICTED)
+ goto out_rcu_unlock;
+
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
/* Don't send error if the first segment did not arrive. */
- if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments)
+ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
goto out_rcu_unlock;
- /*
- But use as source device on which LAST ARRIVED
- segment was received. And do not use fq->dev
- pointer directly, device might already disappeared.
+ /* But use as source device on which LAST ARRIVED
+ * segment was received. And do not use fq->dev
+ * pointer directly, device might already disappeared.
*/
fq->q.fragments->dev = dev;
icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
@@ -192,7 +193,6 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
arg.dst = dst;
arg.ecn = ecn;
- read_lock(&ip6_frags.lock);
hash = inet6_hash_frag(id, src, dst);
q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
@@ -212,7 +212,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct net *net = dev_net(skb_dst(skb)->dev);
u8 ecn;
- if (fq->q.last_in & INET_FRAG_COMPLETE)
+ if (fq->q.flags & INET_FRAG_COMPLETE)
goto err;
offset = ntohs(fhdr->frag_off) & ~0x7;
@@ -243,9 +243,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
* or have different end, the segment is corrupted.
*/
if (end < fq->q.len ||
- ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len))
+ ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
goto err;
- fq->q.last_in |= INET_FRAG_LAST_IN;
+ fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
@@ -263,7 +263,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->q.last_in & INET_FRAG_LAST_IN)
+ if (fq->q.flags & INET_FRAG_LAST_IN)
goto err;
fq->q.len = end;
}
@@ -338,10 +338,10 @@ found:
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->q.last_in |= INET_FRAG_FIRST_IN;
+ fq->q.flags |= INET_FRAG_FIRST_IN;
}
- if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
int res;
unsigned long orefdst = skb->_skb_refdst;
@@ -353,14 +353,13 @@ found:
}
skb_dst_drop(skb);
- inet_frag_lru_move(&fq->q);
return -1;
discard_fq:
inet_frag_kill(&fq->q, &ip6_frags);
err:
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_REASMFAILS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -1;
}
@@ -523,7 +522,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct frag_queue *fq;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct net *net = dev_net(skb_dst(skb)->dev);
- int evicted;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
goto fail_hdr;
@@ -552,11 +550,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false);
- if (evicted)
- IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_REASMFAILS, evicted);
-
fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
ip6_frag_ecn(hdr));
if (fq != NULL) {
@@ -576,7 +569,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return -1;
fail_hdr:
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
return -1;
}
@@ -588,20 +582,25 @@ static const struct inet6_protocol frag_protocol =
};
#ifdef CONFIG_SYSCTL
+static int zero;
+
static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
.data = &init_net.ipv6.frags.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
.data = &init_net.ipv6.frags.low_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.ipv6.frags.high_thresh
},
{
.procname = "ip6frag_time",
@@ -613,10 +612,12 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
{ }
};
+/* secret interval has been deprecated */
+static int ip6_frags_secret_interval_unused;
static struct ctl_table ip6_frags_ctl_table[] = {
{
.procname = "ip6frag_secret_interval",
- .data = &ip6_frags.secret_interval,
+ .data = &ip6_frags_secret_interval_unused,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -636,7 +637,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
goto err_alloc;
table[0].data = &net->ipv6.frags.high_thresh;
+ table[0].extra1 = &net->ipv6.frags.low_thresh;
+ table[0].extra2 = &init_net.ipv6.frags.high_thresh;
table[1].data = &net->ipv6.frags.low_thresh;
+ table[1].extra2 = &net->ipv6.frags.high_thresh;
table[2].data = &net->ipv6.frags.timeout;
/* Don't export sysctls to unprivileged users */
@@ -746,8 +750,10 @@ int __init ipv6_frag_init(void)
ip6_frags.qsize = sizeof(struct frag_queue);
ip6_frags.match = ip6_frag_match;
ip6_frags.frag_expire = ip6_frag_expire;
- ip6_frags.secret_interval = 10 * 60 * HZ;
- inet_frags_init(&ip6_frags);
+ ip6_frags.frags_cache_name = ip6_frag_cache_name;
+ ret = inet_frags_init(&ip6_frags);
+ if (ret)
+ goto err_pernet;
out:
return ret;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 4f408176dc64..2e9ba035fb5f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -250,7 +250,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
else
strcpy(name, "sit%d");
- dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
+ ipip6_tunnel_setup);
if (dev == NULL)
return NULL;
@@ -1729,6 +1730,7 @@ static int __net_init sit_init_net(struct net *net)
sitn->tunnels[3] = sitn->tunnels_r_l;
sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
+ NET_NAME_UNKNOWN,
ipip6_tunnel_setup);
if (!sitn->fb_tunnel_dev) {
err = -ENOMEM;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index a822b880689b..83cea1d39466 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -187,7 +187,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
+ req = inet_reqsk_alloc(&tcp6_request_sock_ops);
if (!req)
goto out;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 058f3eca2e53..0c56c93619e0 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -39,6 +39,13 @@ static struct ctl_table ipv6_table_template[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "auto_flowlabels",
+ .data = &init_net.ipv6.sysctl.auto_flowlabels,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "fwmark_reflect",
.data = &init_net.ipv6.sysctl.fwmark_reflect,
.maxlen = sizeof(int),
@@ -74,6 +81,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[0].data = &net->ipv6.sysctl.bindv6only;
ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply;
ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;
+ ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels;
+ ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 229239ad96b1..22055b098428 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -198,6 +198,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_v6_daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;
+ ip6_set_txhash(sk);
+
/*
* TCP over IPv4
*/
@@ -470,13 +472,14 @@ out:
static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
- struct flowi6 *fl6,
+ struct flowi *fl,
struct request_sock *req,
u16 queue_mapping,
struct tcp_fastopen_cookie *foc)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flowi6 *fl6 = &fl->u.ip6;
struct sk_buff *skb;
int err = -ENOMEM;
@@ -503,18 +506,6 @@ done:
return err;
}
-static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
-{
- struct flowi6 fl6;
- int res;
-
- res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL);
- if (!res) {
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
- }
- return res;
-}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
@@ -718,22 +709,66 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
}
#endif
+static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+
+ ireq->ir_iif = sk->sk_bound_dev_if;
+
+ /* So that link locals have meaning */
+ if (!sk->sk_bound_dev_if &&
+ ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq->ir_iif = inet6_iif(skb);
+
+ if (!TCP_SKB_CB(skb)->when &&
+ (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo ||
+ np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
+ np->rxopt.bits.rxohlim || np->repflow)) {
+ atomic_inc(&skb->users);
+ ireq->pktopts = skb;
+ }
+}
+
+static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl,
+ const struct request_sock *req,
+ bool *strict)
+{
+ if (strict)
+ *strict = true;
+ return inet6_csk_route_req(sk, &fl->u.ip6, req);
+}
+
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.family = AF_INET6,
.obj_size = sizeof(struct tcp6_request_sock),
- .rtx_syn_ack = tcp_v6_rtx_synack,
+ .rtx_syn_ack = tcp_rtx_synack,
.send_ack = tcp_v6_reqsk_send_ack,
.destructor = tcp_v6_reqsk_destructor,
.send_reset = tcp_v6_send_reset,
.syn_ack_timeout = tcp_syn_ack_timeout,
};
-#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
+ .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
+ sizeof(struct ipv6hdr),
+#ifdef CONFIG_TCP_MD5SIG
.md5_lookup = tcp_v6_reqsk_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
-};
#endif
+ .init_req = tcp_v6_init_req,
+#ifdef CONFIG_SYN_COOKIES
+ .cookie_init_seq = cookie_v6_init_sequence,
+#endif
+ .route_req = tcp_v6_route_req,
+ .init_seq = tcp_v6_init_sequence,
+ .send_synack = tcp_v6_send_synack,
+ .queue_hash_add = inet6_csk_reqsk_queue_hash_add,
+};
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
u32 tsval, u32 tsecr, int oif,
@@ -973,153 +1008,17 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
return sk;
}
-/* FIXME: this is substantially similar to the ipv4 code.
- * Can some kind of merge be done? -- erics
- */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct tcp_options_received tmp_opt;
- struct request_sock *req;
- struct inet_request_sock *ireq;
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- __u32 isn = TCP_SKB_CB(skb)->when;
- struct dst_entry *dst = NULL;
- struct tcp_fastopen_cookie foc = { .len = -1 };
- bool want_cookie = false, fastopen;
- struct flowi6 fl6;
- int err;
-
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
if (!ipv6_unicast_destination(skb))
goto drop;
- if ((sysctl_tcp_syncookies == 2 ||
- inet_csk_reqsk_queue_is_full(sk)) && !isn) {
- want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
- if (!want_cookie)
- goto drop;
- }
-
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
- goto drop;
- }
-
- req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
- if (req == NULL)
- goto drop;
-
-#ifdef CONFIG_TCP_MD5SIG
- tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
-#endif
-
- tcp_clear_options(&tmp_opt);
- tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
- tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
+ return tcp_conn_request(&tcp6_request_sock_ops,
+ &tcp_request_sock_ipv6_ops, sk, skb);
- if (want_cookie && !tmp_opt.saw_tstamp)
- tcp_clear_options(&tmp_opt);
-
- tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
- tcp_openreq_init(req, &tmp_opt, skb);
-
- ireq = inet_rsk(req);
- ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
- ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- if (!want_cookie || tmp_opt.tstamp_ok)
- TCP_ECN_create_request(req, skb, sock_net(sk));
-
- ireq->ir_iif = sk->sk_bound_dev_if;
- ireq->ir_mark = inet_request_mark(sk, skb);
-
- /* So that link locals have meaning */
- if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq->ir_iif = inet6_iif(skb);
-
- if (!isn) {
- if (ipv6_opt_accepted(sk, skb) ||
- np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
- np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim ||
- np->repflow) {
- atomic_inc(&skb->users);
- ireq->pktopts = skb;
- }
-
- if (want_cookie) {
- isn = cookie_v6_init_sequence(sk, skb, &req->mss);
- req->cookie_ts = tmp_opt.tstamp_ok;
- goto have_isn;
- }
-
- /* VJ's idea. We save last timestamp seen
- * from the destination in peer table, when entering
- * state TIME-WAIT, and check against it before
- * accepting new connection request.
- *
- * If "isn" is not zero, this request hit alive
- * timewait bucket, so that all the necessary checks
- * are made in the function processing timewait state.
- */
- if (tmp_opt.saw_tstamp &&
- tcp_death_row.sysctl_tw_recycle &&
- (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
- if (!tcp_peer_is_proven(req, dst, true)) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
- goto drop_and_release;
- }
- }
- /* Kill the following clause, if you dislike this way. */
- else if (!sysctl_tcp_syncookies &&
- (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (sysctl_max_syn_backlog >> 2)) &&
- !tcp_peer_is_proven(req, dst, false)) {
- /* Without syncookies last quarter of
- * backlog is filled with destinations,
- * proven to be alive.
- * It means that we continue to communicate
- * to destinations, already remembered
- * to the moment of synflood.
- */
- LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
- &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
- goto drop_and_release;
- }
-
- isn = tcp_v6_init_sequence(skb);
- }
-have_isn:
-
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_release;
-
- if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL)
- goto drop_and_free;
-
- tcp_rsk(req)->snt_isn = isn;
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
- tcp_openreq_init_rwin(req, sk, dst);
- fastopen = !want_cookie &&
- tcp_try_fastopen(sk, skb, req, &foc, dst);
- err = tcp_v6_send_synack(sk, dst, &fl6, req,
- skb_get_queue_mapping(skb), &foc);
- if (!fastopen) {
- if (err || want_cookie)
- goto drop_and_free;
-
- tcp_rsk(req)->listener = NULL;
- inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- }
- return 0;
-
-drop_and_release:
- dst_release(dst);
-drop_and_free:
- reqsk_free(req);
drop:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return 0; /* don't send reset */
@@ -1235,6 +1134,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
newsk->sk_bound_dev_if = ireq->ir_iif;
+ ip6_set_txhash(newsk);
+
/* Now IPv6 options...
First: no IPv4 options.
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7092ff78fd84..4836af8f582d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -79,7 +79,6 @@ static unsigned int udp6_ehashfn(struct net *net,
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -95,7 +94,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
return 1;
if (addr_type == IPV6_ADDR_ANY &&
- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
+ !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
return 1;
if (sk2_rcv_saddr6 &&
@@ -473,7 +472,7 @@ try_again:
sin6->sin6_addr = ipv6_hdr(skb)->saddr;
sin6->sin6_scope_id =
ipv6_iface_scope_id(&sin6->sin6_addr,
- IP6CB(skb)->iif);
+ inet6_iif(skb));
}
*addr_len = sizeof(*sin6);
}
@@ -534,11 +533,15 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct udphdr *uh = (struct udphdr*)(skb->data+offset);
struct sock *sk;
int err;
+ struct net *net = dev_net(skb->dev);
- sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest,
+ sk = __udp6_lib_lookup(net, daddr, uh->dest,
saddr, uh->source, inet6_iif(skb), udptable);
- if (sk == NULL)
+ if (sk == NULL) {
+ ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
+ }
if (type == ICMPV6_PKT_TOOBIG) {
if (!ip6_sk_accept_pmtu(sk))
@@ -674,7 +677,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
}
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
UDP6_INC_STATS_BH(sock_net(sk),
UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
@@ -703,43 +706,26 @@ drop:
return -1;
}
-static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
- __be16 loc_port, const struct in6_addr *loc_addr,
- __be16 rmt_port, const struct in6_addr *rmt_addr,
- int dif)
+static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
+ __be16 loc_port, const struct in6_addr *loc_addr,
+ __be16 rmt_port, const struct in6_addr *rmt_addr,
+ int dif, unsigned short hnum)
{
- struct hlist_nulls_node *node;
- unsigned short num = ntohs(loc_port);
-
- sk_nulls_for_each_from(sk, node) {
- struct inet_sock *inet = inet_sk(sk);
-
- if (!net_eq(sock_net(sk), net))
- continue;
-
- if (udp_sk(sk)->udp_port_hash == num &&
- sk->sk_family == PF_INET6) {
- if (inet->inet_dport) {
- if (inet->inet_dport != rmt_port)
- continue;
- }
- if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
- !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
- continue;
-
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
- continue;
+ struct inet_sock *inet = inet_sk(sk);
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
- continue;
- }
- if (!inet6_mc_check(sk, loc_addr, rmt_addr))
- continue;
- return sk;
- }
- }
- return NULL;
+ if (!net_eq(sock_net(sk), net))
+ return false;
+
+ if (udp_sk(sk)->udp_port_hash != hnum ||
+ sk->sk_family != PF_INET6 ||
+ (inet->inet_dport && inet->inet_dport != rmt_port) ||
+ (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ return false;
+ if (!inet6_mc_check(sk, loc_addr, rmt_addr))
+ return false;
+ return true;
}
static void flush_stack(struct sock **stack, unsigned int count,
@@ -763,6 +749,7 @@ static void flush_stack(struct sock **stack, unsigned int count,
if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
skb1 = NULL;
+ sock_put(sk);
}
if (unlikely(skb1))
kfree_skb(skb1);
@@ -788,43 +775,51 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
{
struct sock *sk, *stack[256 / sizeof(struct sock *)];
const struct udphdr *uh = udp_hdr(skb);
- struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
- int dif;
- unsigned int i, count = 0;
+ struct hlist_nulls_node *node;
+ unsigned short hnum = ntohs(uh->dest);
+ struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
+ int dif = inet6_iif(skb);
+ unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
+ unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
+
+ if (use_hash2) {
+ hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
+ udp_table.mask;
+ hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask;
+start_lookup:
+ hslot = &udp_table.hash2[hash2];
+ offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
+ }
spin_lock(&hslot->lock);
- sk = sk_nulls_head(&hslot->head);
- dif = inet6_iif(skb);
- sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
- while (sk) {
- /* If zero checksum and no_check is not on for
- * the socket then skip it.
- */
- if (uh->check || udp_sk(sk)->no_check6_rx)
+ sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
+ if (__udp_v6_is_mcast_sock(net, sk,
+ uh->dest, daddr,
+ uh->source, saddr,
+ dif, hnum) &&
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ (uh->check || udp_sk(sk)->no_check6_rx)) {
+ if (unlikely(count == ARRAY_SIZE(stack))) {
+ flush_stack(stack, count, skb, ~0);
+ count = 0;
+ }
stack[count++] = sk;
-
- sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
- uh->source, saddr, dif);
- if (unlikely(count == ARRAY_SIZE(stack))) {
- if (!sk)
- break;
- flush_stack(stack, count, skb, ~0);
- count = 0;
+ sock_hold(sk);
}
}
- /*
- * before releasing the lock, we must take reference on sockets
- */
- for (i = 0; i < count; i++)
- sock_hold(stack[i]);
spin_unlock(&hslot->lock);
+ /* Also lookup *:port if we are using hash2 and haven't done so yet. */
+ if (use_hash2 && hash2 != hash2_any) {
+ hash2 = hash2_any;
+ goto start_lookup;
+ }
+
if (count) {
flush_stack(stack, count, skb, count - 1);
-
- for (i = 0; i < count; i++)
- sock_put(stack[i]);
} else {
kfree_skb(skb);
}