diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-05 08:26:13 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-05 08:26:13 -0800 |
commit | 6456300356433873309a1cae6aa05e77d6b59153 (patch) | |
tree | 3158f04f2ca63a48e4d3021aba31aee8f18221cf /net/ipv6 | |
parent | cd2a3bf02625ffad02a6b9f7df758ee36cf12769 (diff) | |
parent | 18a4d8bf250a33c015955f0dec27259780ef6448 (diff) | |
download | linux-6456300356433873309a1cae6aa05e77d6b59153.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Here we go, another merge window full of networking and #ebpf changes:
1) Snoop DHCPACKS in batman-adv to learn MAC/IP pairs in the DHCP
range without dealing with floods of ARP traffic, from Linus
Lüssing.
2) Throttle buffered multicast packet transmission in mt76, from
Felix Fietkau.
3) Support adaptive interrupt moderation in ice, from Brett Creeley.
4) A lot of struct_size conversions, from Gustavo A. R. Silva.
5) Add peek/push/pop commands to bpftool, as well as bash completion,
from Stanislav Fomichev.
6) Optimize sk_msg_clone(), from Vakul Garg.
7) Add SO_BINDTOIFINDEX, from David Herrmann.
8) Be more conservative with local resends due to local congestion,
from Yuchung Cheng.
9) Allow vetoing of unsupported VXLAN FDBs, from Petr Machata.
10) Add health buffer support to devlink, from Eran Ben Elisha.
11) Add TXQ scheduling API to mac80211, from Toke Høiland-Jørgensen.
12) Add statistics to basic packet scheduler filter, from Cong Wang.
13) Add GRE tunnel support for mlxsw Spectrum-2, from Nir Dotan.
14) Lots of new IP tunneling forwarding tests, also from Nir Dotan.
15) Add 3ad stats to bonding, from Nikolay Aleksandrov.
16) Lots of probing improvements for bpftool, from Quentin Monnet.
17) Various nfp drive #ebpf JIT improvements from Jakub Kicinski.
18) Allow #ebpf programs to access gso_segs from skb shared info, from
Eric Dumazet.
19) Add sock_diag support for AF_XDP sockets, from Björn Töpel.
20) Support 22260 iwlwifi devices, from Luca Coelho.
21) Use rbtree for ipv6 defragmentation, from Peter Oskolkov.
22) Add JMP32 instruction class support to #ebpf, from Jiong Wang.
23) Add spinlock support to #ebpf, from Alexei Starovoitov.
24) Support 256-bit keys and TLS 1.3 in ktls, from Dave Watson.
25) Add device infomation API to devlink, from Jakub Kicinski.
26) Add new timestamping socket options which are y2038 safe, from
Deepa Dinamani.
27) Add RX checksum offloading for various sh_eth chips, from Sergei
Shtylyov.
28) Flow offload infrastructure, from Pablo Neira Ayuso.
29) Numerous cleanups, improvements, and bug fixes to the PHY layer
and many drivers from Heiner Kallweit.
30) Lots of changes to try and make packet scheduler classifiers run
lockless as much as possible, from Vlad Buslov.
31) Support BCM957504 chip in bnxt_en driver, from Erik Burrows.
32) Add concurrency tests to tc-tests infrastructure, from Vlad
Buslov.
33) Add hwmon support to aquantia, from Heiner Kallweit.
34) Allow 64-bit values for SO_MAX_PACING_RATE, from Eric Dumazet.
And I would be remiss if I didn't thank the various major networking
subsystem maintainers for integrating much of this work before I even
saw it. Alexei Starovoitov, Daniel Borkmann, Pablo Neira Ayuso,
Johannes Berg, Kalle Valo, and many others. Thank you!"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2207 commits)
net/sched: avoid unused-label warning
net: ignore sysctl_devconf_inherit_init_net without SYSCTL
phy: mdio-mux: fix Kconfig dependencies
net: phy: use phy_modify_mmd_changed in genphy_c45_an_config_aneg
net: dsa: mv88e6xxx: add call to mv88e6xxx_ports_cmode_init to probe for new DSA framework
selftest/net: Remove duplicate header
sky2: Disable MSI on Dell Inspiron 1545 and Gateway P-79
net/mlx5e: Update tx reporter status in case channels were successfully opened
devlink: Add support for direct reporter health state update
devlink: Update reporter state to error even if recover aborted
sctp: call iov_iter_revert() after sending ABORT
team: Free BPF filter when unregistering netdev
ip6mr: Do not call __IP6_INC_STATS() from preemptible context
isdn: mISDN: Fix potential NULL pointer dereference of kzalloc
net: dsa: mv88e6xxx: support in-band signalling on SGMII ports with external PHYs
cxgb4/chtls: Prefix adapter flags with CXGB4
net-sysfs: Switch to bitmap_zalloc()
mellanox: Switch to bitmap_zalloc()
bpf: add test cases for non-pointer sanitiation logic
mlxsw: i2c: Extend initialization by querying resources data
...
Diffstat (limited to 'net/ipv6')
29 files changed, 530 insertions, 1474 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 72ffd3d760ff..4ae17a966ae3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -597,6 +597,43 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; +static int inet6_netconf_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv6_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv6_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETCONFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETCONFA_IFINDEX: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) @@ -605,14 +642,12 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, struct nlattr *tb[NETCONFA_MAX+1]; struct inet6_dev *in6_dev = NULL; struct net_device *dev = NULL; - struct netconfmsg *ncm; struct sk_buff *skb; struct ipv6_devconf *devconf; int ifindex; int err; - err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv6_policy, extack); + err = inet6_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; @@ -5182,6 +5217,52 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } +static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrmsg *ifm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request"); + return -EINVAL; + } + + ifm = nlmsg_data(nlh); + if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + if (err) + return err; + + for (i = 0; i <= IFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFA_TARGET_NETNSID: + case IFA_ADDRESS: + case IFA_LOCAL: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get address request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -5202,8 +5283,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct sk_buff *skb; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, - extack); + err = inet6_rtm_valid_getaddr_req(in_skb, nlh, tb, extack); if (err < 0) return err; @@ -6825,6 +6905,12 @@ static int __net_init addrconf_init_net(struct net *net) if (!dflt) goto err_alloc_dflt; + if (IS_ENABLED(CONFIG_SYSCTL) && + sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) { + memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf)); + memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt)); + } + /* these will be inherited by all namespaces */ dflt->autoconf = ipv6_defaults.autoconf; dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 5cd0029d930e..6c79af056d9b 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -134,6 +134,11 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1, return -EAFNOSUPPORT; } +static int eafnosupport_ipv6_route_input(struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} + static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id) { return NULL; @@ -170,6 +175,7 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup, + .ipv6_route_input = eafnosupport_ipv6_route_input, .fib6_get_table = eafnosupport_fib6_get_table, .fib6_table_lookup = eafnosupport_fib6_table_lookup, .fib6_lookup = eafnosupport_fib6_lookup, diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 0d1ee82ee55b..d43d076c98f5 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -523,6 +523,50 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } +static int ip6addrlbl_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrlblmsg *ifal; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); + + ifal = nlmsg_data(nlh); + if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); + if (err) + return err; + + for (i = 0; i <= IFAL_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFAL_ADDRESS: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in addrlabel get request"); + return -EINVAL; + } + } + + return 0; +} + static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -535,8 +579,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct ip6addrlbl_entry *p; struct sk_buff *skb; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, - extack); + err = ip6addrlbl_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d99753b5e39b..2f45d2a3e3a3 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -900,10 +900,17 @@ static struct pernet_operations inet6_net_ops = { .exit = inet6_net_exit, }; +static int ipv6_route_input(struct sk_buff *skb) +{ + ip6_route_input(skb); + return skb_dst(skb)->error; +} + static const struct ipv6_stub ipv6_stub_impl = { .ipv6_sock_mc_join = ipv6_sock_mc_join, .ipv6_sock_mc_drop = ipv6_sock_mc_drop, .ipv6_dst_lookup = ip6_dst_lookup, + .ipv6_route_input = ipv6_route_input, .fib6_get_table = fib6_get_table, .fib6_table_lookup = fib6_table_lookup, .fib6_lookup = fib6_lookup, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index bbcdfd299692..802faa2fcc0e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -81,7 +81,7 @@ */ static inline struct sock *icmpv6_sk(struct net *net) { - return net->ipv6.icmp_sk[smp_processor_id()]; + return *this_cpu_ptr(net->ipv6.icmp_sk); } static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -953,13 +953,21 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); } +static void __net_exit icmpv6_sk_exit(struct net *net) +{ + int i; + + for_each_possible_cpu(i) + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i)); + free_percpu(net->ipv6.icmp_sk); +} + static int __net_init icmpv6_sk_init(struct net *net) { struct sock *sk; - int err, i, j; + int err, i; - net->ipv6.icmp_sk = - kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL); + net->ipv6.icmp_sk = alloc_percpu(struct sock *); if (!net->ipv6.icmp_sk) return -ENOMEM; @@ -972,7 +980,7 @@ static int __net_init icmpv6_sk_init(struct net *net) goto fail; } - net->ipv6.icmp_sk[i] = sk; + *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. @@ -982,22 +990,10 @@ static int __net_init icmpv6_sk_init(struct net *net) return 0; fail: - for (j = 0; j < i; j++) - inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]); - kfree(net->ipv6.icmp_sk); + icmpv6_sk_exit(net); return err; } -static void __net_exit icmpv6_sk_exit(struct net *net) -{ - int i; - - for_each_possible_cpu(i) { - inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]); - } - kfree(net->ipv6.icmp_sk); -} - static struct pernet_operations icmpv6_sk_ops = { .init = icmpv6_sk_init, .exit = icmpv6_sk_exit, diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 17c455ff69ff..79d2e43c05c5 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -383,12 +383,9 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info) struct rhashtable_iter iter; struct ila_map *ila; spinlock_t *lock; - int ret; - - ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter, GFP_KERNEL); - if (ret) - goto done; + int ret = 0; + rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter); rhashtable_walk_start(&iter); for (;;) { @@ -509,23 +506,17 @@ int ila_xlat_nl_dump_start(struct netlink_callback *cb) struct net *net = sock_net(cb->skb->sk); struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_dump_iter *iter; - int ret; iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; - ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter->rhiter, - GFP_KERNEL); - if (ret) { - kfree(iter); - return ret; - } + rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter->rhiter); iter->skip = 0; cb->args[0] = (long)iter; - return ret; + return 0; } int ila_xlat_nl_dump_done(struct netlink_callback *cb) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 26f25b6e2833..b32c95f02128 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -524,7 +524,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) return PACKET_REJECT; } -static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, +static int ip6erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi) { struct erspan_base_hdr *ershdr; @@ -607,7 +607,7 @@ static int gre_rcv(struct sk_buff *skb) if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { - if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) + if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD) return 0; goto out; } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 5c045691c302..345882d9c061 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -383,9 +383,36 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { }, }; +static struct sk_buff *sit_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4)) + return ERR_PTR(-EINVAL); + + return ipv6_gso_segment(skb, features); +} + +static struct sk_buff *ip4ip6_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)) + return ERR_PTR(-EINVAL); + + return inet_gso_segment(skb, features); +} + +static struct sk_buff *ip6ip6_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)) + return ERR_PTR(-EINVAL); + + return ipv6_gso_segment(skb, features); +} + static const struct net_offload sit_offload = { .callbacks = { - .gso_segment = ipv6_gso_segment, + .gso_segment = sit_gso_segment, .gro_receive = sit_ip6ip6_gro_receive, .gro_complete = sit_gro_complete, }, @@ -393,7 +420,7 @@ static const struct net_offload sit_offload = { static const struct net_offload ip4ip6_offload = { .callbacks = { - .gso_segment = inet_gso_segment, + .gso_segment = ip4ip6_gso_segment, .gro_receive = ip4ip6_gro_receive, .gro_complete = ip4ip6_gro_complete, }, @@ -401,7 +428,7 @@ static const struct net_offload ip4ip6_offload = { static const struct net_offload ip6ip6_offload = { .callbacks = { - .gso_segment = ipv6_gso_segment, + .gso_segment = ip6ip6_gso_segment, .gro_receive = sit_ip6ip6_gro_receive, .gro_complete = ip6ip6_gro_complete, }, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5f9fa0302b5a..edbd12067170 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -300,6 +300,12 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) if (sk && ra->sel == sel && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { + struct ipv6_pinfo *np = inet6_sk(sk); + + if (np && np->rtalert_isolate && + !net_eq(sock_net(sk), dev_net(skb->dev))) { + continue; + } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index ad1a9ccd4b44..25430c991cea 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -32,18 +32,9 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, goto error; } if (cfg->bind_ifindex) { - struct net_device *dev; - - dev = dev_get_by_index(net, cfg->bind_ifindex); - if (!dev) { - err = -ENODEV; - goto error; - } - - err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, - dev->name, strlen(dev->name) + 1); - dev_put(dev); - + err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX, + (void *)&cfg->bind_ifindex, + sizeof(cfg->bind_ifindex)); if (err < 0) goto error; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index cc01aa3f2b5e..e4dd57976737 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -97,7 +97,7 @@ static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); -static void mroute_clean_tables(struct mr_table *mrt, bool all); +static void mroute_clean_tables(struct mr_table *mrt, int flags); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES @@ -393,7 +393,8 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id) static void ip6mr_free_table(struct mr_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt, true); + mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | + MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC); rhltable_destroy(&mrt->mfc_hash); kfree(mrt); } @@ -1496,42 +1497,51 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr_table *mrt, bool all) +static void mroute_clean_tables(struct mr_table *mrt, int flags) { struct mr_mfc *c, *tmp; LIST_HEAD(list); int i; /* Shut down all active vif entries */ - for (i = 0; i < mrt->maxvif; i++) { - if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) - continue; - mif6_delete(mrt, i, 0, &list); + if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) { + for (i = 0; i < mrt->maxvif; i++) { + if (((mrt->vif_table[i].flags & VIFF_STATIC) && + !(flags & MRT6_FLUSH_MIFS_STATIC)) || + (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS))) + continue; + mif6_delete(mrt, i, 0, &list); + } + unregister_netdevice_many(&list); } - unregister_netdevice_many(&list); /* Wipe the cache */ - list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { - if (!all && (c->mfc_flags & MFC_STATIC)) - continue; - rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); - list_del_rcu(&c->list); - call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), - FIB_EVENT_ENTRY_DEL, - (struct mfc6_cache *)c, mrt->id); - mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); - mr_cache_put(c); + if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) { + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { + if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) || + (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC))) + continue; + rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); + list_del_rcu(&c->list); + call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), + FIB_EVENT_ENTRY_DEL, + (struct mfc6_cache *)c, mrt->id); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); + mr_cache_put(c); + } } - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { - spin_lock_bh(&mfc_unres_lock); - list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { - list_del(&c->list); - mr6_netlink_event(mrt, (struct mfc6_cache *)c, - RTM_DELROUTE); - ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); + if (flags & MRT6_FLUSH_MFC) { + if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { + spin_lock_bh(&mfc_unres_lock); + list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { + list_del(&c->list); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, + RTM_DELROUTE); + ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); + } + spin_unlock_bh(&mfc_unres_lock); } - spin_unlock_bh(&mfc_unres_lock); } } @@ -1587,7 +1597,7 @@ int ip6mr_sk_done(struct sock *sk) NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); - mroute_clean_tables(mrt, false); + mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC); err = 0; break; } @@ -1703,6 +1713,20 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns rtnl_unlock(); return ret; + case MRT6_FLUSH: + { + int flags; + + if (optlen != sizeof(flags)) + return -EINVAL; + if (get_user(flags, (int __user *)optval)) + return -EFAULT; + rtnl_lock(); + mroute_clean_tables(mrt, flags); + rtnl_unlock(); + return 0; + } + /* * Control PIM assert (to activate pim will activate assert) */ @@ -1964,10 +1988,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTFORWDATAGRAMS); - __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTOCTETS, skb->len); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTOCTETS, skb->len); return dst_output(net, sk, skb); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 973e215c3114..40f21fef25ff 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -787,6 +787,12 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; + case IPV6_ROUTER_ALERT_ISOLATE: + if (optlen < sizeof(int)) + goto e_inval; + np->rtalert_isolate = valbool; + retv = 0; + break; case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; @@ -1358,6 +1364,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->rxopt.bits.recvfragsize; break; + case IPV6_ROUTER_ALERT_ISOLATE: + val = np->rtalert_isolate; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 21f6deb2aec9..42f3f5cd349f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -940,6 +940,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) { return __ipv6_dev_mc_inc(dev, addr, MCAST_EXCLUDE); } +EXPORT_SYMBOL(ipv6_dev_mc_inc); /* * device multicast group del @@ -987,6 +988,7 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) return err; } +EXPORT_SYMBOL(ipv6_dev_mc_dec); /* * check if the interface/address pair is valid diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c index 9405b04eecc6..dddd75d1be0e 100644 --- a/net/ipv6/mcast_snoop.c +++ b/net/ipv6/mcast_snoop.c @@ -41,6 +41,8 @@ static int ipv6_mc_check_ip6hdr(struct sk_buff *skb) if (skb->len < len || len <= offset) return -EINVAL; + skb_set_transport_header(skb, offset); + return 0; } @@ -77,27 +79,27 @@ static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb) len += sizeof(struct mld2_report); - return pskb_may_pull(skb, len) ? 0 : -EINVAL; + return ipv6_mc_may_pull(skb, len) ? 0 : -EINVAL; } static int ipv6_mc_check_mld_query(struct sk_buff *skb) { + unsigned int transport_len = ipv6_transport_len(skb); struct mld_msg *mld; - unsigned int len = skb_transport_offset(skb); + unsigned int len; /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) return -EINVAL; - len += sizeof(struct mld_msg); - if (skb->len < len) - return -EINVAL; - /* MLDv1? */ - if (skb->len != len) { + if (transport_len != sizeof(struct mld_msg)) { /* or MLDv2? */ - len += sizeof(struct mld2_query) - sizeof(struct mld_msg); - if (skb->len < len || !pskb_may_pull(skb, len)) + if (transport_len < sizeof(struct mld2_query)) + return -EINVAL; + + len = skb_transport_offset(skb) + sizeof(struct mld2_query); + if (!ipv6_mc_may_pull(skb, len)) return -EINVAL; } @@ -115,12 +117,17 @@ static int ipv6_mc_check_mld_query(struct sk_buff *skb) static int ipv6_mc_check_mld_msg(struct sk_buff *skb) { - struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb); + unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); + struct mld_msg *mld; + + if (!ipv6_mc_may_pull(skb, len)) + return -EINVAL; + + mld = (struct mld_msg *)skb_transport_header(skb); switch (mld->mld_type) { case ICMPV6_MGM_REDUCTION: case ICMPV6_MGM_REPORT: - /* fall through */ return 0; case ICMPV6_MLD2_REPORT: return ipv6_mc_check_mld_reportv2(skb); @@ -136,49 +143,30 @@ static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); } -static int __ipv6_mc_check_mld(struct sk_buff *skb, - struct sk_buff **skb_trimmed) - +int ipv6_mc_check_icmpv6(struct sk_buff *skb) { - struct sk_buff *skb_chk = NULL; - unsigned int transport_len; - unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); - int ret = -EINVAL; + unsigned int len = skb_transport_offset(skb) + sizeof(struct icmp6hdr); + unsigned int transport_len = ipv6_transport_len(skb); + struct sk_buff *skb_chk; - transport_len = ntohs(ipv6_hdr(skb)->payload_len); - transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); + if (!ipv6_mc_may_pull(skb, len)) + return -EINVAL; skb_chk = skb_checksum_trimmed(skb, transport_len, ipv6_mc_validate_checksum); if (!skb_chk) - goto err; - - if (!pskb_may_pull(skb_chk, len)) - goto err; - - ret = ipv6_mc_check_mld_msg(skb_chk); - if (ret) - goto err; - - if (skb_trimmed) - *skb_trimmed = skb_chk; - /* free now unneeded clone */ - else if (skb_chk != skb) - kfree_skb(skb_chk); - - ret = 0; + return -EINVAL; -err: - if (ret && skb_chk && skb_chk != skb) + if (skb_chk != skb) kfree_skb(skb_chk); - return ret; + return 0; } +EXPORT_SYMBOL(ipv6_mc_check_icmpv6); /** * ipv6_mc_check_mld - checks whether this is a sane MLD packet * @skb: the skb to validate - * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) * * Checks whether an IPv6 packet is a valid MLD packet. If so sets * skb transport header accordingly and returns zero. @@ -188,18 +176,10 @@ err: * -ENOMSG: IP header validation succeeded but it is not an MLD packet. * -ENOMEM: A memory allocation failure happened. * - * Optionally, an skb pointer might be provided via skb_trimmed (or set it - * to NULL): After parsing an MLD packet successfully it will point to - * an skb which has its tail aligned to the IP packet end. This might - * either be the originally provided skb or a trimmed, cloned version if - * the skb frame had data beyond the IP packet. A cloned skb allows us - * to leave the original skb and its full frame unchanged (which might be - * desirable for layer 2 frame jugglers). - * * Caller needs to set the skb network header and free any returned skb if it * differs from the provided skb. */ -int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) +int ipv6_mc_check_mld(struct sk_buff *skb) { int ret; @@ -211,6 +191,10 @@ int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) if (ret < 0) return ret; - return __ipv6_mc_check_mld(skb, skb_trimmed); + ret = ipv6_mc_check_icmpv6(skb); + if (ret < 0) + return ret; + + return ipv6_mc_check_mld_msg(skb); } EXPORT_SYMBOL(ipv6_mc_check_mld); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 6d0b1f3e927b..1240ccd57f39 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -86,8 +86,8 @@ static int nf_ip6_reroute(struct sk_buff *skb, return 0; } -static int nf_ip6_route(struct net *net, struct dst_entry **dst, - struct flowi *fl, bool strict) +int __nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) { static const struct ipv6_pinfo fake_pinfo; static const struct inet_sock fake_sk = { @@ -107,12 +107,17 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst, *dst = result; return err; } +EXPORT_SYMBOL_GPL(__nf_ip6_route); static const struct nf_ipv6_ops ipv6ops = { +#if IS_MODULE(CONFIG_IPV6) .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input, + .route_me_harder = ip6_route_me_harder, + .dev_get_saddr = ipv6_dev_get_saddr, + .route = __nf_ip6_route, +#endif + .route_input = ip6_route_input, .fragment = ip6_fragment, - .route = nf_ip6_route, .reroute = nf_ip6_reroute, }; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 339d0762b027..ddc99a1653aa 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -31,34 +31,6 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. -if NF_NAT_IPV6 - -config NFT_CHAIN_NAT_IPV6 - tristate "IPv6 nf_tables nat chain support" - help - This option enables the "nat" chain for IPv6 in nf_tables. This - chain type is used to perform Network Address Translation (NAT) - packet transformations such as the source, destination address and - source and destination ports. - -config NFT_MASQ_IPV6 - tristate "IPv6 masquerade support for nf_tables" - depends on NFT_MASQ - select NF_NAT_MASQUERADE_IPV6 - help - This is the expression that provides IPv4 masquerading support for - nf_tables. - -config NFT_REDIR_IPV6 - tristate "IPv6 redirect support for nf_tables" - depends on NFT_REDIR - select NF_NAT_REDIRECT - help - This is the expression that provides IPv4 redirect support for - nf_tables. - -endif # NF_NAT_IPV6 - config NFT_REJECT_IPV6 select NF_REJECT_IPV6 default NFT_REJECT @@ -106,23 +78,6 @@ config NF_LOG_IPV6 default m if NETFILTER_ADVANCED=n select NF_LOG_COMMON -config NF_NAT_IPV6 - tristate "IPv6 NAT" - depends on NF_CONNTRACK - depends on NETFILTER_ADVANCED - select NF_NAT - help - The IPv6 NAT option allows masquerading, port forwarding and other - forms of full Network Address Port Translation. This can be - controlled by iptables or nft. - -if NF_NAT_IPV6 - -config NF_NAT_MASQUERADE_IPV6 - bool - -endif # NF_NAT_IPV6 - config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 @@ -311,7 +266,6 @@ config IP6_NF_NAT depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_NAT - select NF_NAT_IPV6 select NETFILTER_XT_NAT help This enables the `nat' table in ip6tables. This allows masquerading, @@ -324,7 +278,7 @@ if IP6_NF_NAT config IP6_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - select NF_NAT_MASQUERADE_IPV6 + select NF_NAT_MASQUERADE help Masquerading is a special case of NAT: all outgoing connections are changed to seem to come from a particular interface's address, and diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 9ea43d5256e0..3853c648ebaa 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -11,10 +11,6 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o -nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o -nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o -obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o - # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o @@ -32,10 +28,7 @@ obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o # nf_tables obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o -obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o -obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o -obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 67ba70ab9f5c..3e1fab9d7503 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -17,8 +17,6 @@ #include <net/ipv6.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> static int __net_init ip6table_nat_table_init(struct net *net); @@ -72,10 +70,10 @@ static int ip6t_nat_register_lookups(struct net *net) int i, ret; for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) { - ret = nf_nat_l3proto_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]); + ret = nf_nat_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]); if (ret) { while (i) - nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]); + nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]); return ret; } @@ -89,7 +87,7 @@ static void ip6t_nat_unregister_lookups(struct net *net) int i; for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) - nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]); + nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]); } static int __net_init ip6table_nat_table_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 181da2c40f9a..3de0e9b0a482 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -136,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) } #endif +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); + static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); @@ -177,9 +180,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { - struct sk_buff *prev, *next; unsigned int payload_len; - int offset, end; + struct net_device *dev; + struct sk_buff *prev; + int offset, end, err; u8 ecn; if (fq->q.flags & INET_FRAG_COMPLETE) { @@ -254,55 +258,18 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, goto err; } - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = fq->q.fragments_tail; - if (!prev || prev->ip_defrag_offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (next->ip_defrag_offset >= offset) - break; /* bingo! */ - prev = next; - } - -found: - /* RFC5722, Section 4: - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments, including those not yet received) MUST be silently - * discarded. - */ - - /* Check for overlap with preceding fragment. */ - if (prev && - (prev->ip_defrag_offset + prev->len) > offset) - goto discard_fq; - - /* Look for overlap with succeeding segment. */ - if (next && next->ip_defrag_offset < end) - goto discard_fq; - - /* Note : skb->ip_defrag_offset and skb->dev share the same location */ - if (skb->dev) - fq->iif = skb->dev->ifindex; + /* Note : skb->rbnode and skb->dev share the same location. */ + dev = skb->dev; /* Makes sure compiler wont do silly aliasing games */ barrier(); - skb->ip_defrag_offset = offset; - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; + prev = fq->q.fragments_tail; + err = inet_frag_queue_insert(&fq->q, skb, offset, end); + if (err) + goto insert_error; + + if (dev) + fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; @@ -319,11 +286,25 @@ found: fq->q.flags |= INET_FRAG_FIRST_IN; } - return 0; + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + fq->q.meat == fq->q.len) { + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + err = nf_ct_frag6_reasm(fq, skb, prev, dev); + skb->_skb_refdst = orefdst; + return err; + } + + skb_dst_drop(skb); + return -EINPROGRESS; -discard_fq: +insert_error: + if (err == IPFRAG_DUP) + goto err; inet_frag_kill(&fq->q); err: + skb_dst_drop(skb); return -EINVAL; } @@ -333,147 +314,66 @@ err: * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. - * - * returns true if *prev skb has been transformed into the reassembled - * skb, false otherwise. */ -static bool -nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev) { - struct sk_buff *fp, *head = fq->q.fragments; - int payload_len, delta; + void *reasm_data; + int payload_len; u8 ecn; inet_frag_kill(&fq->q); - WARN_ON(head == NULL); - WARN_ON(head->ip_defrag_offset != 0); - ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) - return false; + goto err; + + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); + if (!reasm_data) + goto err; - /* Unfragmented part is taken from the first segment. */ - payload_len = ((head->data - skb_network_header(head)) - + payload_len = ((skb->data - skb_network_header(skb)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", payload_len); - return false; - } - - delta = - head->truesize; - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - return false; - - delta += head->truesize; - if (delta) - add_frag_mem_limit(fq->q.net, delta); - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (clone == NULL) - return false; - - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - - add_frag_mem_limit(fq->q.net, clone->truesize); - } - - /* morph head into last received skb: prev. - * - * This allows callers of ipv6 conntrack defrag to continue - * to use the last skb(frag) passed into the reasm engine. - * The last skb frag 'silently' turns into the full reassembled skb. - * - * Since prev is also part of q->fragments we have to clone it first. - */ - if (head != prev) { - struct sk_buff *iter; - - fp = skb_clone(prev, GFP_ATOMIC); - if (!fp) - return false; - - fp->next = prev->next; - - iter = head; - while (iter) { - if (iter->next == prev) { - iter->next = fp; - break; - } - iter = iter->next; - } - - skb_morph(prev, head); - prev->next = head->next; - consume_skb(head); - head = prev; + goto err; } /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ - skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; - memmove(head->head + sizeof(struct frag_hdr), head->head, - (head->data - head->head) - sizeof(struct frag_hdr)); - head->mac_header += sizeof(struct frag_hdr); - head->network_header += sizeof(struct frag_hdr); - - skb_shinfo(head)->frag_list = head->next; - skb_reset_transport_header(head); - skb_push(head, head->data - skb_network_header(head)); - - for (fp = head->next; fp; fp = fp->next) { - head->data_len += fp->len; - head->len += fp->len; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; - fp->sk = NULL; - } - sub_frag_mem_limit(fq->q.net, head->truesize); + skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0]; + memmove(skb->head + sizeof(struct frag_hdr), skb->head, + (skb->data - skb->head) - sizeof(struct frag_hdr)); + skb->mac_header += sizeof(struct frag_hdr); + skb->network_header += sizeof(struct frag_hdr); + + skb_reset_transport_header(skb); + + inet_frag_reasm_finish(&fq->q, skb, reasm_data); - head->ignore_df = 1; - skb_mark_not_on_list(head); - head->dev = dev; - head->tstamp = fq->q.stamp; - ipv6_hdr(head)->payload_len = htons(payload_len); - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); - IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; + skb->ignore_df = 1; + skb->dev = dev; + ipv6_hdr(skb)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); + IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_partial(skb_network_header(skb), + skb_network_header_len(skb), + skb->csum); - fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; + fq->q.last_run_head = NULL; - return true; + return 0; + +err: + inet_frag_kill(&fq->q); + return -EINVAL; } /* @@ -542,7 +442,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { u16 savethdr = skb->transport_header; - struct net_device *dev = skb->dev; int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -565,10 +464,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && - fhdr->frag_off & htons(IP6_MF)) - return -EINVAL; - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); @@ -580,31 +475,17 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) spin_lock_bh(&fq->q.lock); ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff); - if (ret < 0) { - if (ret == -EPROTO) { - skb->transport_header = savethdr; - ret = 0; - } - goto out_unlock; + if (ret == -EPROTO) { + skb->transport_header = savethdr; + ret = 0; } /* after queue has assumed skb ownership, only 0 or -EINPROGRESS * must be returned. */ - ret = -EINPROGRESS; - if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) { - unsigned long orefdst = skb->_skb_refdst; - - skb->_skb_refdst = 0UL; - if (nf_ct_frag6_reasm(fq, skb, dev)) - ret = 0; - skb->_skb_refdst = orefdst; - } else { - skb_dst_drop(skb); - } + if (ret) + ret = -EINPROGRESS; -out_unlock: spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q); return ret; diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c deleted file mode 100644 index 7a41ee3c11b4..000000000000 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of IPv6 NAT funded by Astaro. - */ -#include <linux/types.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ipv6.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <net/secure_seq.h> -#include <net/checksum.h> -#include <net/ip6_checksum.h> -#include <net/ip6_route.h> -#include <net/ipv6.h> - -#include <net/netfilter/nf_conntrack_core.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static const struct nf_nat_l3proto nf_nat_l3proto_ipv6; - -#ifdef CONFIG_XFRM -static void nf_nat_ipv6_decode_session(struct sk_buff *skb, - const struct nf_conn *ct, - enum ip_conntrack_dir dir, - unsigned long statusbit, - struct flowi *fl) -{ - const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; - struct flowi6 *fl6 = &fl->u.ip6; - - if (ct->status & statusbit) { - fl6->daddr = t->dst.u3.in6; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl6->fl6_dport = t->dst.u.all; - } - - statusbit ^= IPS_NAT_MASK; - - if (ct->status & statusbit) { - fl6->saddr = t->src.u3.in6; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl6->fl6_sport = t->src.u.all; - } -} -#endif - -static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *target, - enum nf_nat_manip_type maniptype) -{ - struct ipv6hdr *ipv6h; - __be16 frag_off; - int hdroff; - u8 nexthdr; - - if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h))) - return false; - - ipv6h = (void *)skb->data + iphdroff; - nexthdr = ipv6h->nexthdr; - hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h), - &nexthdr, &frag_off); - if (hdroff < 0) - goto manip_addr; - - if ((frag_off & htons(~0x7)) == 0 && - !nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff, - target, maniptype)) - return false; - - /* must reload, offset might have changed */ - ipv6h = (void *)skb->data + iphdroff; - -manip_addr: - if (maniptype == NF_NAT_MANIP_SRC) - ipv6h->saddr = target->src.u3.in6; - else - ipv6h->daddr = target->dst.u3.in6; - - return true; -} - -static void nf_nat_ipv6_csum_update(struct sk_buff *skb, - unsigned int iphdroff, __sum16 *check, - const struct nf_conntrack_tuple *t, - enum nf_nat_manip_type maniptype) -{ - const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff); - const struct in6_addr *oldip, *newip; - - if (maniptype == NF_NAT_MANIP_SRC) { - oldip = &ipv6h->saddr; - newip = &t->src.u3.in6; - } else { - oldip = &ipv6h->daddr; - newip = &t->dst.u3.in6; - } - inet_proto_csum_replace16(check, skb, oldip->s6_addr32, - newip->s6_addr32, true); -} - -static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, - u8 proto, void *data, __sum16 *check, - int datalen, int oldlen) -{ - if (skb->ip_summed != CHECKSUM_PARTIAL) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + - (data - (void *)skb->data); - skb->csum_offset = (void *)check - data; - *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - datalen, proto, 0); - } else - inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), true); -} - -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) -static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range2 *range) -{ - if (tb[CTA_NAT_V6_MINIP]) { - nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP], - sizeof(struct in6_addr)); - range->flags |= NF_NAT_RANGE_MAP_IPS; - } - - if (tb[CTA_NAT_V6_MAXIP]) - nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP], - sizeof(struct in6_addr)); - else - range->max_addr = range->min_addr; - - return 0; -} -#endif - -static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { - .l3proto = NFPROTO_IPV6, - .manip_pkt = nf_nat_ipv6_manip_pkt, - .csum_update = nf_nat_ipv6_csum_update, - .csum_recalc = nf_nat_ipv6_csum_recalc, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_ipv6_nlattr_to_range, -#endif -#ifdef CONFIG_XFRM - .decode_session = nf_nat_ipv6_decode_session, -#endif -}; - -int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - unsigned int hdrlen) -{ - struct { - struct icmp6hdr icmp6; - struct ipv6hdr ip6; - } *inside; - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); - struct nf_conntrack_tuple target; - unsigned long statusbit; - - WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); - - if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) - return 0; - if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) - return 0; - - inside = (void *)skb->data + hdrlen; - if (inside->icmp6.icmp6_type == NDISC_REDIRECT) { - if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) - return 0; - if (ct->status & IPS_NAT_MASK) - return 0; - } - - if (manip == NF_NAT_MANIP_SRC) - statusbit = IPS_SRC_NAT; - else - statusbit = IPS_DST_NAT; - - /* Invert if this is reply direction */ - if (dir == IP_CT_DIR_REPLY) - statusbit ^= IPS_NAT_MASK; - - if (!(ct->status & statusbit)) - return 1; - - if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), - &ct->tuplehash[!dir].tuple, !manip)) - return 0; - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); - inside = (void *)skb->data + hdrlen; - inside->icmp6.icmp6_cksum = 0; - inside->icmp6.icmp6_cksum = - csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - skb->len - hdrlen, IPPROTO_ICMPV6, - skb_checksum(skb, hdrlen, - skb->len - hdrlen, 0)); - } - - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - target.dst.protonum = IPPROTO_ICMPV6; - if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); - -static unsigned int -nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be16 frag_off; - int hdrlen; - u8 nexthdr; - - ct = nf_ct_get(skb, &ctinfo); - /* Can't track? It's not due to stress, or conntrack would - * have dropped it. Hence it's the user's responsibilty to - * packet filter it out, or implement conntrack/NAT for that - * protocol. 8) --RR - */ - if (!ct) - return NF_ACCEPT; - - if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) { - nexthdr = ipv6_hdr(skb)->nexthdr; - hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), - &nexthdr, &frag_off); - - if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { - if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - state->hook, - hdrlen)) - return NF_DROP; - else - return NF_ACCEPT; - } - } - - return nf_nat_inet_fn(priv, skb, state); -} - -static unsigned int -nf_nat_ipv6_in(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - unsigned int ret; - struct in6_addr daddr = ipv6_hdr(skb)->daddr; - - ret = nf_nat_ipv6_fn(priv, skb, state); - if (ret != NF_DROP && ret != NF_STOLEN && - ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) - skb_dst_drop(skb); - - return ret; -} - -static unsigned int -nf_nat_ipv6_out(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#ifdef CONFIG_XFRM - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - int err; -#endif - unsigned int ret; - - ret = nf_nat_ipv6_fn(priv, skb, state); -#ifdef CONFIG_XFRM - if (ret != NF_DROP && ret != NF_STOLEN && - !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3) || - (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && - ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all)) { - err = nf_xfrm_me_harder(state->net, skb, AF_INET6); - if (err < 0) - ret = NF_DROP_ERR(err); - } - } -#endif - return ret; -} - -static unsigned int -nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - unsigned int ret; - int err; - - ret = nf_nat_ipv6_fn(priv, skb, state); - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, - &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(state->net, skb); - if (err < 0) - ret = NF_DROP_ERR(err); - } -#ifdef CONFIG_XFRM - else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && - ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) { - err = nf_xfrm_me_harder(state->net, skb, AF_INET6); - if (err < 0) - ret = NF_DROP_ERR(err); - } -#endif - } - return ret; -} - -static const struct nf_hook_ops nf_nat_ipv6_ops[] = { - /* Before packet filtering, change destination */ - { - .hook = nf_nat_ipv6_in, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_ipv6_out, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_NAT_SRC, - }, - /* Before packet filtering, change destination */ - { - .hook = nf_nat_ipv6_local_fn, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_ipv6_fn, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP6_PRI_NAT_SRC, - }, -}; - -int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops) -{ - return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); -} -EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_register_fn); - -void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops) -{ - nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); -} -EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn); - -static int __init nf_nat_l3proto_ipv6_init(void) -{ - return nf_nat_l3proto_register(&nf_nat_l3proto_ipv6); -} - -static void __exit nf_nat_l3proto_ipv6_exit(void) -{ - nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6); -} - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("nf-nat-" __stringify(AF_INET6)); - -module_init(nf_nat_l3proto_ipv6_init); -module_exit(nf_nat_l3proto_ipv6_exit); diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c deleted file mode 100644 index 0ad0da5a2600..000000000000 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include <linux/kernel.h> -#include <linux/atomic.h> -#include <linux/netdevice.h> -#include <linux/ipv6.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <net/netfilter/nf_nat.h> -#include <net/addrconf.h> -#include <net/ipv6.h> -#include <net/netfilter/ipv6/nf_nat_masquerade.h> - -#define MAX_WORK_COUNT 16 - -static atomic_t v6_worker_count; - -unsigned int -nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, - const struct net_device *out) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn_nat *nat; - struct in6_addr src; - struct nf_conn *ct; - struct nf_nat_range2 newrange; - - ct = nf_ct_get(skb, &ctinfo); - WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY))); - - if (ipv6_dev_get_saddr(nf_ct_net(ct), out, - &ipv6_hdr(skb)->daddr, 0, &src) < 0) - return NF_DROP; - - nat = nf_ct_nat_ext_add(ct); - if (nat) - nat->masq_index = out->ifindex; - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = src; - newrange.max_addr.in6 = src; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); - -static int device_cmp(struct nf_conn *ct, void *ifindex) -{ - const struct nf_conn_nat *nat = nfct_nat(ct); - - if (!nat) - return 0; - if (nf_ct_l3num(ct) != NFPROTO_IPV6) - return 0; - return nat->masq_index == (int)(long)ifindex; -} - -static int masq_device_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - const struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct net *net = dev_net(dev); - - if (event == NETDEV_DOWN) - nf_ct_iterate_cleanup_net(net, device_cmp, - (void *)(long)dev->ifindex, 0, 0); - - return NOTIFY_DONE; -} - -static struct notifier_block masq_dev_notifier = { - .notifier_call = masq_device_event, -}; - -struct masq_dev_work { - struct work_struct work; - struct net *net; - struct in6_addr addr; - int ifindex; -}; - -static int inet_cmp(struct nf_conn *ct, void *work) -{ - struct masq_dev_work *w = (struct masq_dev_work *)work; - struct nf_conntrack_tuple *tuple; - - if (!device_cmp(ct, (void *)(long)w->ifindex)) - return 0; - - tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - - return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); -} - -static void iterate_cleanup_work(struct work_struct *work) -{ - struct masq_dev_work *w; - - w = container_of(work, struct masq_dev_work, work); - - nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0); - - put_net(w->net); - kfree(w); - atomic_dec(&v6_worker_count); - module_put(THIS_MODULE); -} - -/* ipv6 inet notifier is an atomic notifier, i.e. we cannot - * schedule. - * - * Unfortunately, nf_ct_iterate_cleanup_net can run for a long - * time if there are lots of conntracks and the system - * handles high softirq load, so it frequently calls cond_resched - * while iterating the conntrack table. - * - * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue. - * - * As we can have 'a lot' of inet_events (depending on amount - * of ipv6 addresses being deleted), we also need to add an upper - * limit to the number of queued work items. - */ -static int masq_inet6_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct inet6_ifaddr *ifa = ptr; - const struct net_device *dev; - struct masq_dev_work *w; - struct net *net; - - if (event != NETDEV_DOWN || - atomic_read(&v6_worker_count) >= MAX_WORK_COUNT) - return NOTIFY_DONE; - - dev = ifa->idev->dev; - net = maybe_get_net(dev_net(dev)); - if (!net) - return NOTIFY_DONE; - - if (!try_module_get(THIS_MODULE)) - goto err_module; - - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (w) { - atomic_inc(&v6_worker_count); - - INIT_WORK(&w->work, iterate_cleanup_work); - w->ifindex = dev->ifindex; - w->net = net; - w->addr = ifa->addr; - schedule_work(&w->work); - - return NOTIFY_DONE; - } - - module_put(THIS_MODULE); - err_module: - put_net(net); - return NOTIFY_DONE; -} - -static struct notifier_block masq_inet6_notifier = { - .notifier_call = masq_inet6_event, -}; - -static int masq_refcnt; -static DEFINE_MUTEX(masq_mutex); - -int nf_nat_masquerade_ipv6_register_notifier(void) -{ - int ret = 0; - - mutex_lock(&masq_mutex); - /* check if the notifier is already set */ - if (++masq_refcnt > 1) - goto out_unlock; - - ret = register_netdevice_notifier(&masq_dev_notifier); - if (ret) - goto err_dec; - - ret = register_inet6addr_notifier(&masq_inet6_notifier); - if (ret) - goto err_unregister; - - mutex_unlock(&masq_mutex); - return ret; - -err_unregister: - unregister_netdevice_notifier(&masq_dev_notifier); -err_dec: - masq_refcnt--; -out_unlock: - mutex_unlock(&masq_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier); - -void nf_nat_masquerade_ipv6_unregister_notifier(void) -{ - mutex_lock(&masq_mutex); - /* check if the notifier still has clients */ - if (--masq_refcnt > 0) - goto out_unlock; - - unregister_inet6addr_notifier(&masq_inet6_notifier); - unregister_netdevice_notifier(&masq_dev_notifier); -out_unlock: - mutex_unlock(&masq_mutex); -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index b9c8a763c863..02e9228641e0 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -233,6 +233,9 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook) if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; + if (!nf_reject_verify_csum(proto)) + return true; + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; } diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c deleted file mode 100644 index 8a081ad7d5db..000000000000 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> - * Copyright (c) 2012 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables_ipv6.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/ipv6.h> - -static unsigned int nft_nat_do_chain(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv6(&pkt, skb); - - return nft_do_chain(&pkt, priv); -} - -static int nft_nat_ipv6_reg(struct net *net, const struct nf_hook_ops *ops) -{ - return nf_nat_l3proto_ipv6_register_fn(net, ops); -} - -static void nft_nat_ipv6_unreg(struct net *net, const struct nf_hook_ops *ops) -{ - nf_nat_l3proto_ipv6_unregister_fn(net, ops); -} - -static const struct nft_chain_type nft_chain_nat_ipv6 = { - .name = "nat", - .type = NFT_CHAIN_T_NAT, - .family = NFPROTO_IPV6, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_LOCAL_IN), - .hooks = { - [NF_INET_PRE_ROUTING] = nft_nat_do_chain, - [NF_INET_POST_ROUTING] = nft_nat_do_chain, - [NF_INET_LOCAL_OUT] = nft_nat_do_chain, - [NF_INET_LOCAL_IN] = nft_nat_do_chain, - }, - .ops_register = nft_nat_ipv6_reg, - .ops_unregister = nft_nat_ipv6_unreg, -}; - -static int __init nft_chain_nat_ipv6_init(void) -{ - nft_register_chain_type(&nft_chain_nat_ipv6); - - return 0; -} - -static void __exit nft_chain_nat_ipv6_exit(void) -{ - nft_unregister_chain_type(&nft_chain_nat_ipv6); -} - -module_init(nft_chain_nat_ipv6_init); -module_exit(nft_chain_nat_ipv6_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>"); -MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat"); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 36be3cf0adef..73cdc0bc63f7 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -59,7 +59,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, struct ipv6hdr *iph) { const struct net_device *dev = NULL; - const struct nf_ipv6_ops *v6ops; int route_err, addrtype; struct rt6_info *rt; struct flowi6 fl6 = { @@ -68,10 +67,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, }; u32 ret = 0; - v6ops = nf_get_ipv6_ops(); - if (!v6ops) - return RTN_UNREACHABLE; - if (priv->flags & NFTA_FIB_F_IIF) dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) @@ -79,10 +74,10 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); - if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) + if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt, + route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt, flowi6_to_flowi(&fl6), false); if (route_err) goto err; diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c deleted file mode 100644 index e06c82e9dfcd..000000000000 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nft_masq.h> -#include <net/netfilter/ipv6/nf_nat_masquerade.h> - -static void nft_masq_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_masq *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - range.flags = priv->flags; - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - } - regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, - nft_out(pkt)); -} - -static void -nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) -{ - nf_ct_netns_put(ctx->net, NFPROTO_IPV6); -} - -static struct nft_expr_type nft_masq_ipv6_type; -static const struct nft_expr_ops nft_masq_ipv6_ops = { - .type = &nft_masq_ipv6_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_ipv6_eval, - .init = nft_masq_init, - .destroy = nft_masq_ipv6_destroy, - .dump = nft_masq_dump, - .validate = nft_masq_validate, -}; - -static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { - .family = NFPROTO_IPV6, - .name = "masq", - .ops = &nft_masq_ipv6_ops, - .policy = nft_masq_policy, - .maxattr = NFTA_MASQ_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_masq_ipv6_module_init(void) -{ - int ret; - - ret = nft_register_expr(&nft_masq_ipv6_type); - if (ret < 0) - return ret; - - ret = nf_nat_masquerade_ipv6_register_notifier(); - if (ret) - nft_unregister_expr(&nft_masq_ipv6_type); - - return ret; -} - -static void __exit nft_masq_ipv6_module_exit(void) -{ - nft_unregister_expr(&nft_masq_ipv6_type); - nf_nat_masquerade_ipv6_unregister_notifier(); -} - -module_init(nft_masq_ipv6_module_init); -module_exit(nft_masq_ipv6_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq"); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c deleted file mode 100644 index 74269865acc8..000000000000 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nft_redir.h> -#include <net/netfilter/nf_nat_redirect.h> - -static void nft_redir_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - - range.flags |= priv->flags; - - regs->verdict.code = - nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); -} - -static void -nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) -{ - nf_ct_netns_put(ctx->net, NFPROTO_IPV6); -} - -static struct nft_expr_type nft_redir_ipv6_type; -static const struct nft_expr_ops nft_redir_ipv6_ops = { - .type = &nft_redir_ipv6_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv6_eval, - .init = nft_redir_init, - .destroy = nft_redir_ipv6_destroy, - .dump = nft_redir_dump, - .validate = nft_redir_validate, -}; - -static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { - .family = NFPROTO_IPV6, - .name = "redir", - .ops = &nft_redir_ipv6_ops, - .policy = nft_redir_policy, - .maxattr = NFTA_REDIR_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_redir_ipv6_module_init(void) -{ - return nft_register_expr(&nft_redir_ipv6_type); -} - -static void __exit nft_redir_ipv6_module_exit(void) -{ - nft_unregister_expr(&nft_redir_ipv6_type); -} - -module_init(nft_redir_ipv6_module_init); -module_exit(nft_redir_ipv6_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 36a3d8dc61f5..1a832f5e190b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -69,8 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) static struct inet_frags ip6_frags; -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, - struct net_device *dev); +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); static void ip6_frag_expire(struct timer_list *t) { @@ -111,21 +111,26 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff, u32 *prob_offset) { - struct sk_buff *prev, *next; - struct net_device *dev; - int offset, end, fragsize; struct net *net = dev_net(skb_dst(skb)->dev); + int offset, end, fragsize; + struct sk_buff *prev_tail; + struct net_device *dev; + int err = -ENOENT; u8 ecn; if (fq->q.flags & INET_FRAG_COMPLETE) goto err; + err = -EINVAL; offset = ntohs(fhdr->frag_off) & ~0x7; end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb); + /* note that if prob_offset is set, the skb is freed elsewhere, + * we do not free it here. + */ return -1; } @@ -170,62 +175,27 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (end == offset) goto discard_fq; + err = -ENOMEM; /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) goto discard_fq; - if (pskb_trim_rcsum(skb, end - offset)) + err = pskb_trim_rcsum(skb, end - offset); + if (err) goto discard_fq; - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = fq->q.fragments_tail; - if (!prev || prev->ip_defrag_offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (next->ip_defrag_offset >= offset) - break; /* bingo! */ - prev = next; - } - -found: - /* RFC5722, Section 4, amended by Errata ID : 3089 - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments) MUST be silently discarded. - */ - - /* Check for overlap with preceding fragment. */ - if (prev && - (prev->ip_defrag_offset + prev->len) > offset) - goto discard_fq; - - /* Look for overlap with succeeding segment. */ - if (next && next->ip_defrag_offset < end) - goto discard_fq; - - /* Note : skb->ip_defrag_offset and skb->sk share the same location */ + /* Note : skb->rbnode and skb->dev share the same location. */ dev = skb->dev; - if (dev) - fq->iif = dev->ifindex; /* Makes sure compiler wont do silly aliasing games */ barrier(); - skb->ip_defrag_offset = offset; - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; + prev_tail = fq->q.fragments_tail; + err = inet_frag_queue_insert(&fq->q, skb, offset, end); + if (err) + goto insert_error; + + if (dev) + fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; @@ -246,44 +216,48 @@ found: if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { - int res; unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - res = ip6_frag_reasm(fq, prev, dev); + err = ip6_frag_reasm(fq, skb, prev_tail, dev); skb->_skb_refdst = orefdst; - return res; + return err; } skb_dst_drop(skb); - return -1; + return -EINPROGRESS; +insert_error: + if (err == IPFRAG_DUP) { + kfree_skb(skb); + return -EINVAL; + } + err = -EINVAL; + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASM_OVERLAPS); discard_fq: inet_frag_kill(&fq->q); -err: __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); +err: kfree_skb(skb); - return -1; + return err; } /* * Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, - struct net_device *dev) +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev) { struct net *net = container_of(fq->q.net, struct net, ipv6.frags); - struct sk_buff *fp, *head = fq->q.fragments; - int payload_len, delta; unsigned int nhoff; - int sum_truesize; + void *reasm_data; + int payload_len; u8 ecn; inet_frag_kill(&fq->q); @@ -292,128 +266,47 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, if (unlikely(ecn == 0xff)) goto out_fail; - /* Make the one we just received the head. */ - if (prev) { - head = prev->next; - fp = skb_clone(head, GFP_ATOMIC); - - if (!fp) - goto out_oom; - - fp->next = head->next; - if (!fp->next) - fq->q.fragments_tail = fp; - prev->next = fp; - - skb_morph(head, fq->q.fragments); - head->next = fq->q.fragments->next; - - consume_skb(fq->q.fragments); - fq->q.fragments = head; - } - - WARN_ON(head == NULL); - WARN_ON(head->ip_defrag_offset != 0); + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); + if (!reasm_data) + goto out_oom; - /* Unfragmented part is taken from the first segment. */ - payload_len = ((head->data - skb_network_header(head)) - + payload_len = ((skb->data - skb_network_header(skb)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) goto out_oversize; - delta = - head->truesize; - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - goto out_oom; - - delta += head->truesize; - if (delta) - add_frag_mem_limit(fq->q.net, delta); - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (!clone) - goto out_oom; - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - add_frag_mem_limit(fq->q.net, clone->truesize); - } - /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ nhoff = fq->nhoffset; - skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; - memmove(head->head + sizeof(struct frag_hdr), head->head, - (head->data - head->head) - sizeof(struct frag_hdr)); - if (skb_mac_header_was_set(head)) - head->mac_header += sizeof(struct frag_hdr); - head->network_header += sizeof(struct frag_hdr); - - skb_reset_transport_header(head); - skb_push(head, head->data - skb_network_header(head)); - - sum_truesize = head->truesize; - for (fp = head->next; fp;) { - bool headstolen; - int delta; - struct sk_buff *next = fp->next; - - sum_truesize += fp->truesize; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - - if (skb_try_coalesce(head, fp, &headstolen, &delta)) { - kfree_skb_partial(fp, headstolen); - } else { - fp->sk = NULL; - if (!skb_shinfo(head)->frag_list) - skb_shinfo(head)->frag_list = fp; - head->data_len += fp->len; - head->len += fp->len; - head->truesize += fp->truesize; - } - fp = next; - } - sub_frag_mem_limit(fq->q.net, sum_truesize); + skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0]; + memmove(skb->head + sizeof(struct frag_hdr), skb->head, + (skb->data - skb->head) - sizeof(struct frag_hdr)); + if (skb_mac_header_was_set(skb)) + skb->mac_header += sizeof(struct frag_hdr); + skb->network_header += sizeof(struct frag_hdr); + + skb_reset_transport_header(skb); + + inet_frag_reasm_finish(&fq->q, skb, reasm_data); - skb_mark_not_on_list(head); - head->dev = dev; - head->tstamp = fq->q.stamp; - ipv6_hdr(head)->payload_len = htons(payload_len); - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); - IP6CB(head)->nhoff = nhoff; - IP6CB(head)->flags |= IP6SKB_FRAGMENTED; - IP6CB(head)->frag_max_size = fq->q.max_size; + skb->dev = dev; + ipv6_hdr(skb)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); + IP6CB(skb)->nhoff = nhoff; + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; + IP6CB(skb)->frag_max_size = fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ - skb_postpush_rcsum(head, skb_network_header(head), - skb_network_header_len(head)); + skb_postpush_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); - fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; + fq->q.last_run_head = NULL; return 1; out_oversize: @@ -464,10 +357,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && - fhdr->frag_off & htons(IP6_MF)) - goto fail_hdr; - iif = skb->dev ? skb->dev->ifindex : 0; fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { @@ -485,6 +374,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) if (prob_offset) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); + /* icmpv6_param_prob() calls kfree_skb(skb) */ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset); } return ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8dad1d690b78..4ef4bbdb49d4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2289,14 +2289,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { - bool from_set; - - rcu_read_lock(); - from_set = !!rcu_dereference(rt->from); - rcu_read_unlock(); - return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || from_set); + (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, @@ -4832,6 +4826,73 @@ int rt6_dump_route(struct fib6_info *rt, void *p_arg) arg->cb->nlh->nlmsg_seq, flags); } +static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid header for get route request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || + (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || + rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || + rtm->rtm_type) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); + return -EINVAL; + } + if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid flags for get route request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); + if (err) + return err; + + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || + (tb[RTA_DST] && !rtm->rtm_dst_len)) { + NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_SRC: + case RTA_DST: + case RTA_IIF: + case RTA_OIF: + case RTA_MARK: + case RTA_UID: + case RTA_SPORT: + case RTA_DPORT: + case RTA_IP_PROTO: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -4846,8 +4907,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct flowi6 fl6 = {}; bool fibmatch; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, - extack); + err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b81eb7cb815e..57ef69a10889 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -220,8 +220,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; - SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); - if (__ipv6_only_sock(sk)) return -ENETUNREACH; @@ -1864,7 +1862,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) refcount_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), - (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), tp->snd_cwnd, state == TCP_LISTEN ? fastopenq->max_qlen : |