Diffstat (limited to 'net'): 63 files changed, 1275 insertions, 278 deletions
diff --git a/net/Kconfig b/net/Kconfig index f19c0c3b9589..102f781a0131 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -297,7 +297,8 @@ config BPF_JIT Note, admin should enable this feature changing: /proc/sys/net/core/bpf_jit_enable - /proc/sys/net/core/bpf_jit_harden (optional) + /proc/sys/net/core/bpf_jit_harden (optional) + /proc/sys/net/core/bpf_jit_kallsyms (optional) config NET_FLOW_LIMIT bool diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 4ac11574117c..4f598dc2d916 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -65,7 +65,7 @@ static inline unsigned long hold_time(const struct net_bridge *br) static inline int has_expired(const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb) { - return !fdb->is_static && + return !fdb->is_static && !fdb->added_by_external_learn && time_before_eq(fdb->updated + hold_time(br), jiffies); } diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index b2b79a070162..6d2c4eed2dc8 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -85,6 +85,8 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, return 0; out: dst_release(&vlan->tinfo.tunnel_dst->dst); + vlan->tinfo.tunnel_dst = NULL; + vlan->tinfo.tunnel_id = 0; return err; } diff --git a/net/core/dev.c b/net/core/dev.c index 2f1bbe1bf67c..05d19c6acf94 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4510,6 +4510,11 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (&ptype->list == head) goto normal; + if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) { + ret = GRO_CONSUMED; + goto ok; + } + same_flow = NAPI_GRO_CB(skb)->same_flow; ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; @@ -4614,6 +4619,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) case GRO_HELD: case GRO_MERGED: + case GRO_CONSUMED: break; } @@ -4685,6 +4691,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, break; case GRO_MERGED: + case GRO_CONSUMED: break; } diff --git a/net/core/filter.c b/net/core/filter.c index 0b753cbb2536..e466e0040137 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3296,47 +3296,47 @@ static const struct bpf_verifier_ops cg_sock_ops = { .convert_ctx_access = sock_filter_convert_ctx_access, }; -static struct bpf_prog_type_list sk_filter_type __read_mostly = { +static struct bpf_prog_type_list sk_filter_type __ro_after_init = { .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, }; -static struct bpf_prog_type_list sched_cls_type __read_mostly = { +static struct bpf_prog_type_list sched_cls_type __ro_after_init = { .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_CLS, }; -static struct bpf_prog_type_list sched_act_type __read_mostly = { +static struct bpf_prog_type_list sched_act_type __ro_after_init = { .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_ACT, }; -static struct bpf_prog_type_list xdp_type __read_mostly = { +static struct bpf_prog_type_list xdp_type __ro_after_init = { .ops = &xdp_ops, .type = BPF_PROG_TYPE_XDP, }; -static struct bpf_prog_type_list cg_skb_type __read_mostly = { +static struct bpf_prog_type_list cg_skb_type __ro_after_init = { .ops = &cg_skb_ops, .type = BPF_PROG_TYPE_CGROUP_SKB, }; -static struct bpf_prog_type_list lwt_in_type __read_mostly = { +static struct bpf_prog_type_list lwt_in_type __ro_after_init = { .ops = &lwt_inout_ops, .type = BPF_PROG_TYPE_LWT_IN, }; -static struct bpf_prog_type_list lwt_out_type __read_mostly = { +static struct bpf_prog_type_list 
lwt_out_type __ro_after_init = { .ops = &lwt_inout_ops, .type = BPF_PROG_TYPE_LWT_OUT, }; -static struct bpf_prog_type_list lwt_xmit_type __read_mostly = { +static struct bpf_prog_type_list lwt_xmit_type __ro_after_init = { .ops = &lwt_xmit_ops, .type = BPF_PROG_TYPE_LWT_XMIT, }; -static struct bpf_prog_type_list cg_sock_type __read_mostly = { +static struct bpf_prog_type_list cg_sock_type __ro_after_init = { .ops = &cg_sock_ops, .type = BPF_PROG_TYPE_CGROUP_SOCK }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7bb12e07ffef..e7c12caa20c8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2923,7 +2923,8 @@ static void neigh_proc_update(struct ctl_table *ctl, int write) return; set_bit(index, p->data_state); - call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); + if (index == NEIGH_VAR_DELAY_PROBE_TIME) + call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); if (!dev) /* NULL dev means this is default value */ neigh_copy_dflt_parms(net, p, index); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index adfb54b896da..e3286d32eca5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -876,8 +876,6 @@ static size_t rtnl_port_size(const struct net_device *dev, { size_t port_size = nla_total_size(4) /* PORT_VF */ + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ - + nla_total_size(sizeof(struct ifla_port_vsi)) - /* PORT_VSI_TYPE */ + nla_total_size(PORT_UUID_MAX) /* PORT_INSTANCE_UUID */ + nla_total_size(PORT_UUID_MAX) /* PORT_HOST_UUID */ + nla_total_size(1) /* PROT_VDP_REQUEST */ @@ -1491,14 +1489,19 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { [IFLA_PORT_VF] = { .type = NLA_U32 }, [IFLA_PORT_PROFILE] = { .type = NLA_STRING, .len = PORT_PROFILE_MAX }, - [IFLA_PORT_VSI_TYPE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_port_vsi)}, [IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY, .len = PORT_UUID_MAX }, [IFLA_PORT_HOST_UUID] = { .type = NLA_STRING, .len = PORT_UUID_MAX }, [IFLA_PORT_REQUEST] = { .type = NLA_U8, }, [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, + + /* Unused, but we need to keep it here since user space could + * fill it. It's also broken with regard to NLA_BINARY use in + * combination with structs. 
+ */ + [IFLA_PORT_VSI_TYPE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_port_vsi) }, }; static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index eaa72eb0399c..4ead336e14ea 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -334,6 +334,13 @@ static struct ctl_table net_core_table[] = { .mode = 0600, .proc_handler = proc_dointvec, }, + { + .procname = "bpf_jit_kallsyms", + .data = &bpf_jit_kallsyms, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec, + }, # endif #endif { diff --git a/net/dccp/input.c b/net/dccp/input.c index ba347184bda9..8fedc2d49770 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -606,7 +606,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0) return 1; - goto discard; + consume_skb(skb); + return 0; } if (dh->dccph_type == DCCP_PKT_RESET) goto discard; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index d21be3b3d9cc..1446810047f5 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -475,7 +475,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e30f9caddae8..91a2557942fa 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -361,6 +361,19 @@ config INET_ESP If unsure, say Y. +config INET_ESP_OFFLOAD + tristate "IP: ESP transformation offload" + depends on INET_ESP + select XFRM_OFFLOAD + default n + ---help--- + Support for ESP transformation offload. This makes sense + only if this system really does IPsec and want to do it + with high throughput. A typical desktop system does not + need it, even if it does IPsec. + + If unsure, say N. 
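
Note: the new INET_ESP_OFFLOAD option above builds esp4_offload.c (added later in this diff), whose gro_receive handler returns ERR_PTR(-EINPROGRESS) once xfrm_input() has taken over the skb. That is what the GRO_CONSUMED handling added to dev_gro_receive() and the skb_gro_flush_final() calls in the eth/inet/ipv6 gro_receive hunks are for. skb_gro_flush_final() itself is not shown in this diff; a plausible minimal sketch, assuming it only needs to skip the flush update when the lower layer consumed the skb, would be:

/* Sketch only -- the real helper lives in the netdevice GRO headers.
 * When a gro_receive callback (such as the new esp4_gro_receive())
 * returns ERR_PTR(-EINPROGRESS), the packet was handed to xfrm_input()
 * and the GRO flush state must be left alone.
 */
static inline void skb_gro_flush_final(struct sk_buff *skb,
				       struct sk_buff **pp, int flush)
{
	if (PTR_ERR(pp) != -EINPROGRESS)
		NAPI_GRO_CB(skb)->flush |= flush;
}
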
+ config INET_IPCOMP tristate "IP: IPComp transformation" select INET_XFRM_TUNNEL diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 48af58a5686e..c6d4238ff94a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_NET_IPVTI) += ip_vti.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_AH) += ah4.o obj-$(CONFIG_INET_ESP) += esp4.o +obj-$(CONFIG_INET_ESP_OFFLOAD) += esp4_offload.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 685ba53df2d1..602d40f43687 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1423,7 +1423,7 @@ out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 89a8cac4726a..51b27ae09fbd 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1263,7 +1263,7 @@ void __init arp_init(void) /* * ax25 -> ASCII conversion */ -static char *ax2asc2(ax25_address *a, char *buf) +static void ax2asc2(ax25_address *a, char *buf) { char c, *s; int n; @@ -1285,10 +1285,10 @@ static char *ax2asc2(ax25_address *a, char *buf) *s++ = n + '0'; *s++ = '\0'; - if (*buf == '\0' || *buf == '-') - return "*"; - - return buf; + if (*buf == '\0' || *buf == '-') { + buf[0] = '*'; + buf[1] = '\0'; + } } #endif /* CONFIG_AX25 */ @@ -1322,7 +1322,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, } #endif sprintf(tbuf, "%pI4", n->primary_key); - seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", + seq_printf(seq, "%-16s 0x%-10x0x%-10x%-17s * %s\n", tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); read_unlock(&n->lock); } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c new file mode 100644 index 000000000000..1de442632406 --- /dev/null +++ b/net/ipv4/esp4_offload.c @@ -0,0 +1,106 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * Copyright (C) 2016 secunet Security Networks AG + * Author: Steffen Klassert <steffen.klassert@secunet.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * ESP GRO support + */ + +#include <linux/skbuff.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <crypto/aead.h> +#include <crypto/authenc.h> +#include <linux/err.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/xfrm.h> +#include <net/esp.h> +#include <linux/scatterlist.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <net/udp.h> + +static struct sk_buff **esp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + struct xfrm_offload *xo; + struct xfrm_state *x; + __be32 seq; + __be32 spi; + int err; + + skb_pull(skb, offset); + + if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) + goto out; + + err = secpath_set(skb); + if (err) + goto out; + + if (skb->sp->len == XFRM_MAX_DEPTH) + goto out; + + x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ip_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET); + if (!x) + goto out; + + skb->sp->xvec[skb->sp->len++] = x; + skb->sp->olen++; + + xo = xfrm_offload(skb); + if (!xo) { + xfrm_state_put(x); + goto out; + } + xo->flags |= XFRM_GRO; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + XFRM_SPI_SKB_CB(skb)->seq = seq; + + /* We don't need to handle errors from xfrm_input, it does all + * the error handling and frees the resources on error. */ + xfrm_input(skb, IPPROTO_ESP, spi, -2); + + return ERR_PTR(-EINPROGRESS); +out: + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + +static const struct net_offload esp4_offload = { + .callbacks = { + .gro_receive = esp4_gro_receive, + }, +}; + +static int __init esp4_offload_init(void) +{ + return inet_add_offload(&esp4_offload, IPPROTO_ESP); +} + +static void __exit esp4_offload_exit(void) +{ + inet_del_offload(&esp4_offload, IPPROTO_ESP); +} + +module_init(esp4_offload_init); +module_exit(esp4_offload_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d44a6989e76d..da385ae997a3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1288,7 +1288,7 @@ new_segment: } else { skb_fill_page_desc(skb, i, pfrag->page, pfrag->offset, copy); - get_page(pfrag->page); + page_ref_inc(pfrag->page); } pfrag->offset += copy; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 61f272a99a49..22548b5f05cb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -83,7 +83,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) tcp_skb_pcount(skb)); } -/* SND.NXT, if window was not shrunk. +/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one + * window scaling factor due to loss of precision. * If window has been shrunk, what should we make? It is not clear at all. * Using SND.UNA we will fail to open window, SND.NXT is out of window. 
:-( * Anything in between SND.UNA...SND.UNA+SND.WND also can be already @@ -93,7 +94,9 @@ static inline __u32 tcp_acceptable_seq(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - if (!before(tcp_wnd_end(tp), tp->snd_nxt)) + if (!before(tcp_wnd_end(tp), tp->snd_nxt) || + (tp->rx_opt.wscale_ok && + ((tp->snd_nxt - tcp_wnd_end(tp)) < (1 << tp->rx_opt.rcv_wscale)))) return tp->snd_nxt; else return tcp_wnd_end(tp); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f6c50af24a64..3d063eb37848 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -117,7 +117,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, (fwmark > 0 && skb->mark == fwmark)) && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { - spin_lock(&tcp_probe.lock); + spin_lock_bh(&tcp_probe.lock); /* If log fills, just silently drop */ if (tcp_probe_avail() > 1) { struct tcp_log *p = tcp_probe.log + tcp_probe.head; @@ -157,7 +157,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } tcp_probe.lastcwnd = tp->snd_cwnd; - spin_unlock(&tcp_probe.lock); + spin_unlock_bh(&tcp_probe.lock); wake_up(&tcp_probe.wait); } diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 62e1e72db461..1fc684111ce6 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -40,6 +40,7 @@ drop: int xfrm4_transport_finish(struct sk_buff *skb, int async) { + struct xfrm_offload *xo = xfrm_offload(skb); struct iphdr *iph = ip_hdr(skb); iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol; @@ -53,6 +54,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) iph->tot_len = htons(skb->len); ip_send_check(iph); + if (xo && (xo->flags & XFRM_GRO)) { + skb_mac_header_rebuild(skb); + return 0; + } + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm4_rcv_encap_finish); diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index fd840c7d75ea..4acc0508c5eb 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -43,6 +43,7 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); + struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -50,7 +51,8 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); - skb_reset_transport_header(skb); + if (!xo || !(xo->flags & XFRM_GRO)) + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 6a7ff6957535..71b4ecc195c7 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -17,8 +17,6 @@ #include <net/ip.h> #include <net/l3mdev.h> -static struct xfrm_policy_afinfo xfrm4_policy_afinfo; - static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, int tos, int oif, const xfrm_address_t *saddr, @@ -219,7 +217,7 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); - xfrm4_policy_afinfo.garbage_collect(net); + xfrm_garbage_collect_deferred(net); return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); } @@ -271,8 +269,7 @@ static struct dst_ops xfrm4_dst_ops_template = 
{ .gc_thresh = INT_MAX, }; -static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { - .family = AF_INET, +static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .dst_ops = &xfrm4_dst_ops_template, .dst_lookup = xfrm4_dst_lookup, .get_saddr = xfrm4_get_saddr, @@ -376,7 +373,7 @@ static struct pernet_operations __net_initdata xfrm4_net_ops = { static void __init xfrm4_policy_init(void) { - xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); + xfrm_policy_register_afinfo(&xfrm4_policy_afinfo, AF_INET); } void __init xfrm4_init(void) diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index dccefa9d84cf..8dd0e6ab8606 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -188,9 +188,8 @@ static const struct net_protocol ipcomp4_protocol = { .netns_ok = 1, }; -static struct xfrm_input_afinfo xfrm4_input_afinfo = { +static const struct xfrm_input_afinfo xfrm4_input_afinfo = { .family = AF_INET, - .owner = THIS_MODULE, .callback = xfrm4_rcv_cb, }; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 3c7c76b2a7ba..e2afe677a9d9 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -75,6 +75,19 @@ config INET6_ESP If unsure, say Y. +config INET6_ESP_OFFLOAD + tristate "IPv6: ESP transformation offload" + depends on INET6_ESP + select XFRM_OFFLOAD + default n + ---help--- + Support for ESP transformation offload. This makes sense + only if this system really does IPsec and want to do it + with high throughput. A typical desktop system does not + need it, even if it does IPsec. + + If unsure, say N. + config INET6_IPCOMP tristate "IPv6: IPComp transformation" select INET6_XFRM_TUNNEL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index a9e9fec387ce..217e9ff0e24b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -30,6 +30,7 @@ ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o obj-$(CONFIG_INET6_ESP) += esp6.o +obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a3eaafd87100..eec27f87efac 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -167,18 +167,22 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, if (np->sndflow) fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - addr_type = ipv6_addr_type(&usin->sin6_addr); - - if (addr_type == IPV6_ADDR_ANY) { + if (ipv6_addr_any(&usin->sin6_addr)) { /* * connect to self */ - usin->sin6_addr.s6_addr[15] = 0x01; + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + &usin->sin6_addr); + else + usin->sin6_addr = in6addr_loopback; } + addr_type = ipv6_addr_type(&usin->sin6_addr); + daddr = &usin->sin6_addr; - if (addr_type == IPV6_ADDR_MAPPED) { + if (addr_type & IPV6_ADDR_MAPPED) { struct sockaddr_in sin; if (__ipv6_only_sock(sk)) { diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c new file mode 100644 index 000000000000..d914eb93204a --- /dev/null +++ b/net/ipv6/esp6_offload.c @@ -0,0 +1,108 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET implementation + * + * Copyright (C) 2016 secunet Security Networks AG + * Author: Steffen Klassert <steffen.klassert@secunet.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * ESP GRO support + */ + +#include <linux/skbuff.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <crypto/aead.h> +#include <crypto/authenc.h> +#include <linux/err.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/xfrm.h> +#include <net/esp.h> +#include <linux/scatterlist.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <net/ip6_route.h> +#include <net/ipv6.h> +#include <linux/icmpv6.h> + +static struct sk_buff **esp6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + struct xfrm_offload *xo; + struct xfrm_state *x; + __be32 seq; + __be32 spi; + int err; + + skb_pull(skb, offset); + + if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) + goto out; + + err = secpath_set(skb); + if (err) + goto out; + + if (skb->sp->len == XFRM_MAX_DEPTH) + goto out; + + x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ipv6_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET6); + if (!x) + goto out; + + skb->sp->xvec[skb->sp->len++] = x; + skb->sp->olen++; + + xo = xfrm_offload(skb); + if (!xo) { + xfrm_state_put(x); + goto out; + } + xo->flags |= XFRM_GRO; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + XFRM_SPI_SKB_CB(skb)->seq = seq; + + /* We don't need to handle errors from xfrm_input, it does all + * the error handling and frees the resources on error. */ + xfrm_input(skb, IPPROTO_ESP, spi, -2); + + return ERR_PTR(-EINPROGRESS); +out: + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + +static const struct net_offload esp6_offload = { + .callbacks = { + .gro_receive = esp6_gro_receive, + }, +}; + +static int __init esp6_offload_init(void) +{ + return inet6_add_offload(&esp6_offload, IPPROTO_ESP); +} + +static void __exit esp6_offload_exit(void) +{ + inet6_del_offload(&esp6_offload, IPPROTO_ESP); +} + +module_init(esp6_offload_init); +module_exit(esp6_offload_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index fc7b4017ba24..0838e6d01d2e 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -253,7 +253,7 @@ out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a75871c62328..528b3c1f3fde 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1022,6 +1022,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, } } #endif + if (ipv6_addr_v4mapped(&fl6->saddr) && + !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { + err = -EAFNOSUPPORT; + goto out_err_release; + } return 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b5d27212db2f..21c719965b6b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -149,8 +149,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * connect() to INADDR_ANY means loopback (BSD'ism). 
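
Note: the __ip6_datagram_connect() hunk above, together with the tcp_v6_connect() and udpv6_sendmsg() hunks that follow, changes what "connect to the unspecified address" resolves to: a socket already using an IPv4-mapped source address now gets ::ffff:127.0.0.1 instead of ::1, so the flow stays on the IPv4 side of the stack. A hypothetical userspace illustration of the affected case (not part of the patch; addresses and ports are arbitrary):

/* Hypothetical demo: bind an AF_INET6 datagram socket to an
 * IPv4-mapped address, then connect() it to "::".  With these hunks
 * the kernel picks ::ffff:127.0.0.1 as the destination rather than ::1.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in6 local = { .sin6_family = AF_INET6,
				      .sin6_port = htons(5000) };
	struct sockaddr_in6 dst = { .sin6_family = AF_INET6,
				    .sin6_port = htons(5001) };
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	/* ::ffff:127.0.0.1, i.e. an IPv4-mapped local address */
	inet_pton(AF_INET6, "::ffff:127.0.0.1", &local.sin6_addr);
	if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0)
		perror("bind");

	/* dst.sin6_addr is left as :: -- "connect to any" means loopback */
	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("connect");

	close(fd);
	return 0;
}
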
*/ - if (ipv6_addr_any(&usin->sin6_addr)) - usin->sin6_addr.s6_addr[15] = 0x1; + if (ipv6_addr_any(&usin->sin6_addr)) { + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + &usin->sin6_addr); + else + usin->sin6_addr = in6addr_loopback; + } addr_type = ipv6_addr_type(&usin->sin6_addr); @@ -189,7 +194,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * TCP over IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED) { + if (addr_type & IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index df71ba05f41d..4e4c401e3bc6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1046,6 +1046,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; daddr = &sin6->sin6_addr; + if (ipv6_addr_any(daddr) && + ipv6_addr_v4mapped(&np->saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + daddr); break; case AF_INET: goto do_udp_sendmsg; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index b5789562aded..08a807b29298 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -33,6 +33,8 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); int xfrm6_transport_finish(struct sk_buff *skb, int async) { + struct xfrm_offload *xo = xfrm_offload(skb); + skb_network_header(skb)[IP6CB(skb)->nhoff] = XFRM_MODE_SKB_CB(skb)->protocol; @@ -44,6 +46,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); + if (xo && (xo->flags & XFRM_GRO)) { + skb_mac_header_rebuild(skb); + return -1; + } + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip6_rcv_finish); @@ -69,18 +76,9 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, struct xfrm_state *x = NULL; int i = 0; - /* Allocate new secpath or COW existing one. 
*/ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - - sp = secpath_dup(skb->sp); - if (!sp) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); - goto drop; - } - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; + if (secpath_set(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); + goto drop; } if (1 + skb->sp->len == XFRM_MAX_DEPTH) { diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 4e344105b3fd..4439ee44c8b0 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -47,6 +47,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); + struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -55,7 +56,8 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - sizeof(struct ipv6hdr)); - skb_reset_transport_header(skb); + if (!xo || !(xo->flags & XFRM_GRO)) + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index e0f71c01d728..79651bc71bf0 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -25,8 +25,6 @@ #include <net/mip6.h> #endif -static struct xfrm_policy_afinfo xfrm6_policy_afinfo; - static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr) @@ -220,7 +218,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); - xfrm6_policy_afinfo.garbage_collect(net); + xfrm_garbage_collect_deferred(net); return dst_entries_get_fast(ops) > ops->gc_thresh * 2; } @@ -291,8 +289,7 @@ static struct dst_ops xfrm6_dst_ops_template = { .gc_thresh = INT_MAX, }; -static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { - .family = AF_INET6, +static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, @@ -305,7 +302,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { static int __init xfrm6_policy_init(void) { - return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo); + return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo, AF_INET6); } static void xfrm6_policy_fini(void) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index 54d13f8dbbae..b2dc8ce49378 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -162,9 +162,8 @@ static const struct inet6_protocol ipcomp6_protocol = { .flags = INET6_PROTO_NOPOLICY, }; -static struct xfrm_input_afinfo xfrm6_input_afinfo = { +static const struct xfrm_input_afinfo xfrm6_input_afinfo = { .family = AF_INET6, - .owner = THIS_MODULE, .callback = xfrm6_rcv_cb, }; diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index acbe61c7e683..160dc89335e2 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c @@ -383,9 +383,6 @@ EXPORT_SYMBOL(hashbin_new); * for deallocating this structure if it's complex. If not the user can * just supply kfree, which should take care of the job. 
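
Note: secpath_set(), used by the new esp4/esp6 GRO handlers and by the xfrm6_input_addr() hunk above, is not itself part of this diff. Judging from the open-coded allocate-or-COW sequence it replaces, a sketch of the helper would be roughly:

/* Sketch based on the code removed from xfrm6_input_addr() above;
 * the actual helper is added to the xfrm core in the same series.
 */
int secpath_set(struct sk_buff *skb)
{
	/* Allocate new secpath or COW existing one. */
	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
		struct sec_path *sp = secpath_dup(skb->sp);

		if (!sp)
			return -ENOMEM;

		if (skb->sp)
			secpath_put(skb->sp);
		skb->sp = sp;
	}
	return 0;
}
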
*/ -#ifdef CONFIG_LOCKDEP -static int hashbin_lock_depth = 0; -#endif int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) { irda_queue_t* queue; @@ -396,22 +393,27 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) IRDA_ASSERT(hashbin->magic == HB_MAGIC, return -1;); /* Synchronize */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_lock_irqsave_nested(&hashbin->hb_spinlock, flags, - hashbin_lock_depth++); - } + if (hashbin->hb_type & HB_LOCK) + spin_lock_irqsave(&hashbin->hb_spinlock, flags); /* * Free the entries in the hashbin, TODO: use hashbin_clear when * it has been shown to work */ for (i = 0; i < HASHBIN_SIZE; i ++ ) { - queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]); - while (queue ) { - if (free_func) - (*free_func)(queue); - queue = dequeue_first( - (irda_queue_t**) &hashbin->hb_queue[i]); + while (1) { + queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]); + + if (!queue) + break; + + if (free_func) { + if (hashbin->hb_type & HB_LOCK) + spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); + free_func(queue); + if (hashbin->hb_type & HB_LOCK) + spin_lock_irqsave(&hashbin->hb_spinlock, flags); + } } } @@ -420,12 +422,8 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) hashbin->magic = ~HB_MAGIC; /* Release lock */ - if ( hashbin->hb_type & HB_LOCK) { + if (hashbin->hb_type & HB_LOCK) spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); -#ifdef CONFIG_LOCKDEP - hashbin_lock_depth--; -#endif - } /* * Free the hashbin structure diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 64f0e8531af0..a646f3481240 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1044,8 +1044,10 @@ wait_for_memory: } else { /* Message not complete, save state */ partial_message: - kcm->seq_skb = head; - kcm_tx_msg(head)->last_skb = skb; + if (head) { + kcm->seq_skb = head; + kcm_tx_msg(head)->last_skb = skb; + } } KCM_STATS_ADD(kcm->stats.tx_bytes, copied); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 3e821daf9dd4..8bc5a1bd2d45 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -821,7 +821,10 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) * another trick required to cope with how the PROCOM state * machine works. 
-acme */ + skb_orphan(skb); + sock_hold(sk); skb->sk = sk; + skb->destructor = sock_efree; } if (!sock_owned_by_user(sk)) llc_conn_rcv(sk, skb); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index d0e1e804ebd7..5404d0d195cc 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -290,7 +290,10 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; + skb_orphan(skb); + sock_hold(sk); skb->sk = sk; + skb->destructor = sock_efree; llc_sap_state_process(sap, skb); } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 64d3bf269a26..3818686182b2 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -7,6 +7,7 @@ #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/mpls.h> +#include <linux/netconf.h> #include <linux/vmalloc.h> #include <linux/percpu.h> #include <net/ip.h> @@ -960,15 +961,215 @@ static size_t mpls_get_stats_af_size(const struct net_device *dev) return nla_total_size_64bit(sizeof(struct mpls_link_stats)); } +static int mpls_netconf_fill_devconf(struct sk_buff *skb, struct mpls_dev *mdev, + u32 portid, u32 seq, int event, + unsigned int flags, int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + bool all = false; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (!nlh) + return -EMSGSIZE; + + if (type == NETCONFA_ALL) + all = true; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_MPLS; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, mdev->dev->ifindex) < 0) + goto nla_put_failure; + + if ((all || type == NETCONFA_INPUT) && + nla_put_s32(skb, NETCONFA_INPUT, + mdev->input_enabled) < 0) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int mpls_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + bool all = false; + + if (type == NETCONFA_ALL) + all = true; + + if (all || type == NETCONFA_INPUT) + size += nla_total_size(4); + + return size; +} + +static void mpls_netconf_notify_devconf(struct net *net, int type, + struct mpls_dev *mdev) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(mpls_netconf_msgsize_devconf(type), GFP_KERNEL); + if (!skb) + goto errout; + + err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, RTM_NEWNETCONF, + 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_MPLS_NETCONF, NULL, GFP_KERNEL); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err); +} + +static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = { + [NETCONFA_IFINDEX] = { .len = sizeof(int) }, +}; + +static int mpls_netconf_get_devconf(struct sk_buff *in_skb, + struct nlmsghdr *nlh) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[NETCONFA_MAX + 1]; + struct netconfmsg *ncm; + struct net_device *dev; + struct mpls_dev *mdev; + struct sk_buff *skb; + int ifindex; + int err; + + err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, + devconf_mpls_policy); + if (err < 0) + goto errout; + + err = -EINVAL; + if (!tb[NETCONFA_IFINDEX]) + goto errout; + + ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); + dev = __dev_get_by_index(net, ifindex); + if (!dev) + goto errout; + + mdev = mpls_dev_get(dev); + if (!mdev) + goto errout; + + err = -ENOBUFS; + skb = 
nlmsg_new(mpls_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL); + if (!skb) + goto errout; + + err = mpls_netconf_fill_devconf(skb, mdev, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, RTM_NEWNETCONF, 0, + NETCONFA_ALL); + if (err < 0) { + /* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); +errout: + return err; +} + +static int mpls_netconf_dump_devconf(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct hlist_head *head; + struct net_device *dev; + struct mpls_dev *mdev; + int idx, s_idx; + int h, s_h; + + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + rcu_read_lock(); + cb->seq = net->dev_base_seq; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + mdev = mpls_dev_get(dev); + if (!mdev) + goto cont; + if (mpls_netconf_fill_devconf(skb, mdev, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + NETCONFA_ALL) < 0) { + rcu_read_unlock(); + goto done; + } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + rcu_read_unlock(); + } +done: + cb->args[0] = h; + cb->args[1] = idx; + + return skb->len; +} + #define MPLS_PERDEV_SYSCTL_OFFSET(field) \ (&((struct mpls_dev *)0)->field) +static int mpls_conf_proc(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int oval = *(int *)ctl->data; + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + + if (write) { + struct mpls_dev *mdev = ctl->extra1; + int i = (int *)ctl->data - (int *)mdev; + struct net *net = ctl->extra2; + int val = *(int *)ctl->data; + + if (i == offsetof(struct mpls_dev, input_enabled) && + val != oval) { + mpls_netconf_notify_devconf(net, + NETCONFA_INPUT, + mdev); + } + } + + return ret; +} + static const struct ctl_table mpls_dev_table[] = { { .procname = "input", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = mpls_conf_proc, .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled), }, { } @@ -978,6 +1179,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, struct mpls_dev *mdev) { char path[sizeof("net/mpls/conf/") + IFNAMSIZ]; + struct net *net = dev_net(dev); struct ctl_table *table; int i; @@ -988,8 +1190,11 @@ static int mpls_dev_sysctl_register(struct net_device *dev, /* Table data contains only offsets relative to the base of * the mdev at this point, so make them absolute. 
*/ - for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) + for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) { table[i].data = (char *)mdev + (uintptr_t)table[i].data; + table[i].extra1 = mdev; + table[i].extra2 = net; + } snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); @@ -1041,6 +1246,7 @@ static struct mpls_dev *mpls_add_dev(struct net_device *dev) if (err) goto free; + mdev->dev = dev; rcu_assign_pointer(dev->mpls_ptr, mdev); return mdev; @@ -1861,6 +2067,8 @@ static int __init mpls_init(void) rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL); rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL); rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL); + rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, + mpls_netconf_dump_devconf, NULL); err = 0; out: return err; diff --git a/net/mpls/internal.h b/net/mpls/internal.h index d97243034605..76360d8b9579 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -16,7 +16,7 @@ struct mpls_pcpu_stats { struct mpls_dev { int input_enabled; - + struct net_device *dev; struct mpls_pcpu_stats __percpu *stats; struct ctl_table_header *sysctl; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c2d452eab0c5..85cd59526670 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -339,9 +339,7 @@ static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct) /* Initialize labels for a new, yet to be committed conntrack entry. Note that * since the new connection is not yet confirmed, and thus no-one else has - * access to it's labels, we simply write them over. Also, we refrain from - * triggering events, as receiving change events before the create event would - * be confusing. + * access to it's labels, we simply write them over. */ static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key, const struct ovs_key_ct_labels *labels, @@ -374,6 +372,11 @@ static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key, & mask->ct_labels_32[i]); } + /* Labels are included in the IPCTNL_MSG_CT_NEW event only if the + * IPCT_LABEL bit it set in the event cache. 
+ */ + nf_conntrack_event_cache(IPCT_LABEL, ct); + memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a984f6f9ab4e..2bd0d1949312 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1505,6 +1505,8 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po) f->arr[f->num_members] = sk; smp_wmb(); f->num_members++; + if (f->num_members == 1) + dev_add_pack(&f->prot_hook); spin_unlock(&f->lock); } @@ -1521,6 +1523,8 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) BUG_ON(i >= f->num_members); f->arr[i] = f->arr[f->num_members - 1]; f->num_members--; + if (f->num_members == 0) + __dev_remove_pack(&f->prot_hook); spin_unlock(&f->lock); } @@ -1627,6 +1631,7 @@ static void fanout_release_data(struct packet_fanout *f) static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { + struct packet_rollover *rollover = NULL; struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f, *match; u8 type = type_flags & 0xff; @@ -1649,23 +1654,28 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) return -EINVAL; } + mutex_lock(&fanout_mutex); + + err = -EINVAL; if (!po->running) - return -EINVAL; + goto out; + err = -EALREADY; if (po->fanout) - return -EALREADY; + goto out; if (type == PACKET_FANOUT_ROLLOVER || (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) { - po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL); - if (!po->rollover) - return -ENOMEM; - atomic_long_set(&po->rollover->num, 0); - atomic_long_set(&po->rollover->num_huge, 0); - atomic_long_set(&po->rollover->num_failed, 0); + err = -ENOMEM; + rollover = kzalloc(sizeof(*rollover), GFP_KERNEL); + if (!rollover) + goto out; + atomic_long_set(&rollover->num, 0); + atomic_long_set(&rollover->num_huge, 0); + atomic_long_set(&rollover->num_failed, 0); + po->rollover = rollover; } - mutex_lock(&fanout_mutex); match = NULL; list_for_each_entry(f, &fanout_list, list) { if (f->id == id && @@ -1695,7 +1705,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; match->prot_hook.id_match = match_fanout_group; - dev_add_pack(&match->prot_hook); list_add(&match->list, &fanout_list); } err = -EINVAL; @@ -1712,36 +1721,40 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } } out: - mutex_unlock(&fanout_mutex); - if (err) { - kfree(po->rollover); + if (err && rollover) { + kfree(rollover); po->rollover = NULL; } + mutex_unlock(&fanout_mutex); return err; } -static void fanout_release(struct sock *sk) +/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes + * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout. 
+ * It is the responsibility of the caller to call fanout_release_data() and + * free the returned packet_fanout (after synchronize_net()) + */ +static struct packet_fanout *fanout_release(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; + mutex_lock(&fanout_mutex); f = po->fanout; - if (!f) - return; + if (f) { + po->fanout = NULL; - mutex_lock(&fanout_mutex); - po->fanout = NULL; + if (atomic_dec_and_test(&f->sk_ref)) + list_del(&f->list); + else + f = NULL; - if (atomic_dec_and_test(&f->sk_ref)) { - list_del(&f->list); - dev_remove_pack(&f->prot_hook); - fanout_release_data(f); - kfree(f); + if (po->rollover) + kfree_rcu(po->rollover, rcu); } mutex_unlock(&fanout_mutex); - if (po->rollover) - kfree_rcu(po->rollover, rcu); + return f; } static bool packet_extra_vlan_len_allowed(const struct net_device *dev, @@ -2928,6 +2941,7 @@ static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; struct packet_sock *po; + struct packet_fanout *f; struct net *net; union tpacket_req_u req_u; @@ -2967,9 +2981,14 @@ static int packet_release(struct socket *sock) packet_set_ring(sk, &req_u, 1, 1); } - fanout_release(sk); + f = fanout_release(sk); synchronize_net(); + + if (f) { + fanout_release_data(f); + kfree(f); + } /* * Now the socket is dead. No more input will appear. */ @@ -3921,7 +3940,6 @@ static int packet_notifier(struct notifier_block *this, } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); - fanout_release(sk); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 5e72de10c484..6ab39dbcca01 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -770,7 +770,6 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos); if (work_alloc != 1) { - rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); rds_ib_stats_inc(s_ib_tx_ring_full); ret = -ENOMEM; goto out; diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index 8fc6ea347182..b9da4d6b914f 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -2,7 +2,9 @@ # Makefile for Linux kernel RxRPC # -af-rxrpc-y := \ +obj-$(CONFIG_AF_RXRPC) += rxrpc.o + +rxrpc-y := \ af_rxrpc.o \ call_accept.o \ call_event.o \ @@ -26,8 +28,6 @@ af-rxrpc-y := \ skbuff.o \ utils.o -af-rxrpc-$(CONFIG_PROC_FS) += proc.o -af-rxrpc-$(CONFIG_RXKAD) += rxkad.o -af-rxrpc-$(CONFIG_SYSCTL) += sysctl.o - -obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o +rxrpc-$(CONFIG_PROC_FS) += proc.o +rxrpc-$(CONFIG_RXKAD) += rxkad.o +rxrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index d9c97018317d..80f688436dd7 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -148,6 +148,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_bpf_offload bpf_offload = {}; struct tc_to_netdev offload; + int err; offload.type = TC_SETUP_CLSBPF; offload.cls_bpf = &bpf_offload; @@ -159,8 +160,13 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, bpf_offload.exts_integrated = prog->exts_integrated; bpf_offload.gen_flags = prog->gen_flags; - return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + + if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE)) + prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; + + return err; 
} static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, @@ -511,6 +517,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, return ret; } + if (!tc_in_hw(prog->gen_flags)) + prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; + if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 0826c8ec3a76..9d0c99d2e9fb 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -273,6 +273,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc); + if (!err) + f->flags |= TCA_CLS_FLAGS_IN_HW; if (tc_skip_sw(f->flags)) return err; @@ -912,6 +914,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout; } + if (!tc_in_hw(fnew->flags)) + fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + if (fold) { if (!tc_skip_sw(fold->flags)) rhashtable_remove_fast(&head->ht, &fold->ht_node, @@ -1229,7 +1234,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags)) goto nla_put_failure; - nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); + if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags)) + goto nla_put_failure; if (tcf_exts_dump(skb, &f->exts)) goto nla_put_failure; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index f2141cb55562..224eb2c14346 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -56,6 +56,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, struct net_device *dev = tp->q->dev_queue->dev; struct tc_to_netdev offload; struct tc_cls_matchall_offload mall_offload = {0}; + int err; offload.type = TC_SETUP_MATCHALL; offload.cls_mall = &mall_offload; @@ -63,8 +64,12 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, offload.cls_mall->exts = &head->exts; offload.cls_mall->cookie = cookie; - return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, - &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, + &offload); + if (!err) + head->flags |= TCA_CLS_FLAGS_IN_HW; + + return err; } static void mall_destroy_hw_filter(struct tcf_proto *tp, @@ -194,6 +199,9 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, } } + if (!tc_in_hw(new->flags)) + new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + *arg = (unsigned long) head; rcu_assign_pointer(tp->root, new); if (head) @@ -244,6 +252,9 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_MATCHALL_CLASSID, head->res.classid)) goto nla_put_failure; + if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags)) + goto nla_put_failure; + if (tcf_exts_dump(skb, &head->exts)) goto nla_put_failure; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index a6ec3e4b57ab..4dbe0c680fe6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -523,6 +523,10 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); + + if (!err) + n->flags |= TCA_CLS_FLAGS_IN_HW; + if (tc_skip_sw(flags)) return err; @@ -895,6 +899,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; } + if (!tc_in_hw(new->flags)) + new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); call_rcu(&n->rcu, u32_delete_key_rcu); @@ 
-1014,6 +1021,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (err) goto errhw; + if (!tc_in_hw(n->flags)) + n->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + ins = &ht->ht[TC_U32_HASH(handle)]; for (pins = rtnl_dereference(*ins); pins; ins = &pins->next, pins = rtnl_dereference(*ins)) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a13c15e8f087..bcf49cd22786 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -440,7 +440,6 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) EXPORT_SYMBOL(qdisc_put_rtab); static LIST_HEAD(qdisc_stab_list); -static DEFINE_SPINLOCK(qdisc_stab_lock); static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, @@ -474,20 +473,15 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize != s->tsize || (!tab && tsize > 0)) return ERR_PTR(-EINVAL); - spin_lock(&qdisc_stab_lock); - list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) continue; if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) continue; stab->refcnt++; - spin_unlock(&qdisc_stab_lock); return stab; } - spin_unlock(&qdisc_stab_lock); - stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); if (!stab) return ERR_PTR(-ENOMEM); @@ -497,9 +491,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0) memcpy(stab->data, tab, tsize * sizeof(u16)); - spin_lock(&qdisc_stab_lock); list_add_tail(&stab->list, &qdisc_stab_list); - spin_unlock(&qdisc_stab_lock); return stab; } @@ -514,14 +506,10 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (!tab) return; - spin_lock(&qdisc_stab_lock); - if (--tab->refcnt == 0) { list_del(&tab->list); call_rcu_bh(&tab->rcu, stab_kfree_rcu); } - - spin_unlock(&qdisc_stab_lock); } EXPORT_SYMBOL(qdisc_put_stab); diff --git a/net/sctp/input.c b/net/sctp/input.c index 704ad19c1565..fc458968fe4b 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -872,6 +872,8 @@ void sctp_transport_hashtable_destroy(void) int sctp_hash_transport(struct sctp_transport *t) { + struct sctp_transport *transport; + struct rhlist_head *tmp, *list; struct sctp_hash_cmp_arg arg; int err; @@ -882,8 +884,19 @@ int sctp_hash_transport(struct sctp_transport *t) arg.paddr = &t->ipaddr; arg.lport = htons(t->asoc->base.bind_addr.port); + list = rhltable_lookup(&sctp_transport_hashtable, &arg, + sctp_hash_params); + + rhl_for_each_entry_rcu(transport, tmp, list, node) + if (transport->asoc->ep == t->asoc->ep) { + err = -EEXIST; + goto out; + } + err = rhltable_insert_key(&sctp_transport_hashtable, &arg, &t->node, sctp_hash_params); + +out: if (err) pr_err_once("insert transport fail, errno %d\n", err); diff --git a/net/sctp/output.c b/net/sctp/output.c index 814eac047467..85406d5f8f41 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -704,18 +704,15 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, * unacknowledged. 
*/ - if (sctp_sk(asoc->base.sk)->nodelay) - /* Nagle disabled */ + if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) && + !chunk->msg->force_delay) + /* Nothing unacked */ return SCTP_XMIT_OK; if (!sctp_packet_empty(packet)) /* Append to packet */ return SCTP_XMIT_OK; - if (inflight == 0) - /* Nothing unacked */ - return SCTP_XMIT_OK; - if (!sctp_state(asoc, ESTABLISHED)) return SCTP_XMIT_OK; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 7f8dbf2c6cee..969a30c7bb54 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3617,7 +3617,7 @@ struct sctp_chunk *sctp_make_strreset_req( __u16 stream_len = stream_num * 2; struct sctp_strreset_inreq inreq; struct sctp_chunk *retval; - __u16 outlen, inlen, i; + __u16 outlen, inlen; outlen = (sizeof(outreq) + stream_len) * out; inlen = (sizeof(inreq) + stream_len) * in; @@ -3626,9 +3626,6 @@ struct sctp_chunk *sctp_make_strreset_req( if (!retval) return NULL; - for (i = 0; i < stream_num; i++) - stream_list[i] = htons(stream_list[i]); - if (outlen) { outreq.param_hdr.type = SCTP_PARAM_RESET_OUT_REQUEST; outreq.param_hdr.length = htons(outlen); @@ -3653,9 +3650,6 @@ struct sctp_chunk *sctp_make_strreset_req( sctp_addto_chunk(retval, stream_len, stream_list); } - for (i = 0; i < stream_num; i++) - stream_list[i] = ntohs(stream_list[i]); - return retval; } @@ -3733,3 +3727,136 @@ struct sctp_chunk *sctp_make_strreset_addstrm( return retval; } + +/* RE-CONFIG 4.4 (RESP) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Parameter Type = 16 | Parameter Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Re-configuration Response Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Result | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct sctp_chunk *sctp_make_strreset_resp( + const struct sctp_association *asoc, + __u32 result, __u32 sn) +{ + struct sctp_strreset_resp resp; + __u16 length = sizeof(resp); + struct sctp_chunk *retval; + + retval = sctp_make_reconf(asoc, length); + if (!retval) + return NULL; + + resp.param_hdr.type = SCTP_PARAM_RESET_RESPONSE; + resp.param_hdr.length = htons(length); + resp.response_seq = htonl(sn); + resp.result = htonl(result); + + sctp_addto_chunk(retval, sizeof(resp), &resp); + + return retval; +} + +/* RE-CONFIG 4.4 OPTIONAL (TSNRESP) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Parameter Type = 16 | Parameter Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Re-configuration Response Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Result | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Sender's Next TSN (optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Receiver's Next TSN (optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct sctp_chunk *sctp_make_strreset_tsnresp( + struct sctp_association *asoc, + __u32 result, __u32 sn, + __u32 sender_tsn, __u32 receiver_tsn) +{ + struct sctp_strreset_resptsn tsnresp; + __u16 length = sizeof(tsnresp); + struct sctp_chunk *retval; + + retval = sctp_make_reconf(asoc, length); + if (!retval) + return NULL; + + tsnresp.param_hdr.type = 
SCTP_PARAM_RESET_RESPONSE; + tsnresp.param_hdr.length = htons(length); + + tsnresp.response_seq = htonl(sn); + tsnresp.result = htonl(result); + tsnresp.senders_next_tsn = htonl(sender_tsn); + tsnresp.receivers_next_tsn = htonl(receiver_tsn); + + sctp_addto_chunk(retval, sizeof(tsnresp), &tsnresp); + + return retval; +} + +bool sctp_verify_reconf(const struct sctp_association *asoc, + struct sctp_chunk *chunk, + struct sctp_paramhdr **errp) +{ + struct sctp_reconf_chunk *hdr; + union sctp_params param; + __u16 last = 0, cnt = 0; + + hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr; + sctp_walk_params(param, hdr, params) { + __u16 length = ntohs(param.p->length); + + *errp = param.p; + if (cnt++ > 2) + return false; + switch (param.p->type) { + case SCTP_PARAM_RESET_OUT_REQUEST: + if (length < sizeof(struct sctp_strreset_outreq) || + (last && last != SCTP_PARAM_RESET_RESPONSE && + last != SCTP_PARAM_RESET_IN_REQUEST)) + return false; + break; + case SCTP_PARAM_RESET_IN_REQUEST: + if (length < sizeof(struct sctp_strreset_inreq) || + (last && last != SCTP_PARAM_RESET_OUT_REQUEST)) + return false; + break; + case SCTP_PARAM_RESET_RESPONSE: + if ((length != sizeof(struct sctp_strreset_resp) && + length != sizeof(struct sctp_strreset_resptsn)) || + (last && last != SCTP_PARAM_RESET_RESPONSE && + last != SCTP_PARAM_RESET_OUT_REQUEST)) + return false; + break; + case SCTP_PARAM_RESET_TSN_REQUEST: + if (length != + sizeof(struct sctp_strreset_tsnreq) || last) + return false; + break; + case SCTP_PARAM_RESET_ADD_IN_STREAMS: + if (length != sizeof(struct sctp_strreset_addstrm) || + (last && last != SCTP_PARAM_RESET_ADD_OUT_STREAMS)) + return false; + break; + case SCTP_PARAM_RESET_ADD_OUT_STREAMS: + if (length != sizeof(struct sctp_strreset_addstrm) || + (last && last != SCTP_PARAM_RESET_ADD_IN_STREAMS)) + return false; + break; + default: + return false; + } + + last = param.p->type; + } + + return true; +} diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 51abcc90fe75..25384fa82ba9 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -872,6 +872,10 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, if (!sctp_style(sk, UDP)) sk->sk_state_change(sk); } + + if (sctp_state(asoc, SHUTDOWN_PENDING) && + !sctp_outq_is_empty(&asoc->outqueue)) + sctp_outq_uncork(&asoc->outqueue, GFP_ATOMIC); } /* Helper function to delete an association. */ diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d8798ddda726..e03bb1aab4d0 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3834,6 +3834,60 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, return SCTP_DISPOSITION_DISCARD; } +/* RE-CONFIG Section 5.2 Upon reception of an RECONF Chunk. */ +sctp_disposition_t sctp_sf_do_reconf(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_paramhdr *err_param = NULL; + struct sctp_chunk *chunk = arg; + struct sctp_reconf_chunk *hdr; + union sctp_params param; + + if (!sctp_vtag_verify(chunk, asoc)) { + sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, + SCTP_NULL()); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + + /* Make sure that the RECONF chunk has a valid length. 
*/ + if (!sctp_chunk_length_valid(chunk, sizeof(*hdr))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + + if (!sctp_verify_reconf(asoc, chunk, &err_param)) + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, + (void *)err_param, commands); + + hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr; + sctp_walk_params(param, hdr, params) { + struct sctp_chunk *reply = NULL; + struct sctp_ulpevent *ev = NULL; + + if (param.p->type == SCTP_PARAM_RESET_OUT_REQUEST) + reply = sctp_process_strreset_outreq( + (struct sctp_association *)asoc, param, &ev); + else if (param.p->type == SCTP_PARAM_RESET_IN_REQUEST) + reply = sctp_process_strreset_inreq( + (struct sctp_association *)asoc, param, &ev); + /* More handles for other types will be added here, by now it + * just ignores other types. + */ + + if (ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); + + if (reply) + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, + SCTP_CHUNK(reply)); + } + + return SCTP_DISPOSITION_CONSUME; +} + /* * PR-SCTP Section 3.6 Receiver Side Implementation of PR-SCTP * diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index b5438b4f6c1e..419b18ebb056 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -482,6 +482,32 @@ static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUN TYPE_SCTP_FWD_TSN, }; /*state_fn_t prsctp_chunk_event_table[][] */ +#define TYPE_SCTP_RECONF { \ + /* SCTP_STATE_CLOSED */ \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + /* SCTP_STATE_COOKIE_WAIT */ \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + /* SCTP_STATE_COOKIE_ECHOED */ \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + /* SCTP_STATE_ESTABLISHED */ \ + TYPE_SCTP_FUNC(sctp_sf_do_reconf), \ + /* SCTP_STATE_SHUTDOWN_PENDING */ \ + TYPE_SCTP_FUNC(sctp_sf_do_reconf), \ + /* SCTP_STATE_SHUTDOWN_SENT */ \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ +} /* TYPE_SCTP_RECONF */ + +/* The primary index for this table is the chunk type. + * The secondary index for this table is the state. + */ +static const sctp_sm_table_entry_t reconf_chunk_event_table[SCTP_NUM_RECONF_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { + TYPE_SCTP_RECONF, +}; /*state_fn_t reconf_chunk_event_table[][] */ + #define TYPE_SCTP_AUTH { \ /* SCTP_STATE_CLOSED */ \ TYPE_SCTP_FUNC(sctp_sf_ootb), \ @@ -964,6 +990,10 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, return &addip_chunk_event_table[1][state]; } + if (net->sctp.reconf_enable) + if (cid == SCTP_CID_RECONF) + return &reconf_chunk_event_table[0][state]; + if (net->sctp.auth_enable) { if (cid == SCTP_CID_AUTH) return &auth_chunk_event_table[0][state]; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 75f35cea4371..b5321486fbed 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1964,6 +1964,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) err = PTR_ERR(datamsg); goto out_free; } + datamsg->force_delay = !!(msg->msg_flags & MSG_MORE); /* Now send the (possibly) fragmented message. 
*/ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { diff --git a/net/sctp/stream.c b/net/sctp/stream.c index eb02490245ba..1c6cc04fa3a4 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -135,7 +135,14 @@ int sctp_send_reset_streams(struct sctp_association *asoc, if (str_list[i] >= stream->incnt) goto out; + for (i = 0; i < str_nums; i++) + str_list[i] = htons(str_list[i]); + chunk = sctp_make_strreset_req(asoc, str_nums, str_list, out, in); + + for (i = 0; i < str_nums; i++) + str_list[i] = ntohs(str_list[i]); + if (!chunk) { retval = -ENOMEM; goto out; @@ -294,3 +301,179 @@ int sctp_send_add_streams(struct sctp_association *asoc, out: return retval; } + +static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param( + struct sctp_association *asoc, __u32 resp_seq) +{ + struct sctp_chunk *chunk = asoc->strreset_chunk; + struct sctp_reconf_chunk *hdr; + union sctp_params param; + + if (ntohl(resp_seq) != asoc->strreset_outseq || !chunk) + return NULL; + + hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr; + sctp_walk_params(param, hdr, params) { + /* sctp_strreset_tsnreq is actually the basic structure + * of all stream reconf params, so it's safe to use it + * to access request_seq. + */ + struct sctp_strreset_tsnreq *req = param.v; + + if (req->request_seq == resp_seq) + return param.v; + } + + return NULL; +} + +struct sctp_chunk *sctp_process_strreset_outreq( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp) +{ + struct sctp_strreset_outreq *outreq = param.v; + struct sctp_stream *stream = asoc->stream; + __u16 i, nums, flags = 0, *str_p = NULL; + __u32 result = SCTP_STRRESET_DENIED; + __u32 request_seq; + + request_seq = ntohl(outreq->request_seq); + + if (ntohl(outreq->send_reset_at_tsn) > + sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map)) { + result = SCTP_STRRESET_IN_PROGRESS; + goto out; + } + + if (request_seq > asoc->strreset_inseq) { + result = SCTP_STRRESET_ERR_BAD_SEQNO; + goto out; + } else if (request_seq == asoc->strreset_inseq) { + asoc->strreset_inseq++; + } + + /* Check strreset_enable after inseq inc, as sender cannot tell + * the peer doesn't enable strreset after receiving response with + * result denied, as well as to keep consistent with bsd. 
+ */ + if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ)) + goto out; + + if (asoc->strreset_chunk) { + sctp_paramhdr_t *param_hdr; + struct sctp_transport *t; + + param_hdr = sctp_chunk_lookup_strreset_param( + asoc, outreq->response_seq); + if (!param_hdr || param_hdr->type != + SCTP_PARAM_RESET_IN_REQUEST) { + /* same process with outstanding isn't 0 */ + result = SCTP_STRRESET_ERR_IN_PROGRESS; + goto out; + } + + asoc->strreset_outstanding--; + asoc->strreset_outseq++; + + if (!asoc->strreset_outstanding) { + t = asoc->strreset_chunk->transport; + if (del_timer(&t->reconf_timer)) + sctp_transport_put(t); + + sctp_chunk_put(asoc->strreset_chunk); + asoc->strreset_chunk = NULL; + } + + flags = SCTP_STREAM_RESET_INCOMING_SSN; + } + + nums = (ntohs(param.p->length) - sizeof(*outreq)) / 2; + if (nums) { + str_p = outreq->list_of_streams; + for (i = 0; i < nums; i++) { + if (ntohs(str_p[i]) >= stream->incnt) { + result = SCTP_STRRESET_ERR_WRONG_SSN; + goto out; + } + } + + for (i = 0; i < nums; i++) + stream->in[ntohs(str_p[i])].ssn = 0; + } else { + for (i = 0; i < stream->incnt; i++) + stream->in[i].ssn = 0; + } + + result = SCTP_STRRESET_PERFORMED; + + *evp = sctp_ulpevent_make_stream_reset_event(asoc, + flags | SCTP_STREAM_RESET_OUTGOING_SSN, nums, str_p, + GFP_ATOMIC); + +out: + return sctp_make_strreset_resp(asoc, result, request_seq); +} + +struct sctp_chunk *sctp_process_strreset_inreq( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp) +{ + struct sctp_strreset_inreq *inreq = param.v; + struct sctp_stream *stream = asoc->stream; + __u32 result = SCTP_STRRESET_DENIED; + struct sctp_chunk *chunk = NULL; + __u16 i, nums, *str_p; + __u32 request_seq; + + request_seq = ntohl(inreq->request_seq); + if (request_seq > asoc->strreset_inseq) { + result = SCTP_STRRESET_ERR_BAD_SEQNO; + goto out; + } else if (request_seq == asoc->strreset_inseq) { + asoc->strreset_inseq++; + } + + if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ)) + goto out; + + if (asoc->strreset_outstanding) { + result = SCTP_STRRESET_ERR_IN_PROGRESS; + goto out; + } + + nums = (ntohs(param.p->length) - sizeof(*inreq)) / 2; + str_p = inreq->list_of_streams; + for (i = 0; i < nums; i++) { + if (ntohs(str_p[i]) >= stream->outcnt) { + result = SCTP_STRRESET_ERR_WRONG_SSN; + goto out; + } + } + + chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0); + if (!chunk) + goto out; + + if (nums) + for (i = 0; i < nums; i++) + stream->out[ntohs(str_p[i])].state = + SCTP_STREAM_CLOSED; + else + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_CLOSED; + + asoc->strreset_chunk = chunk; + asoc->strreset_outstanding = 1; + sctp_chunk_hold(asoc->strreset_chunk); + + *evp = sctp_ulpevent_make_stream_reset_event(asoc, + SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC); + +out: + if (!chunk) + chunk = sctp_make_strreset_resp(asoc, result, request_seq); + + return chunk; +} diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index bea00058ce35..c8881bc542a0 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -854,6 +854,35 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event( return event; } +struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( + const struct sctp_association *asoc, __u16 flags, __u16 stream_num, + __u16 *stream_list, gfp_t gfp) +{ + struct sctp_stream_reset_event *sreset; + struct sctp_ulpevent *event; + struct sk_buff *skb; + int length, i; + + length = sizeof(struct sctp_stream_reset_event) + 2 * 
stream_num; + event = sctp_ulpevent_new(length, MSG_NOTIFICATION, gfp); + if (!event) + return NULL; + + skb = sctp_event2skb(event); + sreset = (struct sctp_stream_reset_event *)skb_put(skb, length); + + sreset->strreset_type = SCTP_STREAM_RESET_EVENT; + sreset->strreset_flags = flags; + sreset->strreset_length = length; + sctp_ulpevent_set_owner(event, asoc); + sreset->strreset_assoc_id = sctp_assoc2id(asoc); + + for (i = 0; i < stream_num; i++) + sreset->strreset_stream_list[i] = ntohs(stream_list[i]); + + return event; +} + /* Return the notification type, assuming this is a notification * event. */ diff --git a/net/tipc/net.c b/net/tipc/net.c index 28bf4feeb81c..ab8a2d5d1e32 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -110,6 +110,10 @@ int tipc_net_start(struct net *net, u32 addr) char addr_string[16]; tn->own_addr = addr; + + /* Ensure that the new address is visible before we reinit. */ + smp_mb(); + tipc_named_reinit(net); tipc_sk_reinit(net); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 103d1fd058c0..6b09a778cc71 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -430,8 +430,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, INIT_LIST_HEAD(&tsk->cong_links); msg = &tsk->phdr; tn = net_generic(sock_net(sk), tipc_net_id); - tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, - NAMED_H_SIZE, 0); /* Finish initializing socket data structures */ sock->ops = ops; @@ -441,6 +439,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock, pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } + + /* Ensure tsk is visible before we read own_addr. */ + smp_mb(); + + tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + NAMED_H_SIZE, 0); + msg_set_origport(msg, tsk->portid); setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_shutdown = 0; @@ -2234,24 +2239,27 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, void tipc_sk_reinit(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - const struct bucket_table *tbl; - struct rhash_head *pos; + struct rhashtable_iter iter; struct tipc_sock *tsk; struct tipc_msg *msg; - int i; - rcu_read_lock(); - tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + rhashtable_walk_enter(&tn->sk_rht, &iter); + + do { + tsk = ERR_PTR(rhashtable_walk_start(&iter)); + if (tsk) + continue; + + while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); msg = &tsk->phdr; msg_set_prevnode(msg, tn->own_addr); msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } - } - rcu_read_unlock(); + + rhashtable_walk_stop(&iter); + } while (tsk == ERR_PTR(-EAGAIN)); } static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index c06d3997c6e7..286ed25c1a69 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -6,6 +6,10 @@ config XFRM depends on NET select GRO_CELLS +config XFRM_OFFLOAD + bool + depends on XFRM + config XFRM_ALGO tristate select XFRM diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 3213fe8027be..46bdb4fbed0b 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -19,19 +19,18 @@ static struct kmem_cache *secpath_cachep __read_mostly; static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); -static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO]; 
+static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; static struct gro_cells gro_cells; static struct net_device xfrm_napi_dev; -int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) +int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) { int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) + if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo))) return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_input_afinfo_lock); if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL)) err = -EEXIST; @@ -42,14 +41,10 @@ int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_input_register_afinfo); -int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo) +int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) { int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) - return -EAFNOSUPPORT; spin_lock_bh(&xfrm_input_afinfo_lock); if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo)) @@ -63,12 +58,13 @@ int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_input_unregister_afinfo); -static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) +static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) { - struct xfrm_input_afinfo *afinfo; + const struct xfrm_input_afinfo *afinfo; - if (unlikely(family >= NPROTO)) + if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo))) return NULL; + rcu_read_lock(); afinfo = rcu_dereference(xfrm_input_afinfo[family]); if (unlikely(!afinfo)) @@ -76,22 +72,17 @@ static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) return afinfo; } -static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo) -{ - rcu_read_unlock(); -} - static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol, int err) { int ret; - struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); + const struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); if (!afinfo) return -EAFNOSUPPORT; ret = afinfo->callback(skb, protocol, err); - xfrm_input_put_afinfo(afinfo); + rcu_read_unlock(); return ret; } @@ -114,6 +105,8 @@ struct sec_path *secpath_dup(struct sec_path *src) return NULL; sp->len = 0; + sp->olen = 0; + if (src) { int i; @@ -126,6 +119,24 @@ struct sec_path *secpath_dup(struct sec_path *src) } EXPORT_SYMBOL(secpath_dup); +int secpath_set(struct sk_buff *skb) +{ + struct sec_path *sp; + + /* Allocate new secpath or COW existing one. */ + if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + sp = secpath_dup(skb->sp); + if (!sp) + return -ENOMEM; + + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + } + return 0; +} +EXPORT_SYMBOL(secpath_set); + /* Fetch spi and seq from ipsec header */ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) @@ -161,6 +172,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) *seq = *(__be32 *)(skb_transport_header(skb) + offset_seq); return 0; } +EXPORT_SYMBOL(xfrm_parse_spi); int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { @@ -195,14 +207,23 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) unsigned int family; int decaps = 0; int async = 0; + struct xfrm_offload *xo; + bool xfrm_gro = false; - /* A negative encap_type indicates async resumption. 
*/ if (encap_type < 0) { - async = 1; x = xfrm_input_state(skb); - seq = XFRM_SKB_CB(skb)->seq.input.low; family = x->outer_mode->afinfo->family; - goto resume; + + /* An encap_type of -1 indicates async resumption. */ + if (encap_type == -1) { + async = 1; + seq = XFRM_SKB_CB(skb)->seq.input.low; + goto resume; + } + /* encap_type < -1 indicates a GRO call. */ + encap_type = 0; + seq = XFRM_SPI_SKB_CB(skb)->seq; + goto lock; } daddr = (xfrm_address_t *)(skb_network_header(skb) + @@ -221,18 +242,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) break; } - /* Allocate new secpath or COW existing one. */ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - - sp = secpath_dup(skb->sp); - if (!sp) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); - goto drop; - } - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; + err = secpath_set(skb); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); + goto drop; } seq = 0; @@ -256,6 +269,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; +lock: spin_lock(&x->lock); if (unlikely(x->km.state != XFRM_STATE_VALID)) { @@ -377,7 +391,18 @@ resume: gro_cells_receive(&gro_cells, skb); return 0; } else { - return x->inner_mode->afinfo->transport_finish(skb, async); + xo = xfrm_offload(skb); + if (xo) + xfrm_gro = xo->flags & XFRM_GRO; + + err = x->inner_mode->afinfo->transport_finish(skb, async); + if (xfrm_gro) { + skb_dst_drop(skb); + gro_cells_receive(&gro_cells, skb); + return err; + } + + return err; } drop_unlock: diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0a0f63d3cc96..5f3e87866438 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -45,7 +45,7 @@ struct xfrm_flo { }; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); -static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] +static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; static struct kmem_cache *xfrm_dst_cache __read_mostly; @@ -103,11 +103,11 @@ bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl return false; } -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) +static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { - struct xfrm_policy_afinfo *afinfo; + const struct xfrm_policy_afinfo *afinfo; - if (unlikely(family >= NPROTO)) + if (unlikely(family >= ARRAY_SIZE(xfrm_policy_afinfo))) return NULL; rcu_read_lock(); afinfo = rcu_dereference(xfrm_policy_afinfo[family]); @@ -116,18 +116,13 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) return afinfo; } -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - rcu_read_unlock(); -} - static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, int family) { - struct xfrm_policy_afinfo *afinfo; + const struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; afinfo = xfrm_policy_get_afinfo(family); @@ -136,7 +131,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return dst; } @@ -1431,12 +1426,12 @@ xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family) { int err; - struct xfrm_policy_afinfo *afinfo = 
xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; err = afinfo->get_saddr(net, oif, local, remote); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } @@ -1538,21 +1533,15 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, } -/* Check that the bundle accepts the flow and its components are - * still valid. - */ - -static inline int xfrm_get_tos(const struct flowi *fl, int family) +static int xfrm_get_tos(const struct flowi *fl, int family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - int tos; + const struct xfrm_policy_afinfo *afinfo; + int tos = 0; - if (!afinfo) - return -EINVAL; - - tos = afinfo->get_tos(fl); + afinfo = xfrm_policy_get_afinfo(family); + tos = afinfo ? afinfo->get_tos(fl) : 0; - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return tos; } @@ -1609,7 +1598,7 @@ static const struct flow_cache_ops xfrm_bundle_fc_ops = { static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); struct dst_ops *dst_ops; struct xfrm_dst *xdst; @@ -1638,7 +1627,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) } else xdst = ERR_PTR(-ENOBUFS); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return xdst; } @@ -1646,7 +1635,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { - struct xfrm_policy_afinfo *afinfo = + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(dst->ops->family); int err; @@ -1655,7 +1644,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, err = afinfo->init_path(path, dst, nfheader_len); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } @@ -1663,7 +1652,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { - struct xfrm_policy_afinfo *afinfo = + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); int err; @@ -1672,7 +1661,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, err = afinfo->fill_dst(xdst, dev, fl); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } @@ -1705,9 +1694,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, xfrm_flowi_addr_get(fl, &saddr, &daddr, family); tos = xfrm_get_tos(fl, family); - err = tos; - if (tos < 0) - goto put_states; dst_hold(dst); @@ -2215,7 +2201,7 @@ error: static struct dst_entry *make_blackhole(struct net *net, u16 family, struct dst_entry *dst_orig) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); struct dst_entry *ret; if (!afinfo) { @@ -2224,7 +2210,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, } else { ret = afinfo->blackhole_route(net, dst_orig); } - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return ret; } @@ -2466,7 +2452,7 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned 
int family, int reverse) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); int err; if (unlikely(afinfo == NULL)) @@ -2474,7 +2460,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, afinfo->decode_session(skb, fl, reverse); err = security_xfrm_decode_session(skb, &fl->flowi_secid); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(__xfrm_decode_session); @@ -2742,10 +2728,11 @@ void xfrm_garbage_collect(struct net *net) } EXPORT_SYMBOL(xfrm_garbage_collect); -static void xfrm_garbage_collect_deferred(struct net *net) +void xfrm_garbage_collect_deferred(struct net *net) { flow_cache_flush_deferred(net); } +EXPORT_SYMBOL(xfrm_garbage_collect_deferred); static void xfrm_init_pmtu(struct dst_entry *dst) { @@ -2873,15 +2860,15 @@ static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) path->ops->confirm_neigh(path, daddr); } -int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) +int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family) { int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) + + if (WARN_ON(family >= ARRAY_SIZE(xfrm_policy_afinfo))) return -EAFNOSUPPORT; + spin_lock(&xfrm_policy_afinfo_lock); - if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) + if (unlikely(xfrm_policy_afinfo[family] != NULL)) err = -EEXIST; else { struct dst_ops *dst_ops = afinfo->dst_ops; @@ -2901,9 +2888,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(!dst_ops->confirm_neigh)) dst_ops->confirm_neigh = xfrm_confirm_neigh; - if (likely(afinfo->garbage_collect == NULL)) - afinfo->garbage_collect = xfrm_garbage_collect_deferred; - rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); + rcu_assign_pointer(xfrm_policy_afinfo[family], afinfo); } spin_unlock(&xfrm_policy_afinfo_lock); @@ -2911,34 +2896,24 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_register_afinfo); -int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) +void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) { - int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) - return -EAFNOSUPPORT; - spin_lock(&xfrm_policy_afinfo_lock); - if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { - if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) - err = -EINVAL; - else - RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family], - NULL); + struct dst_ops *dst_ops = afinfo->dst_ops; + int i; + + for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) { + if (xfrm_policy_afinfo[i] != afinfo) + continue; + RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL); + break; } - spin_unlock(&xfrm_policy_afinfo_lock); - if (!err) { - struct dst_ops *dst_ops = afinfo->dst_ops; - synchronize_rcu(); + synchronize_rcu(); - dst_ops->kmem_cachep = NULL; - dst_ops->check = NULL; - dst_ops->negative_advice = NULL; - dst_ops->link_failure = NULL; - afinfo->garbage_collect = NULL; - } - return err; + dst_ops->kmem_cachep = NULL; + dst_ops->check = NULL; + dst_ops->negative_advice = NULL; + dst_ops->link_failure = NULL; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); |
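
The sketches below illustrate how some of the interfaces touched above might be used; they are not part of the patch, and the example_* names, sockets and buffers are placeholders.

The sctp_sendmsg() hunk copies MSG_MORE into datamsg->force_delay, and the reworked check in sctp_packet_can_append_data() skips the "send immediately" shortcuts while that flag is set, so small user messages can be held back and bundled. A minimal user-space sketch, assuming an already connected SCTP socket sd and placeholder buffers (error handling omitted for brevity):

#include <stddef.h>
#include <sys/socket.h>

/* hdr/body and their lengths are placeholders */
static void send_two_part_record(int sd, const void *hdr, size_t hdr_len,
				 const void *body, size_t body_len)
{
	/* MSG_MORE sets force_delay on this data message, so its chunks
	 * may be queued for bundling rather than pushed at once */
	(void)send(sd, hdr, hdr_len, MSG_MORE);

	/* no MSG_MORE: this message is not delayed, and anything still
	 * queued can go out together with it */
	(void)send(sd, body, body_len, 0);
}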
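
The new sctp_ulpevent_make_stream_reset_event() sizes the notification as the fixed struct sctp_stream_reset_event header plus two bytes per affected stream, and stores the stream numbers in host byte order. A sketch of decoding such an event after recvmsg() reported MSG_NOTIFICATION; the availability of the structure and SCTP_STREAM_RESET_EVENT in the user-side SCTP header is assumed:

#include <stdio.h>
#include <stdint.h>
#include <netinet/sctp.h>	/* assumed to expose the new event layout */

static void dump_stream_reset(const void *buf)
{
	const struct sctp_stream_reset_event *ev = buf;
	unsigned int i, nums;

	if (ev->strreset_type != SCTP_STREAM_RESET_EVENT)
		return;

	/* strreset_length = sizeof(*ev) + 2 * stream_num in the hunk above */
	nums = (ev->strreset_length - sizeof(*ev)) / sizeof(uint16_t);

	printf("assoc %d, flags 0x%x, %u stream(s)\n",
	       (int)ev->strreset_assoc_id,
	       (unsigned int)ev->strreset_flags, nums);
	for (i = 0; i < nums; i++)
		printf("  stream %u\n",
		       (unsigned int)ev->strreset_stream_list[i]);
}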
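
On the IPsec side, the xfrm_input hunks make the registered afinfo const and bound the family index by AF_INET6 instead of NPROTO. A minimal kernel-side caller sketch under the new signatures; the example_* names and the empty callback body are placeholders, only the calling conventions come from the hunks:

#include <linux/init.h>
#include <linux/module.h>
#include <net/xfrm.h>

/* same (skb, protocol, err) shape that xfrm_rcv_cb() invokes above */
static int example_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
{
	return 0;
}

static const struct xfrm_input_afinfo example_input_afinfo = {
	.family   = AF_INET,
	.callback = example_rcv_cb,
};

static int __init example_input_init(void)
{
	/* returns -EEXIST if a handler for this family is already set */
	return xfrm_input_register_afinfo(&example_input_afinfo);
}

static void __exit example_input_exit(void)
{
	xfrm_input_unregister_afinfo(&example_input_afinfo);
}

module_init(example_input_init);
module_exit(example_input_exit);
MODULE_LICENSE("GPL");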