From e62905ae34eaf5fe2cfb254be5e0c097b3b1f798 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 23 Apr 2020 11:39:20 +0200 Subject: xfrm interface: don't take extra reference to netdev I don't see any reason to do this. Maybe it was needed before commit 56c5ee1a5823 ("xfrm interface: fix memory leak on creation"). Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 3361e3ac5714..eb9928c0a87c 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -145,7 +145,6 @@ static int xfrmi_create(struct net_device *dev) if (err < 0) goto out; - dev_hold(dev); xfrmi_link(xfrmn, xi); return 0; @@ -175,7 +174,6 @@ static void xfrmi_dev_uninit(struct net_device *dev) struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); xfrmi_unlink(xfrmn, xi); - dev_put(dev); } static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) -- cgit v1.2.3 From 0146dca70b877b73c5fd9c67912b8a0ca8a7bac7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 27 Apr 2020 17:59:34 +0200 Subject: xfrm: add support for UDPv6 encapsulation of ESP This patch adds support for encapsulation of ESP over UDPv6. The code is very similar to the IPv4 encapsulation implementation, and makes it easy to add espintcp on IPv6 as a follow-up. 
Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/ipv6_stubs.h | 3 + include/net/xfrm.h | 5 + net/ipv4/udp.c | 10 +- net/ipv6/af_inet6.c | 4 + net/ipv6/ah6.c | 1 + net/ipv6/esp6.c | 226 +++++++++++++++++++++++++++++++++++++++++----- net/ipv6/esp6_offload.c | 7 +- net/ipv6/ip6_vti.c | 18 +++- net/ipv6/ipcomp6.c | 1 + net/ipv6/xfrm6_input.c | 106 +++++++++++++++++++++- net/ipv6/xfrm6_protocol.c | 48 ++++++++++ net/xfrm/xfrm_interface.c | 3 + 12 files changed, 395 insertions(+), 37 deletions(-) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 3e7d2c0e79ca..f033a17b53b6 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -56,6 +56,9 @@ struct ipv6_stub { void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt); +#if IS_ENABLED(CONFIG_XFRM) + int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); +#endif struct neigh_table *nd_tbl; }; extern const struct ipv6_stub *ipv6_stub __read_mostly; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 8f71c111e65a..2577666c34c8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1406,6 +1406,8 @@ struct xfrm4_protocol { struct xfrm6_protocol { int (*handler)(struct sk_buff *skb); + int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); int (*cb_handler)(struct sk_buff *skb, int err); int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); @@ -1590,6 +1592,8 @@ int xfrm6_extract_header(struct sk_buff *skb); int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t); +int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); int xfrm6_transport_finish(struct sk_buff *skb, int async); int xfrm6_rcv_tnl(struct sk_buff 
*skb, struct ip6_tnl *t); int xfrm6_rcv(struct sk_buff *skb); @@ -1610,6 +1614,7 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, #ifdef CONFIG_XFRM int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen); #else diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32564b350823..1b7ebbcae497 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -112,6 +112,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); @@ -2563,7 +2566,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_XFRM case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP_NON_IKE: - up->encap_rcv = xfrm4_udp_encap_rcv; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv; + else +#endif + up->encap_rcv = xfrm4_udp_encap_rcv; #endif fallthrough; case UDP_ENCAP_L2TPINUDP: diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 345baa0a754f..b0b99c08350a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -961,6 +962,9 @@ static const struct ipv6_stub ipv6_stub_impl = { .ip6_del_rt = ip6_del_rt, .udpv6_encap_enable = udpv6_encap_enable, .ndisc_send_na = ndisc_send_na, +#if IS_ENABLED(CONFIG_XFRM) + .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, +#endif .nd_tbl = &nd_tbl, }; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 45e2adc56610..d88d97617f7e 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -767,6 +767,7 @@ static const struct xfrm_type ah6_type = { static struct xfrm6_protocol ah6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = ah6_rcv_cb, .err_handler = ah6_err, .priority = 0, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 
11143d039f16..e8800968e209 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -26,10 +26,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -39,6 +41,11 @@ struct esp_skb_cb { void *tmp; }; +struct esp_output_extra { + __be32 seqhi; + u32 esphoff; +}; + #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) /* @@ -72,9 +79,9 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen) return kmalloc(len, GFP_ATOMIC); } -static inline __be32 *esp_tmp_seqhi(void *tmp) +static inline void *esp_tmp_extra(void *tmp) { - return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); + return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) @@ -104,16 +111,17 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, static void esp_ssg_unref(struct xfrm_state *x, void *tmp) { + struct esp_output_extra *extra = esp_tmp_extra(tmp); struct crypto_aead *aead = x->data; - int seqhilen = 0; + int extralen = 0; u8 *iv; struct aead_request *req; struct scatterlist *sg; if (x->props.flags & XFRM_STATE_ESN) - seqhilen += sizeof(__be32); + extralen += sizeof(*extra); - iv = esp_tmp_iv(aead, tmp, seqhilen); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); /* Unref skb_frag_pages in the src scatterlist if necessary. 
@@ -124,6 +132,23 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) put_page(sg_page(sg)); } +static void esp_output_encap_csum(struct sk_buff *skb) +{ + /* UDP encap with IPv6 requires a valid checksum */ + if (*skb_mac_header(skb) == IPPROTO_UDP) { + struct udphdr *uh = udp_hdr(skb); + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int len = ntohs(uh->len); + unsigned int offset = skb_transport_offset(skb); + __wsum csum = skb_checksum(skb, offset, skb->len - offset, 0); + + uh->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } +} + static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -143,6 +168,8 @@ static void esp_output_done(struct crypto_async_request *base, int err) esp_ssg_unref(x, tmp); kfree(tmp); + esp_output_encap_csum(skb); + if (xo && (xo->flags & XFRM_DEV_RESUME)) { if (err) { XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); @@ -163,7 +190,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; - __be32 *seqhi = esp_tmp_seqhi(tmp); + __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; @@ -171,27 +198,36 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) static void esp_output_restore_header(struct sk_buff *skb) { - esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); + void *tmp = ESP_SKB_CB(skb)->tmp; + struct esp_output_extra *extra = esp_tmp_extra(tmp); + + esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - + sizeof(__be32)); } static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb, struct xfrm_state *x, struct ip_esp_hdr *esph, - __be32 *seqhi) + struct esp_output_extra *extra) { /* For ESN we move the header forward by 4 bytes to * accomodate the high bits. 
We will move it back after * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { + __u32 seqhi; struct xfrm_offload *xo = xfrm_offload(skb); - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; if (xo) - esph->seq_no = htonl(xo->seq.hi); + seqhi = xo->seq.hi; else - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + seqhi = XFRM_SKB_CB(skb)->seq.output.hi; + + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; + esph->seq_no = htonl(seqhi); } esph->spi = x->id.spi; @@ -207,15 +243,84 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) esp_output_done(base, err); } +static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, + int encap_type, + struct esp_info *esp, + __be16 sport, + __be16 dport) +{ + struct udphdr *uh; + __be32 *udpdata32; + unsigned int len; + + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len > U16_MAX) + return ERR_PTR(-EMSGSIZE); + + uh = (struct udphdr *)esp->esph; + uh->source = sport; + uh->dest = dport; + uh->len = htons(len); + uh->check = 0; + + *skb_mac_header(skb) = IPPROTO_UDP; + + if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) { + udpdata32 = (__be32 *)(uh + 1); + udpdata32[0] = udpdata32[1] = 0; + return (struct ip_esp_hdr *)(udpdata32 + 2); + } + + return (struct ip_esp_hdr *)(uh + 1); +} + +static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, + struct esp_info *esp) +{ + struct xfrm_encap_tmpl *encap = x->encap; + struct ip_esp_hdr *esph; + __be16 sport, dport; + int encap_type; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + encap_type = encap->encap_type; + spin_unlock_bh(&x->lock); + + switch (encap_type) { + default: + case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: + esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport); + break; + } + + if 
(IS_ERR(esph)) + return PTR_ERR(esph); + + esp->esph = esph; + + return 0; +} + int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; u8 *vaddr; int nfrags; + int esph_offset; struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; + if (x->encap) { + int err = esp6_output_encap(x, skb, esp); + + if (err < 0) + return err; + } + if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { nfrags = 1; @@ -274,10 +379,13 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info } cow: + esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb); + nfrags = skb_cow_data(skb, tailen, &trailer); if (nfrags < 0) goto out; tail = skb_tail_pointer(trailer); + esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset); skip_cow: esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); @@ -295,20 +403,20 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info void *tmp; int ivlen; int assoclen; - int seqhilen; - __be32 *seqhi; + int extralen; struct page *page; struct ip_esp_hdr *esph; struct aead_request *req; struct crypto_aead *aead; struct scatterlist *sg, *dsg; + struct esp_output_extra *extra; int err = -ENOMEM; assoclen = sizeof(struct ip_esp_hdr); - seqhilen = 0; + extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); + extralen += sizeof(*extra); assoclen += sizeof(__be32); } @@ -316,12 +424,12 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info alen = crypto_aead_authsize(aead); ivlen = crypto_aead_ivsize(aead); - tmp = esp_alloc_tmp(aead, esp->nfrags + 2, seqhilen); + tmp = esp_alloc_tmp(aead, esp->nfrags + 2, extralen); if (!tmp) goto error; - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -330,7 +438,8 @@ int 
esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info else dsg = &sg[esp->nfrags]; - esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi); + esph = esp_output_set_esn(skb, x, esp->esph, extra); + esp->esph = esph; sg_init_table(sg, esp->nfrags); err = skb_to_sgvec(skb, sg, @@ -394,6 +503,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info case 0: if ((x->props.flags & XFRM_STATE_ESN)) esp_output_restore_header(skb); + esp_output_encap_csum(skb); } if (sg != dsg) @@ -438,11 +548,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esp.plen = esp.clen - skb->len - esp.tfclen; esp.tailen = esp.tfclen + esp.plen + alen; + esp.esph = ip_esp_hdr(skb); + esp.nfrags = esp6_output_head(x, skb, &esp); if (esp.nfrags < 0) return esp.nfrags; - esph = ip_esp_hdr(skb); + esph = esp.esph; esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); @@ -517,6 +629,56 @@ int esp6_input_done2(struct sk_buff *skb, int err) if (unlikely(err < 0)) goto out; + if (x->encap) { + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct xfrm_encap_tmpl *encap = x->encap; + struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len); + __be16 source; + + switch (x->encap->encap_type) { + case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: + source = uh->source; + break; + default: + WARN_ON_ONCE(1); + err = -EINVAL; + goto out; + } + + /* + * 1) if the NAT-T peer's IP or port changed then + * advertize the change to the keying daemon. + * This is an inbound SA, so just compare + * SRC ports. + */ + if (!ipv6_addr_equal(&ip6h->saddr, &x->props.saddr.in6) || + source != encap->encap_sport) { + xfrm_address_t ipaddr; + + memcpy(&ipaddr.a6, &ip6h->saddr.s6_addr, sizeof(ipaddr.a6)); + km_new_mapping(x, &ipaddr, source); + + /* XXX: perhaps add an extra + * policy check here, to see + * if we should allow or + * reject a packet from a + * different source + * address/port. 
+ */ + } + + /* + * 2) ignore UDP/TCP checksums in case + * of NAT-T in Transport Mode, or + * perform other post-processing fixes + * as per draft-ietf-ipsec-udp-encaps-06, + * section 3.1.2 + */ + if (x->props.mode == XFRM_MODE_TRANSPORT) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); skb_pull_rcsum(skb, hlen); @@ -632,7 +794,7 @@ skip_cow: goto out; ESP_SKB_CB(skb)->tmp = tmp; - seqhi = esp_tmp_seqhi(tmp); + seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -836,9 +998,6 @@ static int esp6_init_state(struct xfrm_state *x) u32 align; int err; - if (x->encap) - return -EINVAL; - x->data = NULL; if (x->aead) @@ -867,6 +1026,22 @@ static int esp6_init_state(struct xfrm_state *x) break; } + if (x->encap) { + struct xfrm_encap_tmpl *encap = x->encap; + + switch (encap->encap_type) { + default: + err = -EINVAL; + goto error; + case UDP_ENCAP_ESPINUDP: + x->props.header_len += sizeof(struct udphdr); + break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); + break; + } + } + align = ALIGN(crypto_aead_blocksize(aead), 4); x->props.trailer_len = align + 1 + crypto_aead_authsize(aead); @@ -893,6 +1068,7 @@ static const struct xfrm_type esp6_type = { static struct xfrm6_protocol esp6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = esp6_rcv_cb, .err_handler = esp6_err, .priority = 0, diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 8eab2c869d61..06163cc15844 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -271,7 +271,6 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features int alen; int blksize; struct xfrm_offload *xo; - struct ip_esp_hdr *esph; struct crypto_aead *aead; struct esp_info esp; bool hw_offload = true; @@ -312,13 +311,13 @@ static int esp6_xmit(struct 
xfrm_state *x, struct sk_buff *skb, netdev_features seq = xo->seq.low; - esph = ip_esp_hdr(skb); - esph->spi = x->id.spi; + esp.esph = ip_esp_hdr(skb); + esp.esph->spi = x->id.spi; skb_push(skb, -skb_network_offset(skb)); if (xo->flags & XFRM_GSO_SEGMENT) { - esph->seq_no = htonl(seq); + esp.esph->seq_no = htonl(seq); if (!skb_is_gso(skb)) xo->seq.low++; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index cc6180e08a4f..1147f647b9a0 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -296,7 +296,8 @@ static void vti6_dev_uninit(struct net_device *dev) dev_put(dev); } -static int vti6_rcv(struct sk_buff *skb) +static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -323,7 +324,10 @@ static int vti6_rcv(struct sk_buff *skb) rcu_read_unlock(); - return xfrm6_rcv_tnl(skb, t); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + return xfrm_input(skb, nexthdr, spi, encap_type); } rcu_read_unlock(); return -EINVAL; @@ -332,6 +336,13 @@ discard: return 0; } +static int vti6_rcv(struct sk_buff *skb) +{ + int nexthdr = skb_network_header(skb)[IP6CB(skb)->nhoff]; + + return vti6_input_proto(skb, nexthdr, 0, 0); +} + static int vti6_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -1185,6 +1196,7 @@ static struct pernet_operations vti6_net_ops = { static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, @@ -1192,6 +1204,7 @@ static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, @@ -1199,6 +1212,7 @@ static 
struct xfrm6_protocol vti_ah6_protocol __read_mostly = { static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 3752bd3e92ce..99668bfebd85 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -183,6 +183,7 @@ static const struct xfrm_type ipcomp6_type = { static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, .priority = 0, diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a52cb3fc6df5..56f52353b324 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -35,9 +35,12 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); static int xfrm6_transport_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) { - if (xfrm_trans_queue(skb, ip6_rcv_finish)) - __kfree_skb(skb); - return -1; + if (xfrm_trans_queue(skb, ip6_rcv_finish)) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return 0; } int xfrm6_transport_finish(struct sk_buff *skb, int async) @@ -60,13 +63,106 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); skb_reset_transport_header(skb); - return -1; + return 0; } NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm6_transport_finish2); - return -1; + return 0; +} + +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. 
+ * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_sock *up = udp_sk(sk); + struct udphdr *uh; + struct ipv6hdr *ip6h; + int len; + int ip6hlen = sizeof(struct ipv6hdr); + + __u8 *udpdata; + __be32 *udpdata32; + __u16 encap_type = up->encap_type; + + /* if this is not encapsulated socket, then just return now */ + if (!encap_type) + return 1; + + /* If this is a paged skb, make sure we pull up + * whatever data we need to look at. */ + len = skb->len - sizeof(struct udphdr); + if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) + return 1; + + /* Now we can get the pointers */ + uh = udp_hdr(skb); + udpdata = (__u8 *)uh + sizeof(struct udphdr); + udpdata32 = (__be32 *)udpdata; + + switch (encap_type) { + default: + case UDP_ENCAP_ESPINUDP: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { + /* ESP Packet without Non-ESP header */ + len = sizeof(struct udphdr); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && + udpdata32[0] == 0 && udpdata32[1] == 0) { + + /* ESP Packet with Non-IKE marker */ + len = sizeof(struct udphdr) + 2 * sizeof(u32); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + } + + /* At this point we are sure that this is an ESPinUDP packet, + * so we need to remove 'len' bytes from the packet (the UDP + * header and optional ESP marker bytes) and then modify the + * protocol to ESP, and then call into the transform receiver. + */ + if (skb_unclone(skb, GFP_ATOMIC)) + goto drop; + + /* Now we can update and verify the packet length... 
*/ + ip6h = ipv6_hdr(skb); + ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len); + if (skb->len < ip6hlen + len) { + /* packet is too small!?! */ + goto drop; + } + + /* pull the data buffer up to the ESP header and set the + * transport header to point to ESP. Keep UDP on the stack + * for later. + */ + __skb_pull(skb, len); + skb_reset_transport_header(skb); + + /* process ESP */ + return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); + +drop: + kfree_skb(skb); + return 0; } int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index 34cb65c7d5a7..ea2f805d3b01 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,53 @@ static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) return 0; } +int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + int ret; + struct xfrm6_protocol *handler; + struct xfrm6_protocol __rcu **head = proto_handlers(nexthdr); + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + + if (!head) + goto out; + + if (!skb_dst(skb)) { + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct dst_entry *dst; + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6, + skb, flags); + if (dst->error) + goto drop; + skb_dst_set(skb, dst); + } + + for_each_protocol_rcu(*head, handler) + if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) + return ret; + +out: + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + 
kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL(xfrm6_rcv_encap); + static int xfrm6_esp_rcv(struct sk_buff *skb) { int ret; diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index eb9928c0a87c..02f8f46d0cc5 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -755,6 +755,7 @@ static struct pernet_operations xfrmi_net_ops = { static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, @@ -762,6 +763,7 @@ static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, @@ -769,6 +771,7 @@ static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = { static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, -- cgit v1.2.3 From 26333c37fc285e7372f1b9461f3ae0ba3dc699c9 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 27 Apr 2020 17:59:35 +0200 Subject: xfrm: add IPv6 support for espintcp This extends espintcp to support IPv6, building on the existing code and the new UDPv6 encapsulation support. Most of the code is either reused directly (stream parser, ULP) or very similar to the IPv4 variant (net/ipv6/esp6.c changes). The separation of config options for IPv4 and IPv6 espintcp requires a bit of Kconfig gymnastics to enable the core code. 
Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/ipv6_stubs.h | 2 + net/ipv4/Kconfig | 1 + net/ipv6/Kconfig | 12 +++ net/ipv6/af_inet6.c | 1 + net/ipv6/esp6.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++- net/xfrm/Kconfig | 3 + net/xfrm/Makefile | 2 +- net/xfrm/espintcp.c | 56 +++++++++++--- 8 files changed, 252 insertions(+), 13 deletions(-) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index f033a17b53b6..1e9e0cf7dc75 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -58,6 +58,8 @@ struct ipv6_stub { bool router, bool solicited, bool override, bool inc_opt); #if IS_ENABLED(CONFIG_XFRM) int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); + int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); #endif struct neigh_table *nd_tbl; }; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 25a8888826b8..014aaa17dc79 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -384,6 +384,7 @@ config INET_ESPINTCP depends on XFRM && INET_ESP select STREAM_PARSER select NET_SOCK_MSG + select XFRM_ESPINTCP help Support for RFC 8229 encapsulation of ESP and IKE over TCP/IPv4 sockets. diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 2ccaee98fddb..468a2faadc7d 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -88,6 +88,18 @@ config INET6_ESP_OFFLOAD If unsure, say N. +config INET6_ESPINTCP + bool "IPv6: ESP in TCP encapsulation (RFC 8229)" + depends on XFRM && INET6_ESP + select STREAM_PARSER + select NET_SOCK_MSG + select XFRM_ESPINTCP + help + Support for RFC 8229 encapsulation of ESP and IKE over + TCP/IPv6 sockets. + + If unsure, say N. 
+ config INET6_IPCOMP tristate "IPv6: IPComp transformation" select INET6_XFRM_TUNNEL diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b0b99c08350a..cbbb00bad20e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -964,6 +964,7 @@ static const struct ipv6_stub ipv6_stub_impl = { .ndisc_send_na = ndisc_send_na, #if IS_ENABLED(CONFIG_XFRM) .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, + .xfrm6_rcv_encap = xfrm6_rcv_encap, #endif .nd_tbl = &nd_tbl, }; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e8800968e209..c43592771126 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -33,6 +33,9 @@ #include #include #include +#include +#include +#include #include @@ -132,6 +135,132 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) put_page(sg_page(sg)); } +#ifdef CONFIG_INET6_ESPINTCP +struct esp_tcp_sk { + struct sock *sk; + struct rcu_head rcu; +}; + +static void esp_free_tcp_sk(struct rcu_head *head) +{ + struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); + + sock_put(esk->sk); + kfree(esk); +} + +static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) +{ + struct xfrm_encap_tmpl *encap = x->encap; + struct esp_tcp_sk *esk; + __be16 sport, dport; + struct sock *nsk; + struct sock *sk; + + sk = rcu_dereference(x->encap_sk); + if (sk && sk->sk_state == TCP_ESTABLISHED) + return sk; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (sk && sk == nsk) { + esk = kmalloc(sizeof(*esk), GFP_ATOMIC); + if (!esk) { + spin_unlock_bh(&x->lock); + return ERR_PTR(-ENOMEM); + } + RCU_INIT_POINTER(x->encap_sk, NULL); + esk->sk = sk; + call_rcu(&esk->rcu, esp_free_tcp_sk); + } + spin_unlock_bh(&x->lock); + + sk = __inet6_lookup_established(xs_net(x), &tcp_hashinfo, &x->id.daddr.in6, + dport, &x->props.saddr.in6, ntohs(sport), 0, 0); + if (!sk) + return ERR_PTR(-ENOENT); + + if (!tcp_is_ulp_esp(sk)) { + 
sock_put(sk); + return ERR_PTR(-EINVAL); + } + + spin_lock_bh(&x->lock); + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (encap->encap_sport != sport || + encap->encap_dport != dport) { + sock_put(sk); + sk = nsk ?: ERR_PTR(-EREMCHG); + } else if (sk == nsk) { + sock_put(sk); + } else { + rcu_assign_pointer(x->encap_sk, sk); + } + spin_unlock_bh(&x->lock); + + return sk; +} + +static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) +{ + struct sock *sk; + int err; + + rcu_read_lock(); + + sk = esp6_find_tcp_sk(x); + err = PTR_ERR_OR_ZERO(sk); + if (err) + goto out; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) + err = espintcp_queue_out(sk, skb); + else + err = espintcp_push_skb(sk, skb); + bh_unlock_sock(sk); + +out: + rcu_read_unlock(); + return err; +} + +static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + + return esp_output_tcp_finish(x, skb); +} + +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + local_bh_disable(); + err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb); + local_bh_enable(); + + /* EINPROGRESS just happens to do the right thing. It + * actually means that the skb has been consumed and + * isn't coming back. 
+ */ + return err ?: -EINPROGRESS; +} +#else +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + kfree_skb(skb); + + return -EOPNOTSUPP; +} +#endif + static void esp_output_encap_csum(struct sk_buff *skb) { /* UDP encap with IPv6 requires a valid checksum */ @@ -181,7 +310,11 @@ static void esp_output_done(struct crypto_async_request *base, int err) secpath_reset(skb); xfrm_dev_resume(skb); } else { - xfrm_output_resume(skb, err); + if (!err && + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + esp_output_tail_tcp(x, skb); + else + xfrm_output_resume(skb, err); } } @@ -274,6 +407,41 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, return (struct ip_esp_hdr *)(uh + 1); } +#ifdef CONFIG_INET6_ESPINTCP +static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + __be16 *lenp = (void *)esp->esph; + struct ip_esp_hdr *esph; + unsigned int len; + struct sock *sk; + + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len > IP_MAX_MTU) + return ERR_PTR(-EMSGSIZE); + + rcu_read_lock(); + sk = esp6_find_tcp_sk(x); + rcu_read_unlock(); + + if (IS_ERR(sk)) + return ERR_CAST(sk); + + *lenp = htons(len); + esph = (struct ip_esp_hdr *)(lenp + 1); + + return esph; +} +#else +static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif + static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { @@ -294,6 +462,9 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, case UDP_ENCAP_ESPINUDP_NON_IKE: esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport); break; + case TCP_ENCAP_ESPINTCP: + esph = esp6_output_tcp_encap(x, skb, esp); + break; } if (IS_ERR(esph)) @@ -509,6 +680,9 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info if (sg != dsg) 
esp_ssg_unref(x, tmp); + if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + err = esp_output_tail_tcp(x, skb); + error_free: kfree(tmp); error: @@ -633,9 +807,13 @@ int esp6_input_done2(struct sk_buff *skb, int err) const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct xfrm_encap_tmpl *encap = x->encap; struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len); + struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len); __be16 source; switch (x->encap->encap_type) { + case TCP_ENCAP_ESPINTCP: + source = th->source; + break; case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP_NON_IKE: source = uh->source; @@ -1039,6 +1217,14 @@ static int esp6_init_state(struct xfrm_state *x) case UDP_ENCAP_ESPINUDP_NON_IKE: x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); break; +#ifdef CONFIG_INET6_ESPINTCP + case TCP_ENCAP_ESPINTCP: + /* only the length field, TCP encap is done by + * the socket + */ + x->props.header_len += 2; + break; +#endif } } diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 6921a18201a0..b7fd9c838416 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -99,4 +99,7 @@ config NET_KEY_MIGRATE If unsure, say N. 
+config XFRM_ESPINTCP + bool + endif # INET diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 212a4fcb4a88..2d4bb4b9f75e 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -11,4 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o -obj-$(CONFIG_INET_ESPINTCP) += espintcp.o +obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 037ea156d2f9..2132a3b6df0f 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -6,6 +6,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, struct sock *sk) @@ -31,7 +34,12 @@ static void handle_esp(struct sk_buff *skb, struct sock *sk) rcu_read_lock(); skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); local_bh_disable(); - xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); + else +#endif + xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); local_bh_enable(); rcu_read_unlock(); } @@ -347,6 +355,9 @@ unlock: static struct proto espintcp_prot __ro_after_init; static struct proto_ops espintcp_ops __ro_after_init; +static struct proto espintcp6_prot; +static struct proto_ops espintcp6_ops; +static DEFINE_MUTEX(tcpv6_prot_mutex); static void espintcp_data_ready(struct sock *sk) { @@ -384,10 +395,14 @@ static void espintcp_destruct(struct sock *sk) bool tcp_is_ulp_esp(struct sock *sk) { - return sk->sk_prot == &espintcp_prot; + return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot; } EXPORT_SYMBOL_GPL(tcp_is_ulp_esp); +static void build_protos(struct proto *espintcp_prot, + struct proto_ops *espintcp_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops); static int 
espintcp_init_sk(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -415,8 +430,19 @@ static int espintcp_init_sk(struct sock *sk) strp_check_rcv(&ctx->strp); skb_queue_head_init(&ctx->ike_queue); skb_queue_head_init(&ctx->out_queue); - sk->sk_prot = &espintcp_prot; - sk->sk_socket->ops = &espintcp_ops; + + if (sk->sk_family == AF_INET) { + sk->sk_prot = &espintcp_prot; + sk->sk_socket->ops = &espintcp_ops; + } else { + mutex_lock(&tcpv6_prot_mutex); + if (!espintcp6_prot.recvmsg) + build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops); + mutex_unlock(&tcpv6_prot_mutex); + + sk->sk_prot = &espintcp6_prot; + sk->sk_socket->ops = &espintcp6_ops; + } ctx->saved_data_ready = sk->sk_data_ready; ctx->saved_write_space = sk->sk_write_space; sk->sk_data_ready = espintcp_data_ready; @@ -489,6 +515,20 @@ static __poll_t espintcp_poll(struct file *file, struct socket *sock, return mask; } +static void build_protos(struct proto *espintcp_prot, + struct proto_ops *espintcp_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops) +{ + memcpy(espintcp_prot, orig_prot, sizeof(struct proto)); + memcpy(espintcp_ops, orig_ops, sizeof(struct proto_ops)); + espintcp_prot->sendmsg = espintcp_sendmsg; + espintcp_prot->recvmsg = espintcp_recvmsg; + espintcp_prot->close = espintcp_close; + espintcp_prot->release_cb = espintcp_release; + espintcp_ops->poll = espintcp_poll; +} + static struct tcp_ulp_ops espintcp_ulp __read_mostly = { .name = "espintcp", .owner = THIS_MODULE, @@ -497,13 +537,7 @@ static struct tcp_ulp_ops espintcp_ulp __read_mostly = { void __init espintcp_init(void) { - memcpy(&espintcp_prot, &tcp_prot, sizeof(tcp_prot)); - memcpy(&espintcp_ops, &inet_stream_ops, sizeof(inet_stream_ops)); - espintcp_prot.sendmsg = espintcp_sendmsg; - espintcp_prot.recvmsg = espintcp_recvmsg; - espintcp_prot.close = espintcp_close; - espintcp_prot.release_cb = espintcp_release; - espintcp_ops.poll = espintcp_poll; + 
build_protos(&espintcp_prot, &espintcp_ops, &tcp_prot, &inet_stream_ops); tcp_register_ulp(&espintcp_ulp); } -- cgit v1.2.3 From 6d64be3da282908bb17b0803b9edad8852ffea56 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 May 2020 10:06:03 +0200 Subject: xfrm: avoid extract_output indirection for ipv4 We can use a direct call for ipv4, so move the needed functions to net/xfrm/xfrm_output.c and call them directly. For ipv6 the indirection can be avoided as well but it will need a bit more work -- to ease review it will be done in another patch. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - net/ipv4/xfrm4_output.c | 40 ---------------------------------------- net/ipv4/xfrm4_state.c | 1 - net/xfrm/xfrm_output.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 42 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2577666c34c8..397007324abd 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1580,7 +1580,6 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) return xfrm_input(skb, nexthdr, spi, 0); } -int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 89ba7c87de5d..21c8fa0a31ed 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -14,46 +14,6 @@ #include #include -static int xfrm4_tunnel_check_size(struct sk_buff *skb) -{ - int mtu, ret = 0; - - if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) - goto out; - - if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) - goto out; - - mtu = dst_mtu(skb_dst(skb)); - if ((!skb_is_gso(skb) && skb->len > mtu) || - (skb_is_gso(skb) && - 
!skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { - skb->protocol = htons(ETH_P_IP); - - if (skb->sk) - xfrm_local_error(skb, mtu); - else - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_FRAG_NEEDED, htonl(mtu)); - ret = -EMSGSIZE; - } -out: - return ret; -} - -int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm4_tunnel_check_size(skb); - if (err) - return err; - - XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol; - - return xfrm4_extract_header(skb); -} - int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index f8ed3c3bb928..d7c200779e4f 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -37,7 +37,6 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .output = xfrm4_output, .output_finish = xfrm4_output_finish, .extract_input = xfrm4_extract_input, - .extract_output = xfrm4_extract_output, .transport_finish = xfrm4_transport_finish, .local_error = xfrm4_local_error, }; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 2fd3d990d992..a7b3af7f7a1e 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -609,6 +610,47 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output); +static int xfrm4_tunnel_check_size(struct sk_buff *skb) +{ + int mtu, ret = 0; + + if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) + goto out; + + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) + goto out; + + mtu = dst_mtu(skb_dst(skb)); + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { + skb->protocol = htons(ETH_P_IP); + + if (skb->sk) + xfrm_local_error(skb, mtu); + else + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); + ret = -EMSGSIZE; + } +out: + return ret; +} + +static int 
xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + err = xfrm4_tunnel_check_size(skb); + if (err) + return err; + + XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol; + + xfrm4_extract_header(skb); + return 0; +} + static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { const struct xfrm_state_afinfo *afinfo; @@ -624,6 +666,10 @@ static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) if (inner_mode == NULL) return -EAFNOSUPPORT; + switch (inner_mode->family) { + case AF_INET: + return xfrm4_extract_output(x, skb); + } rcu_read_lock(); afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); if (likely(afinfo)) -- cgit v1.2.3 From a269fbfc4e9ffe48c1f8142e60a49b6f2e588c58 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 May 2020 10:06:04 +0200 Subject: xfrm: state: remove extract_input indirection from xfrm_state_afinfo In order to keep CONFIG_IPV6=m working, xfrm6_extract_header needs to be duplicated. It will be removed again in a followup change when the remaining caller is moved to net/xfrm as well. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 --- net/ipv4/xfrm4_input.c | 5 ----- net/ipv4/xfrm4_state.c | 1 - net/ipv6/xfrm6_input.c | 5 ----- net/ipv6/xfrm6_output.c | 17 ++++++++++++++++- net/ipv6/xfrm6_state.c | 24 ------------------------ net/xfrm/xfrm_inout.h | 18 ++++++++++++++++++ net/xfrm/xfrm_input.c | 21 +++++++++++---------- 8 files changed, 45 insertions(+), 49 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 397007324abd..a21c1dea5340 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -362,8 +362,6 @@ struct xfrm_state_afinfo { int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*output_finish)(struct sock *sk, struct sk_buff *skb); - int (*extract_input)(struct xfrm_state *x, - struct sk_buff *skb); int (*extract_output)(struct xfrm_state *x, struct sk_buff *skb); int (*transport_finish)(struct sk_buff *skb, @@ -1587,7 +1585,6 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); -int xfrm6_extract_header(struct sk_buff *skb); int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index f8de2482a529..ad2afeef4f10 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -18,11 +18,6 @@ #include #include -int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return xfrm4_extract_header(skb); -} - static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) { diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index d7c200779e4f..521fc1bc069c 100644 --- a/net/ipv4/xfrm4_state.c +++ 
b/net/ipv4/xfrm4_state.c @@ -36,7 +36,6 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .proto = IPPROTO_IPIP, .output = xfrm4_output, .output_finish = xfrm4_output_finish, - .extract_input = xfrm4_extract_input, .transport_finish = xfrm4_transport_finish, .local_error = xfrm4_local_error, }; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 56f52353b324..04cbeefd8982 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -17,11 +17,6 @@ #include #include -int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return xfrm6_extract_header(skb); -} - int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t) { diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index fbe51d40bd7e..855078a43fc7 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -94,6 +94,20 @@ out: return ret; } +static void __xfrm6_extract_header(struct sk_buff *skb) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); + XFRM_MODE_SKB_CB(skb)->id = 0; + XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); + XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); + XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; + XFRM_MODE_SKB_CB(skb)->optlen = 0; + memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, + sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); +} + int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) { int err; @@ -104,7 +118,8 @@ int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; - return xfrm6_extract_header(skb); + __xfrm6_extract_header(skb); + return 0; } int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 78daadecbdef..8fbf5a68ee6e 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -13,36 +13,12 @@ */ #include -#include -#include -#include -#include -#include -#include -#include 
- -int xfrm6_extract_header(struct sk_buff *skb) -{ - struct ipv6hdr *iph = ipv6_hdr(skb); - - XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); - XFRM_MODE_SKB_CB(skb)->id = 0; - XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); - XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); - XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; - XFRM_MODE_SKB_CB(skb)->optlen = 0; - memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, - sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); - - return 0; -} static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .proto = IPPROTO_IPV6, .output = xfrm6_output, .output_finish = xfrm6_output_finish, - .extract_input = xfrm6_extract_input, .extract_output = xfrm6_extract_output, .transport_finish = xfrm6_transport_finish, .local_error = xfrm6_local_error, diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h index c7b0318938e2..e24abac92dc2 100644 --- a/net/xfrm/xfrm_inout.h +++ b/net/xfrm/xfrm_inout.h @@ -6,6 +6,24 @@ #ifndef XFRM_INOUT_H #define XFRM_INOUT_H 1 +static inline void xfrm6_extract_header(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *iph = ipv6_hdr(skb); + + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); + XFRM_MODE_SKB_CB(skb)->id = 0; + XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); + XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); + XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; + XFRM_MODE_SKB_CB(skb)->optlen = 0; + memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, + sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); +#else + WARN_ON_ONCE(1); +#endif +} + static inline void xfrm6_beet_make_header(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index aa35f23c4912..6db266a0cb2d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -353,17 +353,18 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { const struct xfrm_mode *inner_mode = 
&x->inner_mode; - const struct xfrm_state_afinfo *afinfo; - int err = -EAFNOSUPPORT; - - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - err = afinfo->extract_input(x, skb); - rcu_read_unlock(); - if (err) - return err; + switch (x->outer_mode.family) { + case AF_INET: + xfrm4_extract_header(skb); + break; + case AF_INET6: + xfrm6_extract_header(skb); + break; + default: + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; + } if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); -- cgit v1.2.3 From 171916cbd53dec5c7b05efb56a201671d92effc1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 May 2020 10:06:05 +0200 Subject: xfrm: move xfrm4_extract_header to common helper The function only initializes the XFRM CB in the skb. After previous patch xfrm4_extract_header is only called from net/xfrm/xfrm_{input,output}.c. Because of IPV6=m linker errors the ipv6 equivalent (xfrm6_extract_header) was already placed in xfrm_inout.h because we can't call functions residing in a module from the core. So do the same for the ipv4 helper and place it next to the ipv6 one. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - net/ipv4/xfrm4_state.c | 21 --------------------- net/xfrm/xfrm_inout.h | 14 ++++++++++++++ 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a21c1dea5340..8b956528b6e6 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1562,7 +1562,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb); #endif void xfrm_local_error(struct sk_buff *skb, int mtu); -int xfrm4_extract_header(struct sk_buff *skb); int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 521fc1bc069c..b23a1711297b 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -8,28 +8,7 @@ * */ -#include #include -#include -#include -#include -#include - -int xfrm4_extract_header(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); - XFRM_MODE_SKB_CB(skb)->id = iph->id; - XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; - XFRM_MODE_SKB_CB(skb)->tos = iph->tos; - XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; - XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); - memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, - sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); - - return 0; -} static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h index e24abac92dc2..efc5e6b2e87b 100644 --- a/net/xfrm/xfrm_inout.h +++ b/net/xfrm/xfrm_inout.h @@ -6,6 +6,20 @@ #ifndef XFRM_INOUT_H #define XFRM_INOUT_H 1 +static inline void xfrm4_extract_header(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); + XFRM_MODE_SKB_CB(skb)->id = iph->id; + XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; + 
XFRM_MODE_SKB_CB(skb)->tos = iph->tos; + XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; + XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); + memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, + sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); +} + static inline void xfrm6_extract_header(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) -- cgit v1.2.3 From 3e50ddd8b8d5067796fc87cbbb25c71451ccb385 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 May 2020 10:06:06 +0200 Subject: xfrm: expose local_rxpmtu via ipv6_stubs We cannot call this function from the core kernel unless we would force CONFIG_IPV6=y. Therefore expose this via ipv6_stubs so we can call it from net/xfrm in the followup patch. Since the call is expected to be unlikely, no extra code for the IPV6=y case is added and we will always eat the indirection cost. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/ipv6_stubs.h | 1 + include/net/xfrm.h | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/xfrm6_output.c | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 1e9e0cf7dc75..d8ab3872aa2a 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -57,6 +57,7 @@ struct ipv6_stub { const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt); #if IS_ENABLED(CONFIG_XFRM) + void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu); int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 8b956528b6e6..10295ab4cdfb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1608,6 +1608,7 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); #ifdef CONFIG_XFRM +void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int 
xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm_user_policy(struct sock *sk, int optname, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index cbbb00bad20e..aa4882929fd0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -963,6 +963,7 @@ static const struct ipv6_stub ipv6_stub_impl = { .udpv6_encap_enable = udpv6_encap_enable, .ndisc_send_na = ndisc_send_na, #if IS_ENABLED(CONFIG_XFRM) + .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, .xfrm6_rcv_encap = xfrm6_rcv_encap, #endif diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 855078a43fc7..23e2b52cfba6 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -40,7 +40,7 @@ static int xfrm6_local_dontfrag(struct sk_buff *skb) return 0; } -static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) +void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; struct sock *sk = skb->sk; -- cgit v1.2.3 From ede64dd2bfe2710549f1922a214959d966baaac3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 May 2020 10:06:07 +0200 Subject: xfrm: place xfrm6_local_dontfrag in xfrm.h so next patch can re-use it from net/xfrm/xfrm_output.c without causing a linker error when IPV6 is a module. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 16 ++++++++++++++++ net/ipv6/xfrm6_output.c | 21 ++------------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 10295ab4cdfb..8f7fb033d557 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1993,4 +1993,20 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x, return 0; } + +#if IS_ENABLED(CONFIG_IPV6) +static inline bool xfrm6_local_dontfrag(const struct sock *sk) +{ + int proto; + + if (!sk || sk->sk_family != AF_INET6) + return false; + + proto = sk->sk_protocol; + if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) + return inet6_sk(sk)->dontfrag; + + return false; +} +#endif #endif /* _NET_XFRM_H */ diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 23e2b52cfba6..be64f280510c 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -23,23 +23,6 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, } EXPORT_SYMBOL(xfrm6_find_1stfragopt); -static int xfrm6_local_dontfrag(struct sk_buff *skb) -{ - int proto; - struct sock *sk = skb->sk; - - if (sk) { - if (sk->sk_family != AF_INET6) - return 0; - - proto = sk->sk_protocol; - if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) - return inet6_sk(sk)->dontfrag; - } - - return 0; -} - void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; @@ -82,7 +65,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) skb->dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); - if (xfrm6_local_dontfrag(skb)) + if (xfrm6_local_dontfrag(skb->sk)) xfrm6_local_rxpmtu(skb, mtu); else if (skb->sk) xfrm_local_error(skb, mtu); @@ -181,7 +164,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) toobig = skb->len > mtu && !skb_is_gso(skb); - if (toobig && xfrm6_local_dontfrag(skb)) { + if (toobig && xfrm6_local_dontfrag(skb->sk)) { 
xfrm6_local_rxpmtu(skb, mtu); kfree_skb(skb); return -EMSGSIZE; -- cgit v1.2.3 From f3075f48ddb2c4d076aeda36fa0939163e4b2816 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 May 2020 10:06:08 +0200 Subject: xfrm: remove extract_output indirection from xfrm_state_afinfo Move this to xfrm_output.c. This avoids the state->extract_output indirection. This patch also removes the duplicated __xfrm6_extract_header helper added in an earlier patch, we can now use the one from xfrm_inout.h . Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 --- net/ipv6/xfrm6_output.c | 58 ------------------------------------------ net/ipv6/xfrm6_state.c | 1 - net/xfrm/xfrm_output.c | 67 +++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 59 insertions(+), 70 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 8f7fb033d557..db814a7e042f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -362,8 +362,6 @@ struct xfrm_state_afinfo { int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*output_finish)(struct sock *sk, struct sk_buff *skb); - int (*extract_output)(struct xfrm_state *x, - struct sk_buff *skb); int (*transport_finish)(struct sk_buff *skb, int async); void (*local_error)(struct sk_buff *skb, u32 mtu); @@ -1601,7 +1599,6 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family); __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr); -int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 
be64f280510c..b7d65b344679 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -47,64 +47,6 @@ void xfrm6_local_error(struct sk_buff *skb, u32 mtu) ipv6_local_error(sk, EMSGSIZE, &fl6, mtu); } -static int xfrm6_tunnel_check_size(struct sk_buff *skb) -{ - int mtu, ret = 0; - struct dst_entry *dst = skb_dst(skb); - - if (skb->ignore_df) - goto out; - - mtu = dst_mtu(dst); - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - - if ((!skb_is_gso(skb) && skb->len > mtu) || - (skb_is_gso(skb) && - !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { - skb->dev = dst->dev; - skb->protocol = htons(ETH_P_IPV6); - - if (xfrm6_local_dontfrag(skb->sk)) - xfrm6_local_rxpmtu(skb, mtu); - else if (skb->sk) - xfrm_local_error(skb, mtu); - else - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ret = -EMSGSIZE; - } -out: - return ret; -} - -static void __xfrm6_extract_header(struct sk_buff *skb) -{ - struct ipv6hdr *iph = ipv6_hdr(skb); - - XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); - XFRM_MODE_SKB_CB(skb)->id = 0; - XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); - XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); - XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; - XFRM_MODE_SKB_CB(skb)->optlen = 0; - memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, - sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); -} - -int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm6_tunnel_check_size(skb); - if (err) - return err; - - XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; - - __xfrm6_extract_header(skb); - return 0; -} - int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 8fbf5a68ee6e..15247f2f78e1 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -19,7 +19,6 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .proto = IPPROTO_IPV6, .output = xfrm6_output, .output_finish = 
xfrm6_output_finish, - .extract_output = xfrm6_extract_output, .transport_finish = xfrm6_transport_finish, .local_error = xfrm6_local_error, }; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index a7b3af7f7a1e..3a646df1318d 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -17,6 +17,11 @@ #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#endif + #include "xfrm_inout.h" static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -651,11 +656,60 @@ static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } +#if IS_ENABLED(CONFIG_IPV6) +static int xfrm6_tunnel_check_size(struct sk_buff *skb) +{ + int mtu, ret = 0; + struct dst_entry *dst = skb_dst(skb); + + if (skb->ignore_df) + goto out; + + mtu = dst_mtu(dst); + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { + skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); + + if (xfrm6_local_dontfrag(skb->sk)) + ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); + else if (skb->sk) + xfrm_local_error(skb, mtu); + else + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ret = -EMSGSIZE; + } +out: + return ret; +} +#endif + +static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + int err; + + err = xfrm6_tunnel_check_size(skb); + if (err) + return err; + + XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; + + xfrm6_extract_header(skb); + return 0; +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif +} + static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_state_afinfo *afinfo; const struct xfrm_mode *inner_mode; - int err = -EAFNOSUPPORT; if (x->sel.family == AF_UNSPEC) inner_mode = xfrm_ip2inner_mode(x, @@ -669,14 +723,11 @@ static int xfrm_inner_extract_output(struct xfrm_state *x, struct 
sk_buff *skb) switch (inner_mode->family) { case AF_INET: return xfrm4_extract_output(x, skb); + case AF_INET6: + return xfrm6_extract_output(x, skb); } - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); - if (likely(afinfo)) - err = afinfo->extract_output(x, skb); - rcu_read_unlock(); - return err; + return -EAFNOSUPPORT; } void xfrm_local_error(struct sk_buff *skb, int mtu) -- cgit v1.2.3 From 2ab6096db2f16b3a6adbad252f1be171e649028d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 May 2020 10:06:09 +0200 Subject: xfrm: remove output_finish indirection from xfrm_state_afinfo There are only two implementations, one for ipv4 and one for ipv6. Both are almost identical, they clear skb->cb[], set the TRANSFORMED flag in IP(6)CB and then call the common xfrm_output() function. By placing the IPCB handling into the common function, we avoid the need for the output_finish indirection as the output functions can simply use xfrm_output(). Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - net/ipv4/xfrm4_output.c | 23 +---------------------- net/ipv4/xfrm4_state.c | 1 - net/ipv6/xfrm6_output.c | 34 ++-------------------------------- net/ipv6/xfrm6_state.c | 1 - net/xfrm/xfrm_output.c | 16 ++++++++++++++++ 6 files changed, 19 insertions(+), 57 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index db814a7e042f..094fe682f5d7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -361,7 +361,6 @@ struct xfrm_state_afinfo { const struct xfrm_type *type_dstopts; int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); - int (*output_finish)(struct sock *sk, struct sk_buff *skb); int (*transport_finish)(struct sk_buff *skb, int async); void (*local_error)(struct sk_buff *skb, u32 mtu); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 21c8fa0a31ed..502eb189d852 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -14,22 +14,9 @@
#include #include -int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) -{ - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - -#ifdef CONFIG_NETFILTER - IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; -#endif - - return xfrm_output(sk, skb); -} - static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct xfrm_state *x = skb_dst(skb)->xfrm; - const struct xfrm_state_afinfo *afinfo; - int ret = -EAFNOSUPPORT; #ifdef CONFIG_NETFILTER if (!x) { @@ -38,15 +25,7 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) } #endif - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - ret = afinfo->output_finish(sk, skb); - else - kfree_skb(skb); - rcu_read_unlock(); - - return ret; + return xfrm_output(sk, skb); } int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index b23a1711297b..87d4db591488 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -14,7 +14,6 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .proto = IPPROTO_IPIP, .output = xfrm4_output, - .output_finish = xfrm4_output_finish, .transport_finish = xfrm4_transport_finish, .local_error = xfrm4_local_error, }; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b7d65b344679..8b84d534b19d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -47,39 +47,9 @@ void xfrm6_local_error(struct sk_buff *skb, u32 mtu) ipv6_local_error(sk, EMSGSIZE, &fl6, mtu); } -int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) -{ - memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - -#ifdef CONFIG_NETFILTER - IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; -#endif - - return xfrm_output(sk, skb); -} - -static int __xfrm6_output_state_finish(struct xfrm_state *x, struct sock *sk, - struct sk_buff *skb) -{ - const struct xfrm_state_afinfo *afinfo; - int ret = -EAFNOSUPPORT; 
- - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - ret = afinfo->output_finish(sk, skb); - else - kfree_skb(skb); - rcu_read_unlock(); - - return ret; -} - static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct xfrm_state *x = skb_dst(skb)->xfrm; - - return __xfrm6_output_state_finish(x, sk, skb); + return xfrm_output(sk, skb); } static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -121,7 +91,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) __xfrm6_output_finish); skip_frag: - return __xfrm6_output_state_finish(x, sk, skb); + return xfrm_output(sk, skb); } int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 15247f2f78e1..6610b2198fa9 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -18,7 +18,6 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .proto = IPPROTO_IPV6, .output = xfrm6_output, - .output_finish = xfrm6_output_finish, .transport_finish = xfrm6_transport_finish, .local_error = xfrm6_local_error, }; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 3a646df1318d..9c43b8dd80fb 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -571,6 +571,22 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) struct xfrm_state *x = skb_dst(skb)->xfrm; int err; + switch (x->outer_mode.family) { + case AF_INET: + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); +#ifdef CONFIG_NETFILTER + IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; +#endif + break; + case AF_INET6: + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + +#ifdef CONFIG_NETFILTER + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; +#endif + break; + } + secpath_reset(skb); if (xfrm_dev_offload_ok(skb, x)) { -- cgit v1.2.3 From 7d4343d501f9b5ddbc92f278adba339d16d010e1 Mon Sep 17 00:00:00 2001 From: 
Florian Westphal Date: Mon, 11 May 2020 10:33:42 +0200 Subject: xfrm: fix unused variable warning if CONFIG_NETFILTER=n After recent change 'x' is only used when CONFIG_NETFILTER is set: net/ipv4/xfrm4_output.c: In function '__xfrm4_output': net/ipv4/xfrm4_output.c:19:21: warning: unused variable 'x' [-Wunused-variable] 19 | struct xfrm_state *x = skb_dst(skb)->xfrm; Expand the CONFIG_NETFILTER scope to avoid this. Fixes: 2ab6096db2f1 ("xfrm: remove output_finish indirection from xfrm_state_afinfo") Reported-by: Stephen Rothwell Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 502eb189d852..3cff51ba72bb 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -16,9 +16,9 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { +#ifdef CONFIG_NETFILTER struct xfrm_state *x = skb_dst(skb)->xfrm; -#ifdef CONFIG_NETFILTER if (!x) { IPCB(skb)->flags |= IPSKB_REROUTED; return dst_output(net, sk, skb); -- cgit v1.2.3