From c6ea04ea692fa0d8e7faeb133fcd28e3acf470a0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 28 Oct 2021 05:36:02 -0400 Subject: sctp: reset probe_timer in sctp_transport_pl_update sctp_transport_pl_update() is called when transport update its dst and pathmtu, instead of stopping the PLPMTUD probe timer, PLPMTUD should start over and reset the probe timer. Otherwise, the PLPMTUD service would stop. Fixes: 92548ec2f1f9 ("sctp: add the probe timer in transport for PLPMTUD") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 69bab88ad66b..bc00410223b0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -653,12 +653,10 @@ static inline void sctp_transport_pl_update(struct sctp_transport *t) if (t->pl.state == SCTP_PL_DISABLED) return; - if (del_timer(&t->probe_timer)) - sctp_transport_put(t); - t->pl.state = SCTP_PL_BASE; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pl.probe_size = SCTP_BASE_PLPMTU; + sctp_transport_reset_probe_timer(t); } static inline bool sctp_transport_pl_enabled(struct sctp_transport *t) -- cgit v1.2.3 From cc4665ca646c96181a7c00198aa72c59e0c576e8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 28 Oct 2021 05:36:03 -0400 Subject: sctp: subtract sctphdr len in sctp_transport_pl_hlen sctp_transport_pl_hlen() is called to calculate the outer header length for PL. However, as the Figure in rfc8899#section-4.4: Any additional headers .--- MPS -----. | | | v v v +------------------------------+ | IP | ** | PL | protocol data | +------------------------------+ <----- PLPMTU -----> <---------- PMTU --------------> Outer header are IP + Any additional headers, which doesn't include Packetization Layer itself header, namely sctphdr, whereas sctphdr is counted by __sctp_mtu_payload(). The incorrect calculation caused the link pathmtu to be set larger than expected by t->pl.pmtu + sctp_transport_pl_hlen(). This patch is to fix it by subtracting sctphdr len in sctp_transport_pl_hlen(). Fixes: d9e2e410ae30 ("sctp: add the constants/variables and states and some APIs for transport") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index bc00410223b0..189fdb9db162 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -626,7 +626,8 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize) static inline int sctp_transport_pl_hlen(struct sctp_transport *t) { - return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0); + return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0) - + sizeof(struct sctphdr); } static inline void sctp_transport_pl_reset(struct sctp_transport *t) -- cgit v1.2.3 From 6de6e46d27ef386feecdbea56b3bfd6c3b3bc1f9 Mon Sep 17 00:00:00 2001 From: Yoshiki Komachi Date: Fri, 29 Oct 2021 09:21:41 +0000 Subject: cls_flower: Fix inability to match GRE/IPIP packets When a packet of a new flow arrives in openvswitch kernel module, it dissects the packet and passes the extracted flow key to ovs-vswtichd daemon. If hw- offload configuration is enabled, the daemon creates a new TC flower entry to bypass openvswitch kernel module for the flow (TC flower can also offload flows to NICs but this time that does not matter). In this processing flow, I found the following issue in cases of GRE/IPIP packets. When ovs_flow_key_extract() in openvswitch module parses a packet of a new GRE (or IPIP) flow received on non-tunneling vports, it extracts information of the outer IP header for ip_proto/src_ip/dst_ip match keys. This means ovs-vswitchd creates a TC flower entry with IP protocol/addresses match keys whose values are those of the outer IP header. OTOH, TC flower, which uses flow_dissector (different parser from openvswitch module), extracts information of the inner IP header. The following flow is an example to describe the issue in more detail. <----------- Outer IP -----------------> <---------- Inner IP ----------> +----------+--------------+--------------+----------+----------+----------+ | ip_proto | src_ip | dst_ip | ip_proto | src_ip | dst_ip | | 47 (GRE) | 192.168.10.1 | 192.168.10.2 | 6 (TCP) | 10.0.0.1 | 10.0.0.2 | +----------+--------------+--------------+----------+----------+----------+ In this case, TC flower entry and extracted information are shown as below: - ovs-vswitchd creates TC flower entry with: - ip_proto: 47 - src_ip: 192.168.10.1 - dst_ip: 192.168.10.2 - TC flower extracts below for IP header matches: - ip_proto: 6 - src_ip: 10.0.0.1 - dst_ip: 10.0.0.2 Thus, GRE or IPIP packets never match the TC flower entry, as each dissector behaves differently. IMHO, the behavior of TC flower (flow dissector) does not look correct, as ip_proto/src_ip/dst_ip in TC flower match means the outermost IP header information except for GRE/IPIP cases. This patch adds a new flow_dissector flag FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP which skips dissection of the encapsulated inner GRE/IPIP header in TC flower classifier. Signed-off-by: Yoshiki Komachi Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 1 + net/core/flow_dissector.c | 15 +++++++++++++++ net/sched/cls_flower.c | 3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index ffd386ea0dbb..aa33e1092e2c 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -287,6 +287,7 @@ enum flow_dissector_key_id { #define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0) #define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(1) #define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(2) +#define FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP BIT(3) struct flow_dissector_key { enum flow_dissector_key_id key_id; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index bac0184cf3de..0d4bbf534c7d 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1307,6 +1307,11 @@ ip_proto_again: switch (ip_proto) { case IPPROTO_GRE: + if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; + } + fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector, target_container, data, &proto, &nhoff, &hlen, flags); @@ -1364,6 +1369,11 @@ ip_proto_again: break; } case IPPROTO_IPIP: + if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; + } + proto = htons(ETH_P_IP); key_control->flags |= FLOW_DIS_ENCAPSULATION; @@ -1376,6 +1386,11 @@ ip_proto_again: break; case IPPROTO_IPV6: + if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; + } + proto = htons(ETH_P_IPV6); key_control->flags |= FLOW_DIS_ENCAPSULATION; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index eb6345a027e1..aab13ba11767 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -329,7 +329,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, ARRAY_SIZE(fl_ct_info_to_flower_map), post_ct); skb_flow_dissect_hash(skb, &mask->dissector, &skb_key); - skb_flow_dissect(skb, &mask->dissector, &skb_key, 0); + skb_flow_dissect(skb, &mask->dissector, &skb_key, + FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); f = fl_mask_lookup(mask, &skb_key); if (f && !tc_skip_sw(f->flags)) { -- cgit v1.2.3