From 3cf7203ca620682165706f70a1b12b5194607dce Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 8 Dec 2022 20:04:52 +0800 Subject: net/tunnel: wait until all sk_user_data reader finish before releasing the sock There is a race condition in vxlan that when deleting a vxlan device during receiving packets, there is a possibility that the sock is released after getting vxlan_sock vs from sk_user_data. Then in later vxlan_ecn_decapsulate(), vxlan_get_sk_family() we will got NULL pointer dereference. e.g. #0 [ffffa25ec6978a38] machine_kexec at ffffffff8c669757 #1 [ffffa25ec6978a90] __crash_kexec at ffffffff8c7c0a4d #2 [ffffa25ec6978b58] crash_kexec at ffffffff8c7c1c48 #3 [ffffa25ec6978b60] oops_end at ffffffff8c627f2b #4 [ffffa25ec6978b80] page_fault_oops at ffffffff8c678fcb #5 [ffffa25ec6978bd8] exc_page_fault at ffffffff8d109542 #6 [ffffa25ec6978c00] asm_exc_page_fault at ffffffff8d200b62 [exception RIP: vxlan_ecn_decapsulate+0x3b] RIP: ffffffffc1014e7b RSP: ffffa25ec6978cb0 RFLAGS: 00010246 RAX: 0000000000000008 RBX: ffff8aa000888000 RCX: 0000000000000000 RDX: 000000000000000e RSI: ffff8a9fc7ab803e RDI: ffff8a9fd1168700 RBP: ffff8a9fc7ab803e R8: 0000000000700000 R9: 00000000000010ae R10: ffff8a9fcb748980 R11: 0000000000000000 R12: ffff8a9fd1168700 R13: ffff8aa000888000 R14: 00000000002a0000 R15: 00000000000010ae ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffffa25ec6978ce8] vxlan_rcv at ffffffffc10189cd [vxlan] #8 [ffffa25ec6978d90] udp_queue_rcv_one_skb at ffffffff8cfb6507 #9 [ffffa25ec6978dc0] udp_unicast_rcv_skb at ffffffff8cfb6e45 #10 [ffffa25ec6978dc8] __udp4_lib_rcv at ffffffff8cfb8807 #11 [ffffa25ec6978e20] ip_protocol_deliver_rcu at ffffffff8cf76951 #12 [ffffa25ec6978e48] ip_local_deliver at ffffffff8cf76bde #13 [ffffa25ec6978ea0] __netif_receive_skb_one_core at ffffffff8cecde9b #14 [ffffa25ec6978ec8] process_backlog at ffffffff8cece139 #15 [ffffa25ec6978f00] __napi_poll at ffffffff8ceced1a #16 [ffffa25ec6978f28] net_rx_action at ffffffff8cecf1f3 #17 [ffffa25ec6978fa0] __softirqentry_text_start at ffffffff8d4000ca #18 [ffffa25ec6978ff0] do_softirq at ffffffff8c6fbdc3 Reproducer: https://github.com/Mellanox/ovs-tests/blob/master/test-ovs-vxlan-remove-tunnel-during-traffic.sh Fix this by waiting for all sk_user_data reader to finish before releasing the sock. Reported-by: Jianlin Shi Suggested-by: Jakub Sitnicki Fixes: 6a93cc905274 ("udp-tunnel: Add a few more UDP tunnel APIs") Signed-off-by: Hangbin Liu Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/udp_tunnel_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4') diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 8242c8947340..5f8104cf082d 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -176,6 +176,7 @@ EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); void udp_tunnel_sock_release(struct socket *sock) { rcu_assign_sk_user_data(sock->sk, NULL); + synchronize_rcu(); kernel_sock_shutdown(sock, SHUT_RDWR); sock_release(sock); } -- cgit v1.2.3 From 3fff88186f047627bb128d65155f42517f8e448f Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Fri, 9 Dec 2022 16:28:08 -0800 Subject: mptcp: remove MPTCP 'ifdef' in TCP SYN cookies To ease the maintenance, it is often recommended to avoid having #ifdef preprocessor conditions. Here the section related to CONFIG_MPTCP was quite short but the next commit needs to add more code around. It is then cleaner to move specific MPTCP code to functions located in net/mptcp directory. Now that mptcp_subflow_request_sock_ops structure can be static, it can also be marked as "read only after init". Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/net/mptcp.h | 12 ++++++++++-- net/ipv4/syncookies.c | 7 +++---- net/mptcp/subflow.c | 12 +++++++++++- 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 412479ebf5ad..3c5c68618fcc 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -97,8 +97,6 @@ struct mptcp_out_options { }; #ifdef CONFIG_MPTCP -extern struct request_sock_ops mptcp_subflow_request_sock_ops; - void mptcp_init(void); static inline bool sk_is_mptcp(const struct sock *sk) @@ -188,6 +186,9 @@ void mptcp_seq_show(struct seq_file *seq); int mptcp_subflow_init_cookie_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb); +struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener, + bool attach_listener); __be32 mptcp_get_reset_option(const struct sk_buff *skb); @@ -274,6 +275,13 @@ static inline int mptcp_subflow_init_cookie_req(struct request_sock *req, return 0; /* TCP fallback */ } +static inline struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener, + bool attach_listener) +{ + return NULL; +} + static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); } #endif /* CONFIG_MPTCP */ diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 942d2dfa1115..26fb97d1d4d9 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -288,12 +288,11 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, struct tcp_request_sock *treq; struct request_sock *req; -#ifdef CONFIG_MPTCP if (sk_is_mptcp(sk)) - ops = &mptcp_subflow_request_sock_ops; -#endif + req = mptcp_subflow_reqsk_alloc(ops, sk, false); + else + req = inet_reqsk_alloc(ops, sk, false); - req = inet_reqsk_alloc(ops, sk, false); if (!req) return NULL; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 2159b5f9988f..3f670f2d5c5c 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -529,7 +529,7 @@ static int subflow_v6_rebuild_header(struct sock *sk) } #endif -struct request_sock_ops mptcp_subflow_request_sock_ops; +static struct request_sock_ops mptcp_subflow_request_sock_ops __ro_after_init; static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init; static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -582,6 +582,16 @@ drop: } #endif +struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener, + bool attach_listener) +{ + ops = &mptcp_subflow_request_sock_ops; + + return inet_reqsk_alloc(ops, sk_listener, attach_listener); +} +EXPORT_SYMBOL(mptcp_subflow_reqsk_alloc); + /* validate hmac received in third ACK */ static bool subflow_hmac_valid(const struct request_sock *req, const struct mptcp_options_received *mp_opt) -- cgit v1.2.3