diff options
author | David S. Miller <davem@davemloft.net> | 2020-12-09 19:31:58 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-12-09 19:31:58 -0800 |
commit | 0f86a5be10956aaa77f92d9c25ec508c5495e8af (patch) | |
tree | 4835210f79ccb092abb8aff17daecae4f21acd38 | |
parent | 7bdddc68cda3e0c9ef7711238206881cf016bdbb (diff) | |
parent | d7b1bfd0832c1d005f571203306b6c50e9805150 (diff) | |
download | linux-0f86a5be10956aaa77f92d9c25ec508c5495e8af.tar.bz2 |
Merge branch 'mptcp-fixes'
Paolo Abeni says:
====================
mptcp: a bunch of fixes
This series includes a few fixes following-up the
recent code refactor for the MPTCP RX and TX paths.
Boundling them together, since the fixes are somewhat
related.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/mptcp/options.c | 7 | ||||
-rw-r--r-- | net/mptcp/pm.c | 8 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 23 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 1 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 19 |
5 files changed, 48 insertions, 10 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index d1b4c5d208a9..1ca60d9da3ef 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -802,7 +802,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, mptcp_subflow_fully_established(subflow, mp_opt); fully_established: - if (likely(subflow->pm_notified)) + /* if the subflow is not already linked into the conn_list, we can't + * notify the PM: this subflow is still on the listener queue + * and the PM possibly acquiring the subflow lock could race with + * the listener close + */ + if (likely(subflow->pm_notified) || list_empty(&subflow->node)) return true; subflow->pm_notified = 1; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 9256bd5d02ed..da2ed576f289 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -126,8 +126,14 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk) spin_lock_bh(&pm->lock); - if (READ_ONCE(pm->work_pending)) + /* mptcp_pm_fully_established() can be invoked by multiple + * racing paths - accept() and check_fully_established() + * be sure to serve this event only once. + */ + if (READ_ONCE(pm->work_pending) && + !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); + msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); spin_unlock_bh(&pm->lock); } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 57213ff60f78..2540d82742ac 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -701,6 +701,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) int sk_rbuf, ssk_rbuf; bool wake; + /* The peer can send data while we are shutting down this + * subflow at msk destruction time, but we must avoid enqueuing + * more data to the msk receive queue + */ + if (unlikely(subflow->disposable)) + return; + /* move_skbs_to_msk below can legitly clear the data_avail flag, * but we will need later to properly woke the reader, cache its * value @@ -2119,6 +2126,8 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, sock_orphan(ssk); } + subflow->disposable = 1; + /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops * the ssk has been already destroyed, we just need to release the * reference owned by msk; @@ -2126,8 +2135,7 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (!inet_csk(ssk)->icsk_ulp_ops) { kfree_rcu(subflow, rcu); } else { - /* otherwise ask tcp do dispose of ssk and subflow ctx */ - subflow->disposable = 1; + /* otherwise tcp will dispose of the ssk and subflow ctx */ __tcp_close(ssk, 0); /* close acquired an extra ref */ @@ -3208,6 +3216,17 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, bool slowpath; slowpath = lock_sock_fast(newsk); + + /* PM/worker can now acquire the first subflow socket + * lock without racing with listener queue cleanup, + * we can notify it, if needed. + */ + subflow = mptcp_subflow_ctx(msk->first); + list_add(&subflow->node, &msk->conn_list); + sock_hold(msk->first); + if (mptcp_is_fully_established(newsk)) + mptcp_pm_fully_established(msk); + mptcp_copy_inaddrs(newsk, msk->first); mptcp_rcv_space_init(msk, msk->first); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 9ff6fd486db6..f6c3c686a34a 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -165,6 +165,7 @@ enum mptcp_pm_status { MPTCP_PM_ADD_ADDR_SEND_ACK, MPTCP_PM_RM_ADDR_RECEIVED, MPTCP_PM_ESTABLISHED, + MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */ MPTCP_PM_SUBFLOW_ESTABLISHED, }; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 5f5815a1665f..fefcaf497938 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -614,8 +614,9 @@ create_child: */ inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED); - /* link the newly created socket to the msk */ - mptcp_add_pending_subflow(mptcp_sk(new_msk), ctx); + /* record the newly created socket as the first msk + * subflow, but don't link it yet into conn_list + */ WRITE_ONCE(mptcp_sk(new_msk)->first, child); /* new mpc subflow takes ownership of the newly @@ -1148,13 +1149,18 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP); mptcp_info2sockaddr(remote, &addr); + mptcp_add_pending_subflow(msk, subflow); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); if (err && err != -EINPROGRESS) - goto failed; + goto failed_unlink; - mptcp_add_pending_subflow(msk, subflow); return err; +failed_unlink: + spin_lock_bh(&msk->join_list_lock); + list_del(&subflow->node); + spin_unlock_bh(&msk->join_list_lock); + failed: subflow->disposable = 1; sock_release(sf); @@ -1333,9 +1339,10 @@ static void subflow_ulp_release(struct sock *ssk) sk = ctx->conn; if (sk) { /* if the msk has been orphaned, keep the ctx - * alive, will be freed by mptcp_done() + * alive, will be freed by __mptcp_close_ssk(), + * when the subflow is still unaccepted */ - release = ctx->disposable; + release = ctx->disposable || list_empty(&ctx->node); sock_put(sk); } |