diff options
author | David S. Miller <davem@davemloft.net> | 2021-06-22 14:36:01 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-06-22 14:36:01 -0700 |
commit | 38f75922a6905b010f597fc70dbb5db28398728e (patch) | |
tree | 2d4908623c9dfadf7bcd2f9ef69662d27122e8cd /net | |
parent | a432c771e2d9bc059ffe3028faf040c08b6a9f98 (diff) | |
parent | fde56eea01f96b664eb63033990be0fd2a945da5 (diff) | |
download | linux-38f75922a6905b010f597fc70dbb5db28398728e.tar.bz2 |
Merge branch 'mptcp-C-flag-and-fixes'
Mat Martineau says:
====================
mptcp: Connection-time 'C' flag and two fixes
Here are six more patches from the MPTCP tree.
Most of them add support for the 'C' flag in the MPTCP connection-time
option headers. This flag affects how the initial address and port are
treated by each peer. Normally one peer may send MP_JOIN requests to the
remote address and port that were used when initiating the MPTCP
connection. The 'C' bit indicates that MP_JOINs should only be sent to
remote addresses that have been advertised with ADD_ADDR.
The other two patches are unrelated improvements.
Patches 1-4: Add the 'C' flag feature, a sysctl to optionally enable it,
and a selftest.
Patch 5: Adjust rp_filter settings in a selftest.
Patch 6: Improve rbuf cleanup for MPTCP sockets.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/mptcp/ctrl.c | 16 | ||||
-rw-r--r-- | net/mptcp/options.c | 13 | ||||
-rw-r--r-- | net/mptcp/pm.c | 1 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 3 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 56 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 12 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 3 |
7 files changed, 66 insertions, 38 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 6c2639bb9c19..7d738bd06f2c 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -24,6 +24,7 @@ struct mptcp_pernet { u8 mptcp_enabled; unsigned int add_addr_timeout; u8 checksum_enabled; + u8 allow_join_initial_addr_port; }; static struct mptcp_pernet *mptcp_get_pernet(struct net *net) @@ -46,11 +47,17 @@ int mptcp_is_checksum_enabled(struct net *net) return mptcp_get_pernet(net)->checksum_enabled; } +int mptcp_allow_join_id0(struct net *net) +{ + return mptcp_get_pernet(net)->allow_join_initial_addr_port; +} + static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; pernet->checksum_enabled = 0; + pernet->allow_join_initial_addr_port = 1; } #ifdef CONFIG_SYSCTL @@ -80,6 +87,14 @@ static struct ctl_table mptcp_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE }, + { + .procname = "allow_join_initial_addr_port", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, {} }; @@ -98,6 +113,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[0].data = &pernet->mptcp_enabled; table[1].data = &pernet->add_addr_timeout; table[2].data = &pernet->checksum_enabled; + table[3].data = &pernet->allow_join_initial_addr_port; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 25189595ed1d..a05270996613 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -83,6 +83,9 @@ static void mptcp_parse_option(const struct sk_buff *skb, if (flags & MPTCP_CAP_CHECKSUM_REQD) mp_opt->csum_reqd = 1; + if (flags & MPTCP_CAP_DENY_JOIN_ID0) + mp_opt->deny_join_id0 = 1; + mp_opt->mp_capable = 1; if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { mp_opt->sndr_key = get_unaligned_be64(ptr); @@ -360,6 +363,7 @@ void mptcp_get_options(const struct sock *sk, mp_opt->mp_prio = 0; mp_opt->reset = 0; mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled); + mp_opt->deny_join_id0 = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -402,6 +406,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, if (subflow->request_mptcp) { opts->suboptions = OPTION_MPTCP_MPC_SYN; opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk)); + opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk)); *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { @@ -490,6 +495,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; opts->csum_reqd = READ_ONCE(msk->csum_enabled); + opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk)); /* Section 3.1. * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK @@ -827,6 +833,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, opts->suboptions = OPTION_MPTCP_MPC_SYNACK; opts->sndr_key = subflow_req->local_key; opts->csum_reqd = subflow_req->csum_reqd; + opts->allow_join_id0 = subflow_req->allow_join_id0; *size = TCPOLEN_MPTCP_MPC_SYNACK; pr_debug("subflow_req=%p, local_key=%llu", subflow_req, subflow_req->local_key); @@ -905,6 +912,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, return false; } + if (mp_opt->deny_join_id0) + WRITE_ONCE(msk->pm.remote_deny_join_id0, true); + if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); mptcp_subflow_fully_established(subflow, mp_opt); @@ -1201,6 +1211,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, if (opts->csum_reqd) flag |= MPTCP_CAP_CHECKSUM_REQD; + if (!opts->allow_join_id0) + flag |= MPTCP_CAP_DENY_JOIN_ID0; + *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, MPTCP_SUPPORTED_VERSION, flag); diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 9d00fa6d22e9..639271e09604 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -320,6 +320,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk) WRITE_ONCE(msk->pm.addr_signal, 0); WRITE_ONCE(msk->pm.accept_addr, false); WRITE_ONCE(msk->pm.accept_subflow, false); + WRITE_ONCE(msk->pm.remote_deny_join_id0, false); msk->pm.status = 0; spin_lock_init(&msk->pm.lock); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d4732a4f223e..d2591ebf01d9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -451,7 +451,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check if should create a new subflow */ if (msk->pm.local_addr_used < local_addr_max && - msk->pm.subflows < subflows_max) { + msk->pm.subflows < subflows_max && + !READ_ONCE(msk->pm.remote_deny_join_id0)) { local = select_local_address(pernet, msk); if (local) { struct mptcp_addr_info remote = { 0 }; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cf75be02eb00..ce0c45dfb79e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -442,49 +442,46 @@ static void mptcp_send_ack(struct mptcp_sock *msk) } } -static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk) +static void mptcp_subflow_cleanup_rbuf(struct sock *ssk) { bool slow; - int ret; slow = lock_sock_fast(ssk); - ret = tcp_can_send_ack(ssk); - if (ret) + if (tcp_can_send_ack(ssk)) tcp_cleanup_rbuf(ssk, 1); unlock_sock_fast(ssk, slow); - return ret; +} + +static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty) +{ + const struct inet_connection_sock *icsk = inet_csk(ssk); + bool ack_pending = READ_ONCE(icsk->icsk_ack.pending); + const struct tcp_sock *tp = tcp_sk(ssk); + + return (ack_pending & ICSK_ACK_SCHED) && + ((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) > + READ_ONCE(icsk->icsk_ack.rcv_mss)) || + (rx_empty && ack_pending & + (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED))); } static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) { - struct sock *ack_hint = READ_ONCE(msk->ack_hint); int old_space = READ_ONCE(msk->old_wspace); struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - bool cleanup; + int space = __mptcp_space(sk); + bool cleanup, rx_empty; - /* this is a simple superset of what tcp_cleanup_rbuf() implements - * so that we don't have to acquire the ssk socket lock most of the time - * to do actually nothing - */ - cleanup = __mptcp_space(sk) - old_space >= max(0, old_space); - if (!cleanup) - return; + cleanup = (space > 0) && (space >= (old_space << 1)); + rx_empty = !atomic_read(&sk->sk_rmem_alloc); - /* if the hinted ssk is still active, try to use it */ - if (likely(ack_hint)) { - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk)) - return; - } + if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty)) + mptcp_subflow_cleanup_rbuf(ssk); } - - /* otherwise pick the first active subflow */ - mptcp_for_each_subflow(msk, subflow) - if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow))) - return; } static bool mptcp_check_data_fin(struct sock *sk) @@ -629,7 +626,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, break; } } while (more_data_avail); - WRITE_ONCE(msk->ack_hint, ssk); *bytes += moved; return done; @@ -1910,7 +1906,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) __mptcp_update_rmem(sk); done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); mptcp_data_unlock(sk); - tcp_cleanup_rbuf(ssk, moved); if (unlikely(ssk->sk_err)) __mptcp_error_report(sk); @@ -1926,7 +1921,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) ret |= __mptcp_ofo_queue(msk); __mptcp_splice_receive_queue(sk); mptcp_data_unlock(sk); - mptcp_cleanup_rbuf(msk); } if (ret) mptcp_check_data_fin((struct sock *)msk); @@ -2175,9 +2169,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (ssk == msk->last_snd) msk->last_snd = NULL; - if (ssk == msk->ack_hint) - msk->ack_hint = NULL; - if (ssk == msk->first) msk->first = NULL; @@ -2392,7 +2383,6 @@ static int __mptcp_init_sock(struct sock *sk) msk->rmem_released = 0; msk->tx_pending_data = 0; - msk->ack_hint = NULL; msk->first = NULL; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 160c2ab09f19..d8ad3270dfab 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -79,8 +79,9 @@ #define MPTCP_VERSION_MASK (0x0F) #define MPTCP_CAP_CHECKSUM_REQD BIT(7) #define MPTCP_CAP_EXTENSIBILITY BIT(6) +#define MPTCP_CAP_DENY_JOIN_ID0 BIT(5) #define MPTCP_CAP_HMAC_SHA256 BIT(0) -#define MPTCP_CAP_FLAG_MASK (0x3F) +#define MPTCP_CAP_FLAG_MASK (0x1F) /* MPTCP DSS flags */ #define MPTCP_DSS_DATA_FIN BIT(4) @@ -137,7 +138,8 @@ struct mptcp_options_received { mp_prio : 1, echo : 1, csum_reqd : 1, - backup : 1; + backup : 1, + deny_join_id0 : 1; u32 token; u32 nonce; u64 thmac; @@ -192,6 +194,7 @@ struct mptcp_pm_data { bool work_pending; bool accept_addr; bool accept_subflow; + bool remote_deny_join_id0; u8 add_addr_signaled; u8 add_addr_accepted; u8 local_addr_used; @@ -240,7 +243,6 @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; spinlock_t join_list_lock; - struct sock *ack_hint; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -350,7 +352,8 @@ struct mptcp_subflow_request_sock { u16 mp_capable : 1, mp_join : 1, backup : 1, - csum_reqd : 1; + csum_reqd : 1, + allow_join_id0 : 1; u8 local_id; u8 remote_id; u64 local_key; @@ -540,6 +543,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); int mptcp_is_checksum_enabled(struct net *net); +int mptcp_allow_join_id0(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 585951e7e52f..d55f4ef736a5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -109,6 +109,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis subflow_req->mp_capable = 0; subflow_req->mp_join = 0; subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener)); + subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener)); subflow_req->msk = NULL; mptcp_token_init_request(req); } @@ -407,6 +408,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (mp_opt.csum_reqd) WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true); + if (mp_opt.deny_join_id0) + WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true); subflow->mp_capable = 1; subflow->can_ack = 1; subflow->remote_key = mp_opt.sndr_key; |