summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-06-22 14:36:01 -0700
committerDavid S. Miller <davem@davemloft.net>2021-06-22 14:36:01 -0700
commit38f75922a6905b010f597fc70dbb5db28398728e (patch)
tree2d4908623c9dfadf7bcd2f9ef69662d27122e8cd /net
parenta432c771e2d9bc059ffe3028faf040c08b6a9f98 (diff)
parentfde56eea01f96b664eb63033990be0fd2a945da5 (diff)
downloadlinux-38f75922a6905b010f597fc70dbb5db28398728e.tar.bz2
Merge branch 'mptcp-C-flag-and-fixes'
Mat Martineau says: ==================== mptcp: Connection-time 'C' flag and two fixes Here are six more patches from the MPTCP tree. Most of them add support for the 'C' flag in the MPTCP connection-time option headers. This flag affects how the initial address and port are treated by each peer. Normally one peer may send MP_JOIN requests to the remote address and port that were used when initiating the MPTCP connection. The 'C' bit indicates that MP_JOINs should only be sent to remote addresses that have been advertised with ADD_ADDR. The other two patches are unrelated improvements. Patches 1-4: Add the 'C' flag feature, a sysctl to optionally enable it, and a selftest. Patch 5: Adjust rp_filter settings in a selftest. Patch 6: Improve rbuf cleanup for MPTCP sockets. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/mptcp/ctrl.c16
-rw-r--r--net/mptcp/options.c13
-rw-r--r--net/mptcp/pm.c1
-rw-r--r--net/mptcp/pm_netlink.c3
-rw-r--r--net/mptcp/protocol.c56
-rw-r--r--net/mptcp/protocol.h12
-rw-r--r--net/mptcp/subflow.c3
7 files changed, 66 insertions, 38 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 6c2639bb9c19..7d738bd06f2c 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -24,6 +24,7 @@ struct mptcp_pernet {
u8 mptcp_enabled;
unsigned int add_addr_timeout;
u8 checksum_enabled;
+ u8 allow_join_initial_addr_port;
};
static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
@@ -46,11 +47,17 @@ int mptcp_is_checksum_enabled(struct net *net)
return mptcp_get_pernet(net)->checksum_enabled;
}
+int mptcp_allow_join_id0(struct net *net)
+{
+ return mptcp_get_pernet(net)->allow_join_initial_addr_port;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
pernet->checksum_enabled = 0;
+ pernet->allow_join_initial_addr_port = 1;
}
#ifdef CONFIG_SYSCTL
@@ -80,6 +87,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE
},
+ {
+ .procname = "allow_join_initial_addr_port",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
{}
};
@@ -98,6 +113,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[0].data = &pernet->mptcp_enabled;
table[1].data = &pernet->add_addr_timeout;
table[2].data = &pernet->checksum_enabled;
+ table[3].data = &pernet->allow_join_initial_addr_port;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 25189595ed1d..a05270996613 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -83,6 +83,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (flags & MPTCP_CAP_CHECKSUM_REQD)
mp_opt->csum_reqd = 1;
+ if (flags & MPTCP_CAP_DENY_JOIN_ID0)
+ mp_opt->deny_join_id0 = 1;
+
mp_opt->mp_capable = 1;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
mp_opt->sndr_key = get_unaligned_be64(ptr);
@@ -360,6 +363,7 @@ void mptcp_get_options(const struct sock *sk,
mp_opt->mp_prio = 0;
mp_opt->reset = 0;
mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
+ mp_opt->deny_join_id0 = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -402,6 +406,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
if (subflow->request_mptcp) {
opts->suboptions = OPTION_MPTCP_MPC_SYN;
opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
+ opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
*size = TCPOLEN_MPTCP_MPC_SYN;
return true;
} else if (subflow->request_join) {
@@ -490,6 +495,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
opts->csum_reqd = READ_ONCE(msk->csum_enabled);
+ opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
/* Section 3.1.
* The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
@@ -827,6 +833,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
opts->sndr_key = subflow_req->local_key;
opts->csum_reqd = subflow_req->csum_reqd;
+ opts->allow_join_id0 = subflow_req->allow_join_id0;
*size = TCPOLEN_MPTCP_MPC_SYNACK;
pr_debug("subflow_req=%p, local_key=%llu",
subflow_req, subflow_req->local_key);
@@ -905,6 +912,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
return false;
}
+ if (mp_opt->deny_join_id0)
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
+
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
mptcp_subflow_fully_established(subflow, mp_opt);
@@ -1201,6 +1211,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
if (opts->csum_reqd)
flag |= MPTCP_CAP_CHECKSUM_REQD;
+ if (!opts->allow_join_id0)
+ flag |= MPTCP_CAP_DENY_JOIN_ID0;
+
*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
MPTCP_SUPPORTED_VERSION,
flag);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 9d00fa6d22e9..639271e09604 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -320,6 +320,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
WRITE_ONCE(msk->pm.addr_signal, 0);
WRITE_ONCE(msk->pm.accept_addr, false);
WRITE_ONCE(msk->pm.accept_subflow, false);
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
msk->pm.status = 0;
spin_lock_init(&msk->pm.lock);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d4732a4f223e..d2591ebf01d9 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -451,7 +451,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check if should create a new subflow */
if (msk->pm.local_addr_used < local_addr_max &&
- msk->pm.subflows < subflows_max) {
+ msk->pm.subflows < subflows_max &&
+ !READ_ONCE(msk->pm.remote_deny_join_id0)) {
local = select_local_address(pernet, msk);
if (local) {
struct mptcp_addr_info remote = { 0 };
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index cf75be02eb00..ce0c45dfb79e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -442,49 +442,46 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
}
}
-static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
+static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
{
bool slow;
- int ret;
slow = lock_sock_fast(ssk);
- ret = tcp_can_send_ack(ssk);
- if (ret)
+ if (tcp_can_send_ack(ssk))
tcp_cleanup_rbuf(ssk, 1);
unlock_sock_fast(ssk, slow);
- return ret;
+}
+
+static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
+{
+ const struct inet_connection_sock *icsk = inet_csk(ssk);
+ bool ack_pending = READ_ONCE(icsk->icsk_ack.pending);
+ const struct tcp_sock *tp = tcp_sk(ssk);
+
+ return (ack_pending & ICSK_ACK_SCHED) &&
+ ((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) >
+ READ_ONCE(icsk->icsk_ack.rcv_mss)) ||
+ (rx_empty && ack_pending &
+ (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
}
static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
- struct sock *ack_hint = READ_ONCE(msk->ack_hint);
int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
- bool cleanup;
+ int space = __mptcp_space(sk);
+ bool cleanup, rx_empty;
- /* this is a simple superset of what tcp_cleanup_rbuf() implements
- * so that we don't have to acquire the ssk socket lock most of the time
- * to do actually nothing
- */
- cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
- if (!cleanup)
- return;
+ cleanup = (space > 0) && (space >= (old_space << 1));
+ rx_empty = !atomic_read(&sk->sk_rmem_alloc);
- /* if the hinted ssk is still active, try to use it */
- if (likely(ack_hint)) {
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk))
- return;
- }
+ if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
+ mptcp_subflow_cleanup_rbuf(ssk);
}
-
- /* otherwise pick the first active subflow */
- mptcp_for_each_subflow(msk, subflow)
- if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow)))
- return;
}
static bool mptcp_check_data_fin(struct sock *sk)
@@ -629,7 +626,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
break;
}
} while (more_data_avail);
- WRITE_ONCE(msk->ack_hint, ssk);
*bytes += moved;
return done;
@@ -1910,7 +1906,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
__mptcp_update_rmem(sk);
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
mptcp_data_unlock(sk);
- tcp_cleanup_rbuf(ssk, moved);
if (unlikely(ssk->sk_err))
__mptcp_error_report(sk);
@@ -1926,7 +1921,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
ret |= __mptcp_ofo_queue(msk);
__mptcp_splice_receive_queue(sk);
mptcp_data_unlock(sk);
- mptcp_cleanup_rbuf(msk);
}
if (ret)
mptcp_check_data_fin((struct sock *)msk);
@@ -2175,9 +2169,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (ssk == msk->last_snd)
msk->last_snd = NULL;
- if (ssk == msk->ack_hint)
- msk->ack_hint = NULL;
-
if (ssk == msk->first)
msk->first = NULL;
@@ -2392,7 +2383,6 @@ static int __mptcp_init_sock(struct sock *sk)
msk->rmem_released = 0;
msk->tx_pending_data = 0;
- msk->ack_hint = NULL;
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 160c2ab09f19..d8ad3270dfab 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -79,8 +79,9 @@
#define MPTCP_VERSION_MASK (0x0F)
#define MPTCP_CAP_CHECKSUM_REQD BIT(7)
#define MPTCP_CAP_EXTENSIBILITY BIT(6)
+#define MPTCP_CAP_DENY_JOIN_ID0 BIT(5)
#define MPTCP_CAP_HMAC_SHA256 BIT(0)
-#define MPTCP_CAP_FLAG_MASK (0x3F)
+#define MPTCP_CAP_FLAG_MASK (0x1F)
/* MPTCP DSS flags */
#define MPTCP_DSS_DATA_FIN BIT(4)
@@ -137,7 +138,8 @@ struct mptcp_options_received {
mp_prio : 1,
echo : 1,
csum_reqd : 1,
- backup : 1;
+ backup : 1,
+ deny_join_id0 : 1;
u32 token;
u32 nonce;
u64 thmac;
@@ -192,6 +194,7 @@ struct mptcp_pm_data {
bool work_pending;
bool accept_addr;
bool accept_subflow;
+ bool remote_deny_join_id0;
u8 add_addr_signaled;
u8 add_addr_accepted;
u8 local_addr_used;
@@ -240,7 +243,6 @@ struct mptcp_sock {
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
spinlock_t join_list_lock;
- struct sock *ack_hint;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -350,7 +352,8 @@ struct mptcp_subflow_request_sock {
u16 mp_capable : 1,
mp_join : 1,
backup : 1,
- csum_reqd : 1;
+ csum_reqd : 1,
+ allow_join_id0 : 1;
u8 local_id;
u8 remote_id;
u64 local_key;
@@ -540,6 +543,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
int mptcp_is_enabled(struct net *net);
unsigned int mptcp_get_add_addr_timeout(struct net *net);
int mptcp_is_checksum_enabled(struct net *net);
+int mptcp_allow_join_id0(struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool mptcp_subflow_data_available(struct sock *sk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 585951e7e52f..d55f4ef736a5 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -109,6 +109,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
+ subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener));
subflow_req->msk = NULL;
mptcp_token_init_request(req);
}
@@ -407,6 +408,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (mp_opt.csum_reqd)
WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
+ if (mp_opt.deny_join_id0)
+ WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;