summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.txt20
-rw-r--r--include/net/tcp.h41
-rw-r--r--include/uapi/linux/snmp.h1
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c93
-rw-r--r--net/ipv4/tcp.c29
-rw-r--r--net/ipv4/tcp_fastopen.c233
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/tcp_fastopen_backup_key.c336
-rwxr-xr-xtools/testing/selftests/net/tcp_fastopen_backup_key.sh55
11 files changed, 694 insertions, 118 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 14fe93049d28..a73b3a02e49a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -648,6 +648,26 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER
0 to disable the blackhole detection.
By default, it is set to 1hr.
+tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs
+ The list consists of a primary key and an optional backup key. The
+ primary key is used for both creating and validating cookies, while the
+ optional backup key is only used for validating cookies. The purpose of
+ the backup key is to maximize TFO validation when keys are rotated.
+
+ A randomly chosen primary key may be configured by the kernel if
+ the tcp_fastopen sysctl is set to 0x400 (see above), or if the
+ TCP_FASTOPEN setsockopt() optname is set and a key has not been
+ previously configured via sysctl. If keys are configured via
+ setsockopt() by using the TCP_FASTOPEN_KEY optname, then those
+ per-socket keys will be used instead of any keys that are specified via
+ sysctl.
+
+ A key is specified as 4 8-digit hexadecimal integers which are separated
+ by a '-' as: xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx. Leading zeros may be
+ omitted. A primary and a backup key may be specified by separating them
+ by a comma. If only one key is specified, it becomes the primary key and
+ any previously configured backup keys are removed.
+
tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 127. Default value
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 985aa5db570c..0083a14fb64f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1614,7 +1614,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp);
void tcp_fastopen_destroy_cipher(struct sock *sk);
void tcp_fastopen_ctx_destroy(struct net *net);
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
- void *key, unsigned int len);
+ void *primary_key, void *backup_key,
+ unsigned int len);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
@@ -1625,11 +1626,14 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie);
bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
#define TCP_FASTOPEN_KEY_LENGTH 16
+#define TCP_FASTOPEN_KEY_MAX 2
+#define TCP_FASTOPEN_KEY_BUF_LENGTH \
+ (TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX)
/* Fastopen key context */
struct tcp_fastopen_context {
- struct crypto_cipher *tfm;
- __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ struct crypto_cipher *tfm[TCP_FASTOPEN_KEY_MAX];
+ __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
struct rcu_head rcu;
};
@@ -1639,6 +1643,37 @@ bool tcp_fastopen_active_should_disable(struct sock *sk);
void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
+/* Caller needs to wrap with rcu_read_(un)lock() */
+static inline
+struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk)
+{
+ struct tcp_fastopen_context *ctx;
+
+ ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+ if (!ctx)
+ ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+ return ctx;
+}
+
+static inline
+bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc,
+ const struct tcp_fastopen_cookie *orig)
+{
+ if (orig->len == TCP_FASTOPEN_COOKIE_SIZE &&
+ orig->len == foc->len &&
+ !memcmp(orig->val, foc->val, foc->len))
+ return true;
+ return false;
+}
+
+static inline
+int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx)
+{
+ if (ctx->tfm[1])
+ return 2;
+ return 1;
+}
+
/* Latencies incurred by various limits for a sender. They are
* chronograph-like stats that are mutually exclusive.
*/
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 86dc24a96c90..74904e9d1b72 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -283,6 +283,7 @@ enum
LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */
LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */
LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */
+ LINUX_MIB_TCPFASTOPENPASSIVEALTKEY, /* TCPFastOpenPassiveAltKey */
__LINUX_MIB_MAX
};
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b613572c6616..4746f963c439 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -291,6 +291,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
+ SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 875867b64d6a..90f09e47198b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -277,55 +277,97 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
return ret;
}
+static int sscanf_key(char *buf, __le32 *key)
+{
+ u32 user_key[4];
+ int i, ret = 0;
+
+ if (sscanf(buf, "%x-%x-%x-%x", user_key, user_key + 1,
+ user_key + 2, user_key + 3) != 4) {
+ ret = -EINVAL;
+ } else {
+ for (i = 0; i < ARRAY_SIZE(user_key); i++)
+ key[i] = cpu_to_le32(user_key[i]);
+ }
+ pr_debug("proc TFO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
+ user_key[0], user_key[1], user_key[2], user_key[3], buf, ret);
+
+ return ret;
+}
+
static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
struct net *net = container_of(table->data, struct net,
ipv4.sysctl_tcp_fastopen);
- struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
- struct tcp_fastopen_context *ctxt;
- u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
- __le32 key[4];
- int ret, i;
+ /* maxlen to print the list of keys in hex (*2), with dashes
+ * separating doublewords and a comma in between keys.
+ */
+ struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH *
+ 2 * TCP_FASTOPEN_KEY_MAX) +
+ (TCP_FASTOPEN_KEY_MAX * 5)) };
+ struct tcp_fastopen_context *ctx;
+ u32 user_key[TCP_FASTOPEN_KEY_MAX * 4];
+ __le32 key[TCP_FASTOPEN_KEY_MAX * 4];
+ char *backup_data;
+ int ret, i = 0, off = 0, n_keys = 0;
tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
if (!tbl.data)
return -ENOMEM;
rcu_read_lock();
- ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
- if (ctxt)
- memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
- else
- memset(key, 0, sizeof(key));
+ ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+ if (ctx) {
+ n_keys = tcp_fastopen_context_len(ctx);
+ memcpy(&key[0], &ctx->key[0], TCP_FASTOPEN_KEY_LENGTH * n_keys);
+ }
rcu_read_unlock();
- for (i = 0; i < ARRAY_SIZE(key); i++)
+ if (!n_keys) {
+ memset(&key[0], 0, TCP_FASTOPEN_KEY_LENGTH);
+ n_keys = 1;
+ }
+
+ for (i = 0; i < n_keys * 4; i++)
user_key[i] = le32_to_cpu(key[i]);
- snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
- user_key[0], user_key[1], user_key[2], user_key[3]);
+ for (i = 0; i < n_keys; i++) {
+ off += snprintf(tbl.data + off, tbl.maxlen - off,
+ "%08x-%08x-%08x-%08x",
+ user_key[i * 4],
+ user_key[i * 4 + 1],
+ user_key[i * 4 + 2],
+ user_key[i * 4 + 3]);
+ if (i + 1 < n_keys)
+ off += snprintf(tbl.data + off, tbl.maxlen - off, ",");
+ }
+
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0) {
- if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
- user_key + 2, user_key + 3) != 4) {
+ backup_data = strchr(tbl.data, ',');
+ if (backup_data) {
+ *backup_data = '\0';
+ backup_data++;
+ }
+ if (sscanf_key(tbl.data, key)) {
ret = -EINVAL;
goto bad_key;
}
-
- for (i = 0; i < ARRAY_SIZE(user_key); i++)
- key[i] = cpu_to_le32(user_key[i]);
-
+ if (backup_data) {
+ if (sscanf_key(backup_data, key + 4)) {
+ ret = -EINVAL;
+ goto bad_key;
+ }
+ }
tcp_fastopen_reset_cipher(net, NULL, key,
+ backup_data ? key + 4 : NULL,
TCP_FASTOPEN_KEY_LENGTH);
}
bad_key:
- pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
- user_key[0], user_key[1], user_key[2], user_key[3],
- (char *)tbl.data, ret);
kfree(tbl.data);
return ret;
}
@@ -933,7 +975,12 @@ static struct ctl_table ipv4_net_table[] = {
.procname = "tcp_fastopen_key",
.mode = 0600,
.data = &init_net.ipv4.sysctl_tcp_fastopen,
- .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+ /* maxlen to print the list of keys in hex (*2), with dashes
+ * separating doublewords and a comma in between keys.
+ */
+ .maxlen = ((TCP_FASTOPEN_KEY_LENGTH *
+ 2 * TCP_FASTOPEN_KEY_MAX) +
+ (TCP_FASTOPEN_KEY_MAX * 5)),
.proc_handler = proc_tcp_fastopen_key,
},
{
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 53d61ca3ac4b..27ce13ece510 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2790,15 +2790,24 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
return err;
}
case TCP_FASTOPEN_KEY: {
- __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
+ __u8 *backup_key = NULL;
- if (optlen != sizeof(key))
+ /* Allow a backup key as well to facilitate key rotation
+ * First key is the active one.
+ */
+ if (optlen != TCP_FASTOPEN_KEY_LENGTH &&
+ optlen != TCP_FASTOPEN_KEY_BUF_LENGTH)
return -EINVAL;
if (copy_from_user(key, optval, optlen))
return -EFAULT;
- return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
+ if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH)
+ backup_key = key + TCP_FASTOPEN_KEY_LENGTH;
+
+ return tcp_fastopen_reset_cipher(net, sk, key, backup_key,
+ TCP_FASTOPEN_KEY_LENGTH);
}
default:
/* fallthru */
@@ -3452,21 +3461,23 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return 0;
case TCP_FASTOPEN_KEY: {
- __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
struct tcp_fastopen_context *ctx;
+ unsigned int key_len = 0;
if (get_user(len, optlen))
return -EFAULT;
rcu_read_lock();
ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
- if (ctx)
- memcpy(key, ctx->key, sizeof(key));
- else
- len = 0;
+ if (ctx) {
+ key_len = tcp_fastopen_context_len(ctx) *
+ TCP_FASTOPEN_KEY_LENGTH;
+ memcpy(&key[0], &ctx->key[0], key_len);
+ }
rcu_read_unlock();
- len = min_t(unsigned int, len, sizeof(key));
+ len = min_t(unsigned int, len, key_len);
if (put_user(len, optlen))
return -EFAULT;
if (copy_to_user(optval, key, len))
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 018a48477355..8e1580485c9e 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -30,14 +30,20 @@ void tcp_fastopen_init_key_once(struct net *net)
* for a valid cookie, so this is an acceptable risk.
*/
get_random_bytes(key, sizeof(key));
- tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key));
+ tcp_fastopen_reset_cipher(net, NULL, key, NULL, sizeof(key));
}
static void tcp_fastopen_ctx_free(struct rcu_head *head)
{
struct tcp_fastopen_context *ctx =
container_of(head, struct tcp_fastopen_context, rcu);
- crypto_free_cipher(ctx->tfm);
+ int i;
+
+ /* We own ctx, thus no need to hold the Fastopen-lock */
+ for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++) {
+ if (ctx->tfm[i])
+ crypto_free_cipher(ctx->tfm[i]);
+ }
kfree(ctx);
}
@@ -66,33 +72,54 @@ void tcp_fastopen_ctx_destroy(struct net *net)
call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
}
+struct tcp_fastopen_context *tcp_fastopen_alloc_ctx(void *primary_key,
+ void *backup_key,
+ unsigned int len)
+{
+ struct tcp_fastopen_context *new_ctx;
+ void *key = primary_key;
+ int err, i;
+
+ new_ctx = kmalloc(sizeof(*new_ctx), GFP_KERNEL);
+ if (!new_ctx)
+ return ERR_PTR(-ENOMEM);
+ for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++)
+ new_ctx->tfm[i] = NULL;
+ for (i = 0; i < (backup_key ? 2 : 1); i++) {
+ new_ctx->tfm[i] = crypto_alloc_cipher("aes", 0, 0);
+ if (IS_ERR(new_ctx->tfm[i])) {
+ err = PTR_ERR(new_ctx->tfm[i]);
+ new_ctx->tfm[i] = NULL;
+ pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
+ goto out;
+ }
+ err = crypto_cipher_setkey(new_ctx->tfm[i], key, len);
+ if (err) {
+ pr_err("TCP: TFO cipher key error: %d\n", err);
+ goto out;
+ }
+ memcpy(&new_ctx->key[i * TCP_FASTOPEN_KEY_LENGTH], key, len);
+ key = backup_key;
+ }
+ return new_ctx;
+out:
+ tcp_fastopen_ctx_free(&new_ctx->rcu);
+ return ERR_PTR(err);
+}
+
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
- void *key, unsigned int len)
+ void *primary_key, void *backup_key,
+ unsigned int len)
{
struct tcp_fastopen_context *ctx, *octx;
struct fastopen_queue *q;
- int err;
+ int err = 0;
- ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
- ctx->tfm = crypto_alloc_cipher("aes", 0, 0);
-
- if (IS_ERR(ctx->tfm)) {
- err = PTR_ERR(ctx->tfm);
-error: kfree(ctx);
- pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
- return err;
+ ctx = tcp_fastopen_alloc_ctx(primary_key, backup_key, len);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
}
- err = crypto_cipher_setkey(ctx->tfm, key, len);
- if (err) {
- pr_err("TCP: TFO cipher key error: %d\n", err);
- crypto_free_cipher(ctx->tfm);
- goto error;
- }
- memcpy(ctx->key, key, len);
-
-
spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
if (sk) {
q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
@@ -108,28 +135,42 @@ error: kfree(ctx);
if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
+out:
return err;
}
-static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
- struct tcp_fastopen_cookie *foc)
+static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
+ struct sk_buff *syn,
+ struct crypto_cipher *tfm,
+ struct tcp_fastopen_cookie *foc)
{
- struct tcp_fastopen_context *ctx;
- bool ok = false;
-
- rcu_read_lock();
+ if (req->rsk_ops->family == AF_INET) {
+ const struct iphdr *iph = ip_hdr(syn);
+ __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
- if (!ctx)
- ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+ crypto_cipher_encrypt_one(tfm, foc->val, (void *)path);
+ foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+ return true;
+ }
- if (ctx) {
- crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (req->rsk_ops->family == AF_INET6) {
+ const struct ipv6hdr *ip6h = ipv6_hdr(syn);
+ struct tcp_fastopen_cookie tmp;
+ struct in6_addr *buf;
+ int i;
+
+ crypto_cipher_encrypt_one(tfm, tmp.val,
+ (void *)&ip6h->saddr);
+ buf = &tmp.addr;
+ for (i = 0; i < 4; i++)
+ buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
+ crypto_cipher_encrypt_one(tfm, foc->val, (void *)buf);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
- ok = true;
+ return true;
}
- rcu_read_unlock();
- return ok;
+#endif
+ return false;
}
/* Generate the fastopen cookie by doing aes128 encryption on both
@@ -138,37 +179,20 @@ static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
*
* XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
*/
-static bool tcp_fastopen_cookie_gen(struct sock *sk,
+static void tcp_fastopen_cookie_gen(struct sock *sk,
struct request_sock *req,
struct sk_buff *syn,
struct tcp_fastopen_cookie *foc)
{
- if (req->rsk_ops->family == AF_INET) {
- const struct iphdr *iph = ip_hdr(syn);
-
- __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- return __tcp_fastopen_cookie_gen(sk, path, foc);
- }
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (req->rsk_ops->family == AF_INET6) {
- const struct ipv6hdr *ip6h = ipv6_hdr(syn);
- struct tcp_fastopen_cookie tmp;
-
- if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
- struct in6_addr *buf = &tmp.addr;
- int i;
+ struct tcp_fastopen_context *ctx;
- for (i = 0; i < 4; i++)
- buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
- return __tcp_fastopen_cookie_gen(sk, buf, foc);
- }
- }
-#endif
- return false;
+ rcu_read_lock();
+ ctx = tcp_fastopen_get_ctx(sk);
+ if (ctx)
+ __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[0], foc);
+ rcu_read_unlock();
}
-
/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
* queue this additional data / FIN.
*/
@@ -212,6 +236,35 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
tcp_fin(sk);
}
+/* returns 0 - no key match, 1 for primary, 2 for backup */
+static int tcp_fastopen_cookie_gen_check(struct sock *sk,
+ struct request_sock *req,
+ struct sk_buff *syn,
+ struct tcp_fastopen_cookie *orig,
+ struct tcp_fastopen_cookie *valid_foc)
+{
+ struct tcp_fastopen_cookie search_foc = { .len = -1 };
+ struct tcp_fastopen_cookie *foc = valid_foc;
+ struct tcp_fastopen_context *ctx;
+ int i, ret = 0;
+
+ rcu_read_lock();
+ ctx = tcp_fastopen_get_ctx(sk);
+ if (!ctx)
+ goto out;
+ for (i = 0; i < tcp_fastopen_context_len(ctx); i++) {
+ __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[i], foc);
+ if (tcp_fastopen_cookie_match(foc, orig)) {
+ ret = i + 1;
+ goto out;
+ }
+ foc = &search_foc;
+ }
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct sk_buff *skb,
struct request_sock *req)
@@ -331,6 +384,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
+ int ret = 0;
if (foc->len == 0) /* Client requests a cookie */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
@@ -346,31 +400,44 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;
- if (foc->len >= 0 && /* Client presents or requests a cookie */
- tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) &&
- foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
- foc->len == valid_foc.len &&
- !memcmp(foc->val, valid_foc.val, foc->len)) {
- /* Cookie is valid. Create a (full) child socket to accept
- * the data in SYN before returning a SYN-ACK to ack the
- * data. If we fail to create the socket, fall back and
- * ack the ISN only but includes the same cookie.
- *
- * Note: Data-less SYN with valid cookie is allowed to send
- * data in SYN_RECV state.
- */
+ if (foc->len == 0) {
+ /* Client requests a cookie. */
+ tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
+ } else if (foc->len > 0) {
+ ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
+ &valid_foc);
+ if (!ret) {
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+ } else {
+ /* Cookie is valid. Create a (full) child socket to
+ * accept the data in SYN before returning a SYN-ACK to
+ * ack the data. If we fail to create the socket, fall
+ * back and ack the ISN only but includes the same
+ * cookie.
+ *
+ * Note: Data-less SYN with valid cookie is allowed to
+ * send data in SYN_RECV state.
+ */
fastopen:
- child = tcp_fastopen_create_child(sk, skb, req);
- if (child) {
- foc->len = -1;
+ child = tcp_fastopen_create_child(sk, skb, req);
+ if (child) {
+ if (ret == 2) {
+ valid_foc.exp = foc->exp;
+ *foc = valid_foc;
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVEALTKEY);
+ } else {
+ foc->len = -1;
+ }
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVE);
+ return child;
+ }
NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPFASTOPENPASSIVE);
- return child;
+ LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
}
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
- } else if (foc->len > 0) /* Client presents an invalid cookie */
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-
+ }
valid_foc.exp = foc->exp;
*foc = valid_foc;
return NULL;
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 99a4e41d5249..4ce0bc1612f5 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -20,3 +20,4 @@ ip_defrag
so_txtime
flowlabel
flowlabel_mgr
+tcp_fastopen_backup_key
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 8343fb9d8a46..9a275d932fd5 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -10,12 +10,14 @@ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
+TEST_PROGS += tcp_fastopen_backup_key.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
+TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
new file mode 100644
index 000000000000..58bb77d9e7e1
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test key rotation for TFO.
+ * New keys are 'rotated' in two steps:
+ * 1) Add new key as the 'backup' key 'behind' the primary key
+ * 2) Make new key the primary by swapping the backup and primary keys
+ *
+ * The rotation is done in stages using multiple sockets bound
+ * to the same port via SO_REUSEPORT. This simulates key rotation
+ * behind say a load balancer. We verify that across the rotation
+ * there are no cases in which a cookie is not accepted by verifying
+ * that TcpExtTCPFastOpenPassiveFail remains 0.
+ */
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+#include <netinet/tcp.h>
+#include <fcntl.h>
+#include <time.h>
+
+#ifndef TCP_FASTOPEN_KEY
+#define TCP_FASTOPEN_KEY 33
+#endif
+
+#define N_LISTEN 10
+#define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key"
+#define KEY_LENGTH 16
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+static bool do_ipv6;
+static bool do_sockopt;
+static bool do_rotate;
+static int key_len = KEY_LENGTH;
+static int rcv_fds[N_LISTEN];
+static int proc_fd;
+static const char *IP4_ADDR = "127.0.0.1";
+static const char *IP6_ADDR = "::1";
+static const int PORT = 8891;
+
+static void get_keys(int fd, uint32_t *keys)
+{
+ char buf[128];
+ int len = KEY_LENGTH * 2;
+
+ if (do_sockopt) {
+ if (getsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys, &len))
+ error(1, errno, "Unable to get key");
+ return;
+ }
+ lseek(proc_fd, 0, SEEK_SET);
+ if (read(proc_fd, buf, sizeof(buf)) <= 0)
+ error(1, errno, "Unable to read %s", PROC_FASTOPEN_KEY);
+ if (sscanf(buf, "%x-%x-%x-%x,%x-%x-%x-%x", keys, keys + 1, keys + 2,
+ keys + 3, keys + 4, keys + 5, keys + 6, keys + 7) != 8)
+ error(1, 0, "Unable to parse %s", PROC_FASTOPEN_KEY);
+}
+
+static void set_keys(int fd, uint32_t *keys)
+{
+ char buf[128];
+
+ if (do_sockopt) {
+ if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys,
+ key_len))
+ error(1, errno, "Unable to set key");
+ return;
+ }
+ if (do_rotate)
+ snprintf(buf, 128, "%08x-%08x-%08x-%08x,%08x-%08x-%08x-%08x",
+ keys[0], keys[1], keys[2], keys[3], keys[4], keys[5],
+ keys[6], keys[7]);
+ else
+ snprintf(buf, 128, "%08x-%08x-%08x-%08x",
+ keys[0], keys[1], keys[2], keys[3]);
+ lseek(proc_fd, 0, SEEK_SET);
+ if (write(proc_fd, buf, sizeof(buf)) <= 0)
+ error(1, errno, "Unable to write %s", PROC_FASTOPEN_KEY);
+}
+
+static void build_rcv_fd(int family, int proto, int *rcv_fds)
+{
+ struct sockaddr_in addr4 = {0};
+ struct sockaddr_in6 addr6 = {0};
+ struct sockaddr *addr;
+ int opt = 1, i, sz;
+ int qlen = 100;
+ uint32_t keys[8];
+
+ switch (family) {
+ case AF_INET:
+ addr4.sin_family = family;
+ addr4.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4.sin_port = htons(PORT);
+ sz = sizeof(addr4);
+ addr = (struct sockaddr *)&addr4;
+ break;
+ case AF_INET6:
+ addr6.sin6_family = AF_INET6;
+ addr6.sin6_addr = in6addr_any;
+ addr6.sin6_port = htons(PORT);
+ sz = sizeof(addr6);
+ addr = (struct sockaddr *)&addr6;
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ /* clang does not recognize error() above as terminating
+ * the program, so it complains that saddr, sz are
+ * not initialized when this code path is taken. Silence it.
+ */
+ return;
+ }
+ for (i = 0; i < ARRAY_SIZE(keys); i++)
+ keys[i] = rand();
+ for (i = 0; i < N_LISTEN; i++) {
+ rcv_fds[i] = socket(family, proto, 0);
+ if (rcv_fds[i] < 0)
+ error(1, errno, "failed to create receive socket");
+ if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+ if (bind(rcv_fds[i], addr, sz))
+ error(1, errno, "failed to bind receive socket");
+ if (setsockopt(rcv_fds[i], SOL_TCP, TCP_FASTOPEN, &qlen,
+ sizeof(qlen)))
+ error(1, errno, "failed to set TCP_FASTOPEN");
+ set_keys(rcv_fds[i], keys);
+ if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static int connect_and_send(int family, int proto)
+{
+ struct sockaddr_in saddr4 = {0};
+ struct sockaddr_in daddr4 = {0};
+ struct sockaddr_in6 saddr6 = {0};
+ struct sockaddr_in6 daddr6 = {0};
+ struct sockaddr *saddr, *daddr;
+ int fd, sz, ret;
+ char data[1];
+
+ switch (family) {
+ case AF_INET:
+ saddr4.sin_family = AF_INET;
+ saddr4.sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr4.sin_port = 0;
+
+ daddr4.sin_family = AF_INET;
+ if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr))
+ error(1, errno, "inet_pton failed: %s", IP4_ADDR);
+ daddr4.sin_port = htons(PORT);
+
+ sz = sizeof(saddr4);
+ saddr = (struct sockaddr *)&saddr4;
+ daddr = (struct sockaddr *)&daddr4;
+ break;
+ case AF_INET6:
+ saddr6.sin6_family = AF_INET6;
+ saddr6.sin6_addr = in6addr_any;
+
+ daddr6.sin6_family = AF_INET6;
+ if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr))
+ error(1, errno, "inet_pton failed: %s", IP6_ADDR);
+ daddr6.sin6_port = htons(PORT);
+
+ sz = sizeof(saddr6);
+ saddr = (struct sockaddr *)&saddr6;
+ daddr = (struct sockaddr *)&daddr6;
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ /* clang does not recognize error() above as terminating
+ * the program, so it complains that saddr, daddr, sz are
+ * not initialized when this code path is taken. Silence it.
+ */
+ return -1;
+ }
+ fd = socket(family, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+ if (bind(fd, saddr, sz))
+ error(1, errno, "failed to bind send socket");
+ data[0] = 'a';
+ ret = sendto(fd, data, 1, MSG_FASTOPEN, daddr, sz);
+ if (ret != 1)
+ error(1, errno, "failed to sendto");
+
+ return fd;
+}
+
+static bool is_listen_fd(int fd)
+{
+ int i;
+
+ for (i = 0; i < N_LISTEN; i++) {
+ if (rcv_fds[i] == fd)
+ return true;
+ }
+ return false;
+}
+
+static int rotate_key(int fd)
+{
+ static int iter;
+ static uint32_t new_key[4];
+ uint32_t keys[8];
+ uint32_t tmp_key[4];
+ int i;
+ int len = KEY_LENGTH * 2;
+
+ if (iter < N_LISTEN) {
+ /* first set new key as backups */
+ if (iter == 0) {
+ for (i = 0; i < ARRAY_SIZE(new_key); i++)
+ new_key[i] = rand();
+ }
+ get_keys(fd, keys);
+ memcpy(keys + 4, new_key, KEY_LENGTH);
+ set_keys(fd, keys);
+ } else {
+ /* swap the keys */
+ get_keys(fd, keys);
+ memcpy(tmp_key, keys + 4, KEY_LENGTH);
+ memcpy(keys + 4, keys, KEY_LENGTH);
+ memcpy(keys, tmp_key, KEY_LENGTH);
+ set_keys(fd, keys);
+ }
+ if (++iter >= (N_LISTEN * 2))
+ iter = 0;
+}
+
+static void run_one_test(int family)
+{
+ struct epoll_event ev;
+ int i, send_fd;
+ int n_loops = 10000;
+ int rotate_key_fd = 0;
+ int key_rotate_interval = 50;
+ int fd, epfd;
+ char buf[1];
+
+ build_rcv_fd(family, SOCK_STREAM, rcv_fds);
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ ev.events = EPOLLIN;
+ for (i = 0; i < N_LISTEN; i++) {
+ ev.data.fd = rcv_fds[i];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+ while (n_loops--) {
+ send_fd = connect_and_send(family, SOCK_STREAM);
+ if (do_rotate && ((n_loops % key_rotate_interval) == 0)) {
+ rotate_key(rcv_fds[rotate_key_fd]);
+ if (++rotate_key_fd >= N_LISTEN)
+ rotate_key_fd = 0;
+ }
+ while (1) {
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+ if (is_listen_fd(ev.data.fd)) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ ev.data.fd = fd;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev))
+ error(1, errno, "failed epoll add");
+ continue;
+ }
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ if (i != 1)
+ error(1, errno, "failed recv data");
+ if (epoll_ctl(epfd, EPOLL_CTL_DEL, ev.data.fd, NULL))
+ error(1, errno, "failed epoll del");
+ close(ev.data.fd);
+ break;
+ }
+ close(send_fd);
+ }
+ for (i = 0; i < N_LISTEN; i++)
+ close(rcv_fds[i]);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "46sr")) != -1) {
+ switch (c) {
+ case '4':
+ do_ipv6 = false;
+ break;
+ case '6':
+ do_ipv6 = true;
+ break;
+ case 's':
+ do_sockopt = true;
+ break;
+ case 'r':
+ do_rotate = true;
+ key_len = KEY_LENGTH * 2;
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+ proc_fd = open(PROC_FASTOPEN_KEY, O_RDWR);
+ if (proc_fd < 0)
+ error(1, errno, "Unable to open %s", PROC_FASTOPEN_KEY);
+ srand(time(NULL));
+ if (do_ipv6)
+ run_one_test(AF_INET6);
+ else
+ run_one_test(AF_INET);
+ close(proc_fd);
+ fprintf(stderr, "PASS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.sh b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh
new file mode 100755
index 000000000000..41476399e184
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# rotate TFO keys for ipv4/ipv6 and verify that the client does
+# not present an invalid cookie.
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+ ip netns exec "${NETNS}" sysctl -w net.ipv4.tcp_fastopen=3 \
+ >/dev/null 2>&1
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+do_test() {
+ # flush routes before each run, otherwise successive runs can
+ # initially present an old TFO cookie
+ ip netns exec "${NETNS}" ip tcp_metrics flush
+ ip netns exec "${NETNS}" ./tcp_fastopen_backup_key "$1"
+ val=$(ip netns exec "${NETNS}" nstat -az | \
+ grep TcpExtTCPFastOpenPassiveFail | awk '{print $2}')
+ if [ $val -ne 0 ]; then
+ echo "FAIL: TcpExtTCPFastOpenPassiveFail non-zero"
+ return 1
+ fi
+}
+
+do_test "-4"
+do_test "-6"
+do_test "-4"
+do_test "-6"
+do_test "-4s"
+do_test "-6s"
+do_test "-4s"
+do_test "-6s"
+do_test "-4r"
+do_test "-6r"
+do_test "-4r"
+do_test "-6r"
+do_test "-4sr"
+do_test "-6sr"
+do_test "-4sr"
+do_test "-6sr"
+echo "all tests done"