author:    David S. Miller <davem@davemloft.net>  2019-11-06 17:33:32 -0800
committer: David S. Miller <davem@davemloft.net>  2019-11-06 17:33:32 -0800
commit:    9990a79d8fe21ba9598e8d98383552a9fea632da
tree:      c15a43aa3ea0d35d17e7e2bcbadd19a07439bfd2
parent:    17fdd7638cb687cd7f15a48545f25d738f0101e0
parent:    41098af59d8d753aa8d3bb4310cc4ecb61fc82c7

Merge branch 'net-tls-add-a-TX-lock'
Jakub Kicinski says:
====================
net/tls: add a TX lock
Some time ago Pooja and Mallesham started reporting crashes with
an async accelerator. After trying to poke the existing logic into
shape I came to the conclusion that it can't be trusted, and to
preserve our sanity we should just add a lock around the TX side.
First patch removes the sk_write_pending checks from the write
space callbacks. Those don't seem to have a logical justification.
Patch 2 adds the TX lock and patch 3 adds an associated test (which
should hang with the current net tree).
Mallesham reports that even with these fixes applied the async
accelerator workload still occasionally hangs waiting for socket
memory. I suspect that's strictly related to the way async crypto
is integrated in TLS, so I think we should get these into net or
net-next and move from there.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
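
The locking pattern the series introduces is uniform across the TX paths:
tls_sw_sendmsg(), tls_sw_sendpage(), tls_device_sendmsg(),
tls_device_sendpage() and the tx_work_handler() retry worker all take
ctx->tx_lock before lock_sock() and drop it after release_sock(), so no two
TX paths can touch the partially-sent record state at the same time. As a
rough userspace illustration of that ordering (not kernel code; the struct
and helper names below such as tls_tx_state, tls_tx_begin() and tls_tx_end()
are made up for the sketch), build with -pthread:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the TX side of struct tls_context. */
struct tls_tx_state {
        pthread_mutex_t tx_lock;   /* the new, outer per-connection TX lock */
        pthread_mutex_t sock_lock; /* stands in for lock_sock()/release_sock() */
        bool partially_sent;       /* the partially-sent record state it protects */
};

/* Every TX path enters and leaves through the same pair, so the lock
 * order is always tx_lock -> socket lock, released in reverse. */
static void tls_tx_begin(struct tls_tx_state *s)
{
        pthread_mutex_lock(&s->tx_lock);
        pthread_mutex_lock(&s->sock_lock);
}

static void tls_tx_end(struct tls_tx_state *s)
{
        pthread_mutex_unlock(&s->sock_lock);
        pthread_mutex_unlock(&s->tx_lock);
}

/* A sendmsg-style caller that runs out of socket memory mid-record and
 * leaves a partially transmitted record behind. */
static void sendmsg_path(struct tls_tx_state *s)
{
        tls_tx_begin(s);
        s->partially_sent = true;
        tls_tx_end(s);
}

/* The retransmit worker: because it takes the same tx_lock, it runs either
 * strictly before or strictly after a concurrent sendmsg_path() call and
 * always sees a consistent partially_sent flag. */
static void tx_work_path(struct tls_tx_state *s)
{
        tls_tx_begin(s);
        if (s->partially_sent) {
                puts("tx worker: pushing the pending record");
                s->partially_sent = false;
        }
        tls_tx_end(s);
}

int main(void)
{
        struct tls_tx_state s = { .partially_sent = false };

        pthread_mutex_init(&s.tx_lock, NULL);  /* mirrors mutex_init() in create_ctx() */
        pthread_mutex_init(&s.sock_lock, NULL);

        sendmsg_path(&s);
        tx_work_path(&s);

        pthread_mutex_destroy(&s.sock_lock);
        pthread_mutex_destroy(&s.tx_lock);     /* mirrors mutex_destroy() in tls_ctx_free() */
        return 0;
}

This ordering is also why tls_sw_write_space() can now schedule the TX work
unconditionally: the worker serializes on tx_lock instead of relying on the
old sk_write_pending check.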
 include/net/tls.h                 |   5
 net/tls/tls_device.c              |  10
 net/tls/tls_main.c                |   2
 net/tls/tls_sw.c                  |  30
 tools/testing/selftests/net/tls.c | 108
 5 files changed, 134 insertions(+), 21 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index c664e6dba0d1..794e297483ea 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -40,6 +40,7 @@
 #include <linux/socket.h>
 #include <linux/tcp.h>
 #include <linux/skmsg.h>
+#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/rcupdate.h>
 
@@ -269,6 +270,10 @@ struct tls_context {
 
         bool in_tcp_sendpages;
         bool pending_open_record_frags;
+
+        struct mutex tx_lock; /* protects partially_sent_* fields and
+                               * per-type TX fields
+                               */
         unsigned long flags;
 
         /* cache cold stuff */
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index f959487c5cd1..683d00837693 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -523,8 +523,10 @@ last_record:
 int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
         unsigned char record_type = TLS_RECORD_TYPE_DATA;
+        struct tls_context *tls_ctx = tls_get_ctx(sk);
         int rc;
 
+        mutex_lock(&tls_ctx->tx_lock);
         lock_sock(sk);
 
         if (unlikely(msg->msg_controllen)) {
@@ -538,12 +540,14 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 
 out:
         release_sock(sk);
+        mutex_unlock(&tls_ctx->tx_lock);
         return rc;
 }
 
 int tls_device_sendpage(struct sock *sk, struct page *page,
                         int offset, size_t size, int flags)
 {
+        struct tls_context *tls_ctx = tls_get_ctx(sk);
         struct iov_iter msg_iter;
         char *kaddr = kmap(page);
         struct kvec iov;
@@ -552,6 +556,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
         if (flags & MSG_SENDPAGE_NOTLAST)
                 flags |= MSG_MORE;
 
+        mutex_lock(&tls_ctx->tx_lock);
         lock_sock(sk);
 
         if (flags & MSG_OOB) {
@@ -568,6 +573,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
 
 out:
         release_sock(sk);
+        mutex_unlock(&tls_ctx->tx_lock);
         return rc;
 }
 
@@ -623,9 +629,11 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
 
 void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
 {
-        if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) {
+        if (tls_is_partially_sent_record(ctx)) {
                 gfp_t sk_allocation = sk->sk_allocation;
 
+                WARN_ON_ONCE(sk->sk_write_pending);
+
                 sk->sk_allocation = GFP_ATOMIC;
                 tls_push_partial_record(sk, ctx,
                                         MSG_DONTWAIT | MSG_NOSIGNAL |
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index ac88877dcade..0775ae40fcfb 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -267,6 +267,7 @@ void tls_ctx_free(struct sock *sk, struct tls_context *ctx)
 
         memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
         memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
+        mutex_destroy(&ctx->tx_lock);
 
         if (sk)
                 kfree_rcu(ctx, rcu);
@@ -612,6 +613,7 @@ static struct tls_context *create_ctx(struct sock *sk)
         if (!ctx)
                 return NULL;
 
+        mutex_init(&ctx->tx_lock);
         rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
         ctx->sk_proto = sk->sk_prot;
         return ctx;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index c2b5e0d2ba1a..446f23c1f3ce 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -897,15 +897,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
         if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
                 return -ENOTSUPP;
 
+        mutex_lock(&tls_ctx->tx_lock);
         lock_sock(sk);
 
-        /* Wait till there is any pending write on socket */
-        if (unlikely(sk->sk_write_pending)) {
-                ret = wait_on_pending_writer(sk, &timeo);
-                if (unlikely(ret))
-                        goto send_end;
-        }
-
         if (unlikely(msg->msg_controllen)) {
                 ret = tls_proccess_cmsg(sk, msg, &record_type);
                 if (ret) {
@@ -1091,6 +1085,7 @@ send_end:
         ret = sk_stream_error(sk, msg->msg_flags, ret);
 
         release_sock(sk);
+        mutex_unlock(&tls_ctx->tx_lock);
         return copied ? copied : ret;
 }
 
@@ -1114,13 +1109,6 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
         eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
         sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
-        /* Wait till there is any pending write on socket */
-        if (unlikely(sk->sk_write_pending)) {
-                ret = wait_on_pending_writer(sk, &timeo);
-                if (unlikely(ret))
-                        goto sendpage_end;
-        }
-
         /* Call the sk_stream functions to manage the sndbuf mem. */
         while (size > 0) {
                 size_t copy, required_size;
@@ -1219,15 +1207,18 @@ sendpage_end:
 int tls_sw_sendpage(struct sock *sk, struct page *page,
                     int offset, size_t size, int flags)
 {
+        struct tls_context *tls_ctx = tls_get_ctx(sk);
         int ret;
 
         if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
                       MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
                 return -ENOTSUPP;
 
+        mutex_lock(&tls_ctx->tx_lock);
         lock_sock(sk);
         ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
         release_sock(sk);
+        mutex_unlock(&tls_ctx->tx_lock);
         return ret;
 }
 
@@ -2170,9 +2161,11 @@ static void tx_work_handler(struct work_struct *work)
         if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
                 return;
 
+        mutex_lock(&tls_ctx->tx_lock);
         lock_sock(sk);
         tls_tx_records(sk, -1);
         release_sock(sk);
+        mutex_unlock(&tls_ctx->tx_lock);
 }
 
 void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
@@ -2180,12 +2173,9 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
         struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
 
         /* Schedule the transmission if tx list is ready */
-        if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
-                /* Schedule the transmission */
-                if (!test_and_set_bit(BIT_TX_SCHEDULED,
-                                      &tx_ctx->tx_bitmask))
-                        schedule_delayed_work(&tx_ctx->tx_work.work, 0);
-        }
+        if (is_tx_ready(tx_ctx) &&
+            !test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
+                schedule_delayed_work(&tx_ctx->tx_work.work, 0);
 }
 
 void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 4c285b6e1db8..1c8f194d6556 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -898,6 +898,114 @@ TEST_F(tls, nonblocking)
         }
 }
 
+static void
+test_mutliproc(struct __test_metadata *_metadata, struct _test_data_tls *self,
+               bool sendpg, unsigned int n_readers, unsigned int n_writers)
+{
+        const unsigned int n_children = n_readers + n_writers;
+        const size_t data = 6 * 1000 * 1000;
+        const size_t file_sz = data / 100;
+        size_t read_bias, write_bias;
+        int i, fd, child_id;
+        char buf[file_sz];
+        pid_t pid;
+
+        /* Only allow multiples for simplicity */
+        ASSERT_EQ(!(n_readers % n_writers) || !(n_writers % n_readers), true);
+        read_bias = n_writers / n_readers ?: 1;
+        write_bias = n_readers / n_writers ?: 1;
+
+        /* prep a file to send */
+        fd = open("/tmp/", O_TMPFILE | O_RDWR, 0600);
+        ASSERT_GE(fd, 0);
+
+        memset(buf, 0xac, file_sz);
+        ASSERT_EQ(write(fd, buf, file_sz), file_sz);
+
+        /* spawn children */
+        for (child_id = 0; child_id < n_children; child_id++) {
+                pid = fork();
+                ASSERT_NE(pid, -1);
+                if (!pid)
+                        break;
+        }
+
+        /* parent waits for all children */
+        if (pid) {
+                for (i = 0; i < n_children; i++) {
+                        int status;
+
+                        wait(&status);
+                        EXPECT_EQ(status, 0);
+                }
+
+                return;
+        }
+
+        /* Split threads for reading and writing */
+        if (child_id < n_readers) {
+                size_t left = data * read_bias;
+                char rb[8001];
+
+                while (left) {
+                        int res;
+
+                        res = recv(self->cfd, rb,
+                                   left > sizeof(rb) ? sizeof(rb) : left, 0);
+
+                        EXPECT_GE(res, 0);
+                        left -= res;
+                }
+        } else {
+                size_t left = data * write_bias;
+
+                while (left) {
+                        int res;
+
+                        ASSERT_EQ(lseek(fd, 0, SEEK_SET), 0);
+                        if (sendpg)
+                                res = sendfile(self->fd, fd, NULL,
+                                               left > file_sz ? file_sz : left);
+                        else
+                                res = send(self->fd, buf,
+                                           left > file_sz ? file_sz : left, 0);
+
+                        EXPECT_GE(res, 0);
+                        left -= res;
+                }
+        }
+}
+
+TEST_F(tls, mutliproc_even)
+{
+        test_mutliproc(_metadata, self, false, 6, 6);
+}
+
+TEST_F(tls, mutliproc_readers)
+{
+        test_mutliproc(_metadata, self, false, 4, 12);
+}
+
+TEST_F(tls, mutliproc_writers)
+{
+        test_mutliproc(_metadata, self, false, 10, 2);
+}
+
+TEST_F(tls, mutliproc_sendpage_even)
+{
+        test_mutliproc(_metadata, self, true, 6, 6);
+}
+
+TEST_F(tls, mutliproc_sendpage_readers)
+{
+        test_mutliproc(_metadata, self, true, 4, 12);
+}
+
+TEST_F(tls, mutliproc_sendpage_writers)
+{
+        test_mutliproc(_metadata, self, true, 10, 2);
+}
+
 TEST_F(tls, control_msg)
 {
         if (self->notls)