diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2018-12-20 23:47:10 +0100 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-12-20 23:47:11 +0100 |
commit | 1cf4a0ccc506b5c027afc5eaf3fddc83f96f31e7 (patch) | |
tree | cd64f2bdd6ac693f204b18938c5836df2bcc698e /net/tls | |
parent | 77ea5f4cbe2084db9ab021ba73fb7eadf1610884 (diff) | |
parent | 28cb6f1eaffdc5a6a9707cac55f4a43aa3fd7895 (diff) | |
download | linux-1cf4a0ccc506b5c027afc5eaf3fddc83f96f31e7.tar.bz2 |
Merge branch 'bpf-sockmap-fixes-and-improvements'
John Fastabend says:
====================
Set of bpf fixes and improvements to make sockmap with kTLS usable
with "real" applications. This set came as the fallout of pulling
kTLS+sockmap into Cilium[1] and running in container environment.
Roughly broken into three parts,
Patches 1-3: resolve/improve handling of size field in sk_msg_md
Patch 4: it became difficult to use this in Cilium when the
SK_PASS verdict was not correctly handle. So handle
the case correctly.
Patch 5-8: Set of issues found while running OpenSSL TX kTLS
enabled applications. This resolves the most obvious
issues and gets applications using kTLS TX up and
running with sock{map|has}.
Other than the "sk_msg, zap ingress queue on psock down" (PATCH 6/8)
which can potentially cause a WARNING the issues fixed in this
series do not cause kernel side warnings, BUG, etc. but instead
cause stalls and other odd behavior in the user space applications
when using kTLS with BPF policies applied.
Primarily tested with 'curl' compiled with latest openssl and
also 'openssl s_client/s_server' containers using Cilium network
plugin with docker/k8s. Some basic testing with httpd was also
enabled. Cilium CI tests will be added shortly to cover these
cases as well. We also have 'wrk' and other test and benchmarking
tools we can run now.
We have two more sets of patches currently under testing that
will be sent shortly to address a few more issues. First the
OpenSSL RX kTLS side breaks when both sk_msg and sk_skb_verdict
programs are used with kTLS, the sk_skb_verdict programs are
not enforced. Second skmsg needs to call into tcp stack to
send to indicate consumed data.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'net/tls')
-rw-r--r-- | net/tls/tls_main.c | 14 | ||||
-rw-r--r-- | net/tls/tls_sw.c | 43 |
2 files changed, 42 insertions, 15 deletions
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 311cec8e533d..acff12999c06 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -55,6 +55,8 @@ enum { static struct proto *saved_tcpv6_prot; static DEFINE_MUTEX(tcpv6_prot_mutex); +static struct proto *saved_tcpv4_prot; +static DEFINE_MUTEX(tcpv4_prot_mutex); static LIST_HEAD(device_list); static DEFINE_MUTEX(device_mutex); static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG]; @@ -690,6 +692,16 @@ static int tls_init(struct sock *sk) mutex_unlock(&tcpv6_prot_mutex); } + if (ip_ver == TLSV4 && + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv4_prot))) { + mutex_lock(&tcpv4_prot_mutex); + if (likely(sk->sk_prot != saved_tcpv4_prot)) { + build_protos(tls_prots[TLSV4], sk->sk_prot); + smp_store_release(&saved_tcpv4_prot, sk->sk_prot); + } + mutex_unlock(&tcpv4_prot_mutex); + } + ctx->tx_conf = TLS_BASE; ctx->rx_conf = TLS_BASE; update_sk_prot(sk, ctx); @@ -721,8 +733,6 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { static int __init tls_register(void) { - build_protos(tls_prots[TLSV4], &tcp_prot); - tls_sw_proto_ops = inet_stream_ops; tls_sw_proto_ops.splice_read = tls_sw_splice_read; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d4ecc66464e6..5aee9ae5ca53 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -686,12 +686,13 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, struct sk_psock *psock; struct sock *sk_redir; struct tls_rec *rec; + bool enospc, policy; int err = 0, send; u32 delta = 0; - bool enospc; + policy = !(flags & MSG_SENDPAGE_NOPOLICY); psock = sk_psock_get(sk); - if (!psock) + if (!psock || !policy) return tls_push_record(sk, flags, record_type); more_data: enospc = sk_msg_full(msg); @@ -1017,8 +1018,8 @@ send_end: return copied ? copied : ret; } -int tls_sw_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) +int tls_sw_do_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) { long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -1033,15 +1034,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, int ret = 0; bool eor; - if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_SENDPAGE_NOTLAST)) - return -ENOTSUPP; - - /* No MSG_EOR from splice, only look at MSG_MORE */ eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST)); - - lock_sock(sk); - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); /* Wait till there is any pending write on socket */ @@ -1145,10 +1138,34 @@ wait_for_memory: } sendpage_end: ret = sk_stream_error(sk, flags, ret); - release_sock(sk); return copied ? copied : ret; } +int tls_sw_sendpage_locked(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) + return -ENOTSUPP; + + return tls_sw_do_sendpage(sk, page, offset, size, flags); +} + +int tls_sw_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + int ret; + + if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) + return -ENOTSUPP; + + lock_sock(sk); + ret = tls_sw_do_sendpage(sk, page, offset, size, flags); + release_sock(sk); + return ret; +} + static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, int flags, long timeo, int *err) { |