summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorStanislav Fomichev <sdf@google.com>2021-01-15 08:34:59 -0800
committerAlexei Starovoitov <ast@kernel.org>2021-01-20 14:23:00 -0800
commit9cacf81f8161111db25f98e78a7a0e32ae142b3f (patch)
treef20ef5d2014e204f3084ebbea54515427fdef43e /net
parent13ca51d5eb358edcb673afccb48c3440b9fda21b (diff)
downloadlinux-9cacf81f8161111db25f98e78a7a0e32ae142b3f.tar.bz2
bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE
Add custom implementation of getsockopt hook for TCP_ZEROCOPY_RECEIVE. We skip generic hooks for TCP_ZEROCOPY_RECEIVE and have a custom call in do_tcp_getsockopt using the on-stack data. This removes 3% overhead for locking/unlocking the socket. Without this patch: 3.38% 0.07% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt | --3.30%--__cgroup_bpf_run_filter_getsockopt | --0.81%--__kmalloc With the patch applied: 0.52% 0.12% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt_kern Note, exporting uapi/tcp.h requires removing netinet/tcp.h from test_progs.h because those headers have confliciting definitions. Signed-off-by: Stanislav Fomichev <sdf@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Martin KaFai Lau <kafai@fb.com> Link: https://lore.kernel.org/bpf/20210115163501.805133-2-sdf@google.com
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/socket.c3
4 files changed, 19 insertions, 0 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 856ae516ac18..26aa923cf522 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4099,6 +4099,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return -EFAULT;
lock_sock(sk);
err = tcp_zerocopy_receive(sk, &zc);
+ err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
+ &zc, &len, err);
release_sock(sk);
if (len >= offsetofend(struct tcp_zerocopy_receive, err))
goto zerocopy_rcv_sk_err;
@@ -4133,6 +4135,18 @@ zerocopy_rcv_out:
return 0;
}
+bool tcp_bpf_bypass_getsockopt(int level, int optname)
+{
+ /* TCP do_tcp_getsockopt has optimized getsockopt implementation
+ * to avoid extra socket lock for TCP_ZEROCOPY_RECEIVE.
+ */
+ if (level == SOL_TCP && optname == TCP_ZEROCOPY_RECEIVE)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL(tcp_bpf_bypass_getsockopt);
+
int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
int __user *optlen)
{
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 777306b5bc22..62b6fd385a47 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2793,6 +2793,7 @@ struct proto tcp_prot = {
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
+ .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0e1509b02cb3..8539715ff035 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2121,6 +2121,7 @@ struct proto tcpv6_prot = {
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
+ .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
diff --git a/net/socket.c b/net/socket.c
index 33e8b6c4e1d3..7f0617ab5437 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2126,6 +2126,9 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
return __sys_setsockopt(fd, level, optname, optval, optlen);
}
+INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
+ int optname));
+
/*
* Get a socket option. Because we don't know the option lengths we have
* to pass a user mode parameter for the protocols to sort out.