summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-06-10 16:21:39 -0700
committerJakub Kicinski <kuba@kernel.org>2022-06-10 16:21:40 -0700
commite10b02ee5b6c95872064cf0a8e65f31951a31967 (patch)
treee061107c999e33aac6a61f87cd45a24cd4258422 /net/ipv6
parent5c281b4e529cd5a73b32ac561d79f448d18dda6f (diff)
parent0f2c2693988aeeb4c83a581fe58a28d526eecd39 (diff)
downloadlinux-e10b02ee5b6c95872064cf0a8e65f31951a31967.tar.bz2
Merge branch 'net-reduce-tcp_memory_allocated-inflation'
Eric Dumazet says: ==================== net: reduce tcp_memory_allocated inflation Hosts with a lot of sockets tend to hit so called TCP memory pressure, leading to very bad TCP performance and/or OOM. The problem is that some TCP sockets can hold up to 2MB of 'forward allocations' in their per-socket cache (sk->sk_forward_alloc), and there is no mechanism to make them relinquish their share under mem pressure. Only under some potentially rare events their share is reclaimed, one socket at a time. In this series, I implemented a per-cpu cache instead of a per-socket one. Each CPU has a +1/-1 MB (256 pages on x86) forward alloc cache, in order to not dirty tcp_memory_allocated shared cache line too often. We keep sk->sk_forward_alloc values as small as possible, to meet memcg page granularity constraint. Note that memcg already has a per-cpu cache, although MEMCG_CHARGE_BATCH is defined to 32 pages, which seems a bit small. Note that while this cover letter mentions TCP, this work is generic and supports TCP, UDP, DECNET, SCTP. ==================== Link: https://lore.kernel.org/r/20220609063412.2205738-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/ipv6/udp.c3
-rw-r--r--net/ipv6/udplite.c3
3 files changed, 9 insertions, 0 deletions
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f37dd4aa91c6..c72448ba6dc9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2159,7 +2159,10 @@ struct proto tcpv6_prot = {
.leave_memory_pressure = tcp_leave_memory_pressure,
.stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
+
.memory_allocated = &tcp_memory_allocated,
+ .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
+
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
.sysctl_mem = sysctl_tcp_mem,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 55afd7f39c04..be074f07073a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1740,7 +1740,10 @@ struct proto udpv6_prot = {
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = udp_bpf_update_proto,
#endif
+
.memory_allocated = &udp_memory_allocated,
+ .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
+
.sysctl_mem = sysctl_udp_mem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index fbb700d3f437..b70725856259 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -48,7 +48,10 @@ struct proto udplitev6_prot = {
.unhash = udp_lib_unhash,
.rehash = udp_v6_rehash,
.get_port = udp_v6_get_port,
+
.memory_allocated = &udp_memory_allocated,
+ .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
+
.sysctl_mem = sysctl_udp_mem,
.obj_size = sizeof(struct udp6_sock),
.h.udp_table = &udplite_table,