Merge branch 'net-reduce-tcp_memory_allocated-inflation'

Eric Dumazet says: ==================== net: reduce tcp_memory_allocated inflation Hosts with a lot of sockets tend to hit so called TCP memory pressure, leading to very bad TCP performance and/or OOM. The problem is that some TCP sockets can hold up to 2MB of 'forward allocations' in their per-socket cache (sk->sk_forward_alloc), and there is no mechanism to make them relinquish their share under mem pressure. Only under some potentially rare events their share is reclaimed, one socket at a time. In this series, I implemented a per-cpu cache instead of a per-socket one. Each CPU has a +1/-1 MB (256 pages on x86) forward alloc cache, in order to not dirty tcp_memory_allocated shared cache line too often. We keep sk->sk_forward_alloc values as small as possible, to meet memcg page granularity constraint. Note that memcg already has a per-cpu cache, although MEMCG_CHARGE_BATCH is defined to 32 pages, which seems a bit small. Note that while this cover letter mentions TCP, this work is generic and supports TCP, UDP, DECNET, SCTP. ==================== Link: https://lore.kernel.org/r/20220609063412.2205738-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
author: Jakub Kicinski <kuba@kernel.org> 2022-06-10 16:21:39 -0700
committer: Jakub Kicinski <kuba@kernel.org> 2022-06-10 16:21:40 -0700
commit: e10b02ee5b6c95872064cf0a8e65f31951a31967 (patch)
tree: e061107c999e33aac6a61f87cd45a24cd4258422 /net/sctp
parent: 5c281b4e529cd5a73b32ac561d79f448d18dda6f (diff)
parent: 0f2c2693988aeeb4c83a581fe58a28d526eecd39 (diff)
download: linux-e10b02ee5b6c95872064cf0a8e65f31951a31967.tar.bz2
5 files changed, 9 insertions, 15 deletions
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 35928fefae33..fa500ea3a1f1 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1523,11 +1523,11 @@ static __init int sctp_init(void)
 	limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7);
 	max_share = min(4UL*1024*1024, limit);
 
-	sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */
+	sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */
 	sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1);
 	sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
 
-	sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;
+	sysctl_sctp_wmem[0] = PAGE_SIZE;
 	sysctl_sctp_wmem[1] = 16*1024;
 	sysctl_sctp_wmem[2] = max(64*1024, max_share);
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 52edee1322fc..f6ee7f4040c1 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -6590,8 +6590,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 			pr_debug("%s: under pressure, reneging for tsn:%u\n",
 				 __func__, tsn);
 			deliver = SCTP_CMD_RENEGE;
-		} else {
-			sk_mem_reclaim(sk);
 		}
 	}
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6d37d2dfb3da..171f1a35d205 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -93,6 +93,7 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 
 static unsigned long sctp_memory_pressure;
 static atomic_long_t sctp_memory_allocated;
+static DEFINE_PER_CPU(int, sctp_memory_per_cpu_fw_alloc);
 struct percpu_counter sctp_sockets_allocated;
 
 static void sctp_enter_memory_pressure(struct sock *sk)
@@ -1823,9 +1824,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 	if (sctp_wspace(asoc) < (int)msg_len)
 		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
 
-	if (sk_under_memory_pressure(sk))
-		sk_mem_reclaim(sk);
-
 	if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) {
 		timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
@@ -9194,8 +9192,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 			goto do_error;
 		if (signal_pending(current))
 			goto do_interrupted;
-		if (sk_under_memory_pressure(sk))
-			sk_mem_reclaim(sk);
 		if ((int)msg_len <= sctp_wspace(asoc) &&
 		    sk_wmem_schedule(sk, msg_len))
 			break;
@@ -9657,7 +9653,10 @@ struct proto sctp_prot = {
 	.sysctl_wmem =  sysctl_sctp_wmem,
 	.memory_pressure = &sctp_memory_pressure,
 	.enter_memory_pressure = sctp_enter_memory_pressure,
+
 	.memory_allocated = &sctp_memory_allocated,
+	.per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc,
+
 	.sockets_allocated = &sctp_sockets_allocated,
 };
 
@@ -9700,7 +9699,10 @@ struct proto sctpv6_prot = {
 	.sysctl_wmem	= sysctl_sctp_wmem,
 	.memory_pressure = &sctp_memory_pressure,
 	.enter_memory_pressure = sctp_enter_memory_pressure,
+
 	.memory_allocated = &sctp_memory_allocated,
+	.per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc,
+
 	.sockets_allocated = &sctp_sockets_allocated,
 };
 #endif /* IS_ENABLED(CONFIG_IPV6) */
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index 6b13f737ebf2..bb22b71df7a3 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -979,8 +979,6 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 
 	if (freed >= needed && sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
 		sctp_intl_start_pd(ulpq, gfp);
-
-	sk_mem_reclaim(asoc->base.sk);
 }
 
 static void sctp_intl_stream_abort_pd(struct sctp_ulpq *ulpq, __u16 sid,
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 407fed46931b..0a8510a0c5e6 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1100,12 +1100,8 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 		else if (retval == 1)
 			sctp_ulpq_reasm_drain(ulpq);
 	}
-
-	sk_mem_reclaim(asoc->base.sk);
 }
 
-
-
 /* Notify the application if an association is aborted and in
  * partial delivery mode.  Send up any pending received messages.
  */
author	Jakub Kicinski <kuba@kernel.org>	2022-06-10 16:21:39 -0700
committer	Jakub Kicinski <kuba@kernel.org>	2022-06-10 16:21:40 -0700
commit	e10b02ee5b6c95872064cf0a8e65f31951a31967 (patch)
tree	e061107c999e33aac6a61f87cd45a24cd4258422 /net/sctp
parent	5c281b4e529cd5a73b32ac561d79f448d18dda6f (diff)
parent	0f2c2693988aeeb4c83a581fe58a28d526eecd39 (diff)
download	linux-e10b02ee5b6c95872064cf0a8e65f31951a31967.tar.bz2