diff options
author | David S. Miller <davem@davemloft.net> | 2022-05-16 11:33:59 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-05-16 11:33:59 +0100 |
commit | ee3398c78767b1fe9f5cdac04295abb96496d3e4 (patch) | |
tree | d5be1a42e32e55e228aeee48bc252b5fa2db4d8b | |
parent | 3daebfbeb4555cb0c113aeb88aa469192ee41d89 (diff) | |
parent | 909876500251b3b48480a840bbf9053588254eee (diff) | |
download | linux-ee3398c78767b1fe9f5cdac04295abb96496d3e4.tar.bz2 |
Merge branch 'net-skb-defer-freeing-polish'
Eric Dumazet says:
====================
net: polish skb defer freeing
While testing this recently added feature on a variety
of platforms/configurations, I found the following issues:
1) A race leading to concurrent calls to smp_call_function_single_async()
2) Missed opportunity to use napi_consume_skb()
3) Need to limit the max length of the per-cpu lists.
4) Process the per-cpu list more frequently, for the
(unusual) case where net_rx_action() has mutiple
napi_poll() to process per round.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/admin-guide/sysctl/net.rst | 8 | ||||
-rw-r--r-- | include/linux/netdevice.h | 1 | ||||
-rw-r--r-- | net/core/dev.c | 15 | ||||
-rw-r--r-- | net/core/dev.h | 2 | ||||
-rw-r--r-- | net/core/skbuff.c | 18 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 8 |
6 files changed, 38 insertions, 14 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index f86b5e1623c6..fa4dcdb283cf 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -322,6 +322,14 @@ a leaked reference faster. A larger value may be useful to prevent false warnings on slow/loaded systems. Default value is 10, minimum 1, maximum 3600. +skb_defer_max +------------- + +Max size (in skbs) of the per-cpu list of skbs being freed +by the cpu which allocated them. Used by TCP stack so far. + +Default: 64 + optmem_max ---------- diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d57ce248004c..cbaf312e365b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3136,6 +3136,7 @@ struct softnet_data { /* Another possibly contended cache line */ spinlock_t defer_lock ____cacheline_aligned_in_smp; int defer_count; + int defer_ipi_scheduled; struct sk_buff *defer_list; call_single_data_t defer_csd; }; diff --git a/net/core/dev.c b/net/core/dev.c index d93456c75b55..04fd056f7f74 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4330,6 +4330,7 @@ int netdev_max_backlog __read_mostly = 1000; EXPORT_SYMBOL(netdev_max_backlog); int netdev_tstamp_prequeue __read_mostly = 1; +unsigned int sysctl_skb_defer_max __read_mostly = 64; int netdev_budget __read_mostly = 300; /* Must be at least 2 jiffes to guarantee 1 jiffy timeout */ unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ; @@ -4582,9 +4583,12 @@ static void rps_trigger_softirq(void *data) #endif /* CONFIG_RPS */ /* Called from hardirq (IPI) context */ -static void trigger_rx_softirq(void *data __always_unused) +static void trigger_rx_softirq(void *data) { + struct softnet_data *sd = data; + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + smp_store_release(&sd->defer_ipi_scheduled, 0); } /* @@ -6630,7 +6634,7 @@ static void skb_defer_free_flush(struct softnet_data *sd) while (skb != NULL) { next = skb->next; - __kfree_skb(skb); + napi_consume_skb(skb, 1); skb = next; } } @@ -6651,6 +6655,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) for (;;) { struct napi_struct *n; + skb_defer_free_flush(sd); + if (list_empty(&list)) { if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) goto end; @@ -6680,8 +6686,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) __raise_softirq_irqoff(NET_RX_SOFTIRQ); net_rps_action_and_irq_enable(sd); -end: - skb_defer_free_flush(sd); +end:; } struct netdev_adjacent { @@ -11382,7 +11387,7 @@ static int __init net_dev_init(void) INIT_CSD(&sd->csd, rps_trigger_softirq, sd); sd->cpu = i; #endif - INIT_CSD(&sd->defer_csd, trigger_rx_softirq, NULL); + INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); spin_lock_init(&sd->defer_lock); init_gro_hash(&sd->backlog); diff --git a/net/core/dev.h b/net/core/dev.h index 328b37af90ba..cbb8a925175a 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -39,7 +39,7 @@ void dev_addr_check(struct net_device *dev); /* sysctls not referred to from outside net/core/ */ extern int netdev_budget; extern unsigned int netdev_budget_usecs; - +extern unsigned int sysctl_skb_defer_max; extern int netdev_tstamp_prequeue; extern int netdev_unregister_timeout_secs; extern int weight_p; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2fea964f09d8..1d10bb4adec1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -80,6 +80,7 @@ #include <linux/user_namespace.h> #include <linux/indirect_call_wrapper.h> +#include "dev.h" #include "sock_destructor.h" struct kmem_cache *skbuff_head_cache __ro_after_init; @@ -6496,16 +6497,21 @@ void skb_attempt_defer_free(struct sk_buff *skb) int cpu = skb->alloc_cpu; struct softnet_data *sd; unsigned long flags; + unsigned int defer_max; bool kick; if (WARN_ON_ONCE(cpu >= nr_cpu_ids) || !cpu_online(cpu) || cpu == raw_smp_processor_id()) { - __kfree_skb(skb); +nodefer: __kfree_skb(skb); return; } sd = &per_cpu(softnet_data, cpu); + defer_max = READ_ONCE(sysctl_skb_defer_max); + if (READ_ONCE(sd->defer_count) >= defer_max) + goto nodefer; + /* We do not send an IPI or any signal. * Remote cpu will eventually call skb_defer_free_flush() */ @@ -6515,18 +6521,14 @@ void skb_attempt_defer_free(struct sk_buff *skb) WRITE_ONCE(sd->defer_list, skb); sd->defer_count++; - /* kick every time queue length reaches 128. - * This should avoid blocking in smp_call_function_single_async(). - * This condition should hardly be bit under normal conditions, - * unless cpu suddenly stopped to receive NIC interrupts. - */ - kick = sd->defer_count == 128; + /* Send an IPI every time queue reaches half capacity. */ + kick = sd->defer_count == (defer_max >> 1); spin_unlock_irqrestore(&sd->defer_lock, flags); /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU * if we are unlucky enough (this seems very unlikely). */ - if (unlikely(kick)) + if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) smp_call_function_single_async(cpu, &sd->defer_csd); } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 195ca5c28771..ca8d38325e1e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -578,6 +578,14 @@ static struct ctl_table net_core_table[] = { .extra1 = SYSCTL_ONE, .extra2 = &int_3600, }, + { + .procname = "skb_defer_max", + .data = &sysctl_skb_defer_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, { } }; |