summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c111
-rw-r--r--net/core/sock.c16
2 files changed, 114 insertions, 13 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 304f2deae5f9..8637b2b71f3d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1698,27 +1698,54 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
+static atomic_t netstamp_wanted;
static void netstamp_clear(struct work_struct *work)
{
int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
+ int wanted;
- while (deferred--)
- static_key_slow_dec(&netstamp_needed);
+ wanted = atomic_add_return(deferred, &netstamp_wanted);
+ if (wanted > 0)
+ static_key_enable(&netstamp_needed);
+ else
+ static_key_disable(&netstamp_needed);
}
static DECLARE_WORK(netstamp_work, netstamp_clear);
#endif
void net_enable_timestamp(void)
{
+#ifdef HAVE_JUMP_LABEL
+ int wanted;
+
+ while (1) {
+ wanted = atomic_read(&netstamp_wanted);
+ if (wanted <= 0)
+ break;
+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
+ return;
+ }
+ atomic_inc(&netstamp_needed_deferred);
+ schedule_work(&netstamp_work);
+#else
static_key_slow_inc(&netstamp_needed);
+#endif
}
EXPORT_SYMBOL(net_enable_timestamp);
void net_disable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
- /* net_disable_timestamp() can be called from non process context */
- atomic_inc(&netstamp_needed_deferred);
+ int wanted;
+
+ while (1) {
+ wanted = atomic_read(&netstamp_wanted);
+ if (wanted <= 1)
+ break;
+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
+ return;
+ }
+ atomic_dec(&netstamp_needed_deferred);
schedule_work(&netstamp_work);
#else
static_key_slow_dec(&netstamp_needed);
@@ -4884,6 +4911,39 @@ void __napi_schedule(struct napi_struct *n)
EXPORT_SYMBOL(__napi_schedule);
/**
+ * napi_schedule_prep - check if napi can be scheduled
+ * @n: napi context
+ *
+ * Test if NAPI routine is already running, and if not mark
+ * it as running. This is used as a condition variable
+ * insure only one NAPI poll instance runs. We also make
+ * sure there is no pending NAPI disable.
+ */
+bool napi_schedule_prep(struct napi_struct *n)
+{
+ unsigned long val, new;
+
+ do {
+ val = READ_ONCE(n->state);
+ if (unlikely(val & NAPIF_STATE_DISABLE))
+ return false;
+ new = val | NAPIF_STATE_SCHED;
+
+ /* Sets STATE_MISSED bit if STATE_SCHED was already set
+ * This was suggested by Alexander Duyck, as compiler
+ * emits better code than :
+ * if (val & NAPIF_STATE_SCHED)
+ * new |= NAPIF_STATE_MISSED;
+ */
+ new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
+ NAPIF_STATE_MISSED;
+ } while (cmpxchg(&n->state, val, new) != val);
+
+ return !(val & NAPIF_STATE_SCHED);
+}
+EXPORT_SYMBOL(napi_schedule_prep);
+
+/**
* __napi_schedule_irqoff - schedule for receive
* @n: entry to schedule
*
@@ -4897,7 +4957,7 @@ EXPORT_SYMBOL(__napi_schedule_irqoff);
bool napi_complete_done(struct napi_struct *n, int work_done)
{
- unsigned long flags;
+ unsigned long flags, val, new;
/*
* 1) Don't let napi dequeue from the cpu poll list
@@ -4927,7 +4987,27 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
list_del_init(&n->poll_list);
local_irq_restore(flags);
}
- WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+
+ do {
+ val = READ_ONCE(n->state);
+
+ WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
+
+ new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
+
+ /* If STATE_MISSED was set, leave STATE_SCHED set,
+ * because we will call napi->poll() one more time.
+ * This C code was suggested by Alexander Duyck to help gcc.
+ */
+ new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
+ NAPIF_STATE_SCHED;
+ } while (cmpxchg(&n->state, val, new) != val);
+
+ if (unlikely(val & NAPIF_STATE_MISSED)) {
+ __napi_schedule(n);
+ return false;
+ }
+
return true;
}
EXPORT_SYMBOL(napi_complete_done);
@@ -4953,6 +5033,16 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
{
int rc;
+ /* Busy polling means there is a high chance device driver hard irq
+ * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was
+ * set in napi_schedule_prep().
+ * Since we are about to call napi->poll() once more, we can safely
+ * clear NAPI_STATE_MISSED.
+ *
+ * Note: x86 could use a single "lock and ..." instruction
+ * to perform these two clear_bit()
+ */
+ clear_bit(NAPI_STATE_MISSED, &napi->state);
clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
local_bh_disable();
@@ -5088,8 +5178,13 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
struct napi_struct *napi;
napi = container_of(timer, struct napi_struct, timer);
- if (napi->gro_list)
- napi_schedule_irqoff(napi);
+
+ /* Note : we use a relaxed variant of napi_schedule_prep() not setting
+ * NAPI_STATE_MISSED, since we do not react to a device IRQ.
+ */
+ if (napi->gro_list && !napi_disable_pending(napi) &&
+ !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
+ __napi_schedule_irqoff(napi);
return HRTIMER_NORESTART;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index e7d74940e863..f6fd79f33097 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1539,11 +1539,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
is_charged = sk_filter_charge(newsk, filter);
if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
- /* It is still raw copy of parent, so invalidate
- * destructor and make plain sk_free() */
- newsk->sk_destruct = NULL;
- bh_unlock_sock(newsk);
- sk_free(newsk);
+ sk_free_unlock_clone(newsk);
newsk = NULL;
goto out;
}
@@ -1592,6 +1588,16 @@ out:
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
+void sk_free_unlock_clone(struct sock *sk)
+{
+ /* It is still raw copy of parent, so invalidate
+ * destructor and make plain sk_free() */
+ sk->sk_destruct = NULL;
+ bh_unlock_sock(sk);
+ sk_free(sk);
+}
+EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
+
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;