summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-06-07 16:37:14 -0700
committerDavid S. Miller <davem@davemloft.net>2016-06-07 16:37:14 -0700
commit34fe76abbea5174e532681e420fb31139909efb4 (patch)
tree1f241506d6b781b65d1033925f1c1ce6a39c3394
parent64151ae36ed93c45654069c8aff2a7f0125075e8 (diff)
parentedb09eb17ed89eaa82a52dd306beac93e292b485 (diff)
downloadlinux-34fe76abbea5174e532681e420fb31139909efb4.tar.bz2
Merge branch 'net-sched-fast-stats'
Eric Dumazet says: ==================== net: sched: faster stats gathering A while back, I sent one RFC patch using lockless stats gathering on 64bit arches. This patch series does it more cleanly, using a seqcount. Since qdisc/class stats are written at dequeue() time, we can ask the dequeue to change the seqcount, so that stats readers can avoid taking the root qdisc lock, and instead the typical read_seqcount_{begin|retry} guarded loop. This does not change fast path costs, as the seqcount increments are not more expensive than the bit manipulation, and allows readers to not freeze the fast path anymore. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/gen_stats.txt2
-rw-r--r--drivers/net/bonding/bond_main.c2
-rw-r--r--drivers/net/ppp/ppp_generic.c3
-rw-r--r--drivers/net/team/team.c2
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/net/gen_stats.h12
-rw-r--r--include/net/sch_generic.h23
-rw-r--r--net/bluetooth/6lowpan.c2
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/gen_estimator.c24
-rw-r--r--net/core/gen_stats.c34
-rw-r--r--net/ieee802154/6lowpan/core.c3
-rw-r--r--net/l2tp/l2tp_eth.c4
-rw-r--r--net/netfilter/xt_RATEEST.c2
-rw-r--r--net/sched/act_api.c4
-rw-r--r--net/sched/act_police.c3
-rw-r--r--net/sched/sch_api.c21
-rw-r--r--net/sched/sch_atm.c3
-rw-r--r--net/sched/sch_cbq.c9
-rw-r--r--net/sched/sch_drr.c9
-rw-r--r--net/sched/sch_fq_codel.c15
-rw-r--r--net/sched/sch_generic.c14
-rw-r--r--net/sched/sch_hfsc.c10
-rw-r--r--net/sched/sch_htb.c11
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c11
-rw-r--r--net/sched/sch_multiq.c3
-rw-r--r--net/sched/sch_prio.c3
-rw-r--r--net/sched/sch_qfq.c9
29 files changed, 158 insertions, 85 deletions
diff --git a/Documentation/networking/gen_stats.txt b/Documentation/networking/gen_stats.txt
index ff630a87b511..179b18ce45ff 100644
--- a/Documentation/networking/gen_stats.txt
+++ b/Documentation/networking/gen_stats.txt
@@ -21,7 +21,7 @@ struct mystruct {
...
};
-Update statistics:
+Update statistics, in dequeue() methods only, (while owning qdisc->running)
mystruct->tstats.packet++;
mystruct->qstats.backlog += skb->pkt_len;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 941ec99cd3b6..681af31a60ed 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4610,6 +4610,7 @@ static int bond_check_params(struct bond_params *params)
static struct lock_class_key bonding_netdev_xmit_lock_key;
static struct lock_class_key bonding_netdev_addr_lock_key;
static struct lock_class_key bonding_tx_busylock_key;
+static struct lock_class_key bonding_qdisc_running_key;
static void bond_set_lockdep_class_one(struct net_device *dev,
struct netdev_queue *txq,
@@ -4625,6 +4626,7 @@ static void bond_set_lockdep_class(struct net_device *dev)
&bonding_netdev_addr_lock_key);
netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL);
dev->qdisc_tx_busylock = &bonding_tx_busylock_key;
+ dev->qdisc_running_key = &bonding_qdisc_running_key;
}
/* Called from registration process */
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 8dedafa1a95d..aeabaa42317f 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1313,9 +1313,12 @@ ppp_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64)
}
static struct lock_class_key ppp_tx_busylock;
+static struct lock_class_key ppp_qdisc_running_key;
+
static int ppp_dev_init(struct net_device *dev)
{
dev->qdisc_tx_busylock = &ppp_tx_busylock;
+ dev->qdisc_running_key = &ppp_qdisc_running_key;
return 0;
}
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 2ace126533cd..00eb38956a2c 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1577,6 +1577,7 @@ static const struct team_option team_options[] = {
static struct lock_class_key team_netdev_xmit_lock_key;
static struct lock_class_key team_netdev_addr_lock_key;
static struct lock_class_key team_tx_busylock_key;
+static struct lock_class_key team_qdisc_running_key;
static void team_set_lockdep_class_one(struct net_device *dev,
struct netdev_queue *txq,
@@ -1590,6 +1591,7 @@ static void team_set_lockdep_class(struct net_device *dev)
lockdep_set_class(&dev->addr_list_lock, &team_netdev_addr_lock_key);
netdev_for_each_tx_queue(dev, team_set_lockdep_class_one, NULL);
dev->qdisc_tx_busylock = &team_tx_busylock_key;
+ dev->qdisc_running_key = &team_qdisc_running_key;
}
static int team_init(struct net_device *dev)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fa6df2699532..59d7e06d88d5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1862,6 +1862,7 @@ struct net_device {
#endif
struct phy_device *phydev;
struct lock_class_key *qdisc_tx_busylock;
+ struct lock_class_key *qdisc_running_key;
bool proto_down;
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 610cd397890e..231e121cc7d9 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -33,10 +33,12 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
spinlock_t *lock, struct gnet_dump *d,
int padattr);
-int gnet_stats_copy_basic(struct gnet_dump *d,
+int gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_dump *d,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b);
-void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
+void __gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b);
int gnet_stats_copy_rate_est(struct gnet_dump *d,
@@ -52,13 +54,15 @@ int gnet_stats_finish_copy(struct gnet_dump *d);
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
- spinlock_t *stats_lock, struct nlattr *opt);
+ spinlock_t *stats_lock,
+ seqcount_t *running, struct nlattr *opt);
void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_rate_est64 *rate_est);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
- spinlock_t *stats_lock, struct nlattr *opt);
+ spinlock_t *stats_lock,
+ seqcount_t *running, struct nlattr *opt);
bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
const struct gnet_stats_rate_est64 *rate_est);
#endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a1fd76c22a59..c4f5749342ec 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -29,13 +29,6 @@ enum qdisc_state_t {
__QDISC_STATE_THROTTLED,
};
-/*
- * following bits are only changed while qdisc lock is held
- */
-enum qdisc___state_t {
- __QDISC___STATE_RUNNING = 1,
-};
-
struct qdisc_size_table {
struct rcu_head rcu;
struct list_head list;
@@ -93,7 +86,7 @@ struct Qdisc {
unsigned long state;
struct sk_buff_head q;
struct gnet_stats_basic_packed bstats;
- unsigned int __state;
+ seqcount_t running;
struct gnet_stats_queue qstats;
struct rcu_head rcu_head;
int padded;
@@ -104,20 +97,20 @@ struct Qdisc {
static inline bool qdisc_is_running(const struct Qdisc *qdisc)
{
- return (qdisc->__state & __QDISC___STATE_RUNNING) ? true : false;
+ return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
}
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
if (qdisc_is_running(qdisc))
return false;
- qdisc->__state |= __QDISC___STATE_RUNNING;
+ write_seqcount_begin(&qdisc->running);
return true;
}
static inline void qdisc_run_end(struct Qdisc *qdisc)
{
- qdisc->__state &= ~__QDISC___STATE_RUNNING;
+ write_seqcount_end(&qdisc->running);
}
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
@@ -321,6 +314,14 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
return qdisc_lock(root);
}
+static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
+{
+ struct Qdisc *root = qdisc_root_sleeping(qdisc);
+
+ ASSERT_RTNL();
+ return &root->running;
+}
+
static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
{
return qdisc->dev_queue->dev;
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 780089d75915..977a11e418d0 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -629,6 +629,7 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
static struct lock_class_key bt_tx_busylock;
static struct lock_class_key bt_netdev_xmit_lock_key;
+static struct lock_class_key bt_qdisc_running_key;
static void bt_set_lockdep_class_one(struct net_device *dev,
struct netdev_queue *txq,
@@ -641,6 +642,7 @@ static int bt_dev_init(struct net_device *dev)
{
netdev_for_each_tx_queue(dev, bt_set_lockdep_class_one, NULL);
dev->qdisc_tx_busylock = &bt_tx_busylock;
+ dev->qdisc_running_key = &bt_qdisc_running_key;
return 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 896b686d1966..e0bcc39f4a7d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3075,7 +3075,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
- * This permits __QDISC___STATE_RUNNING owner to get the lock more
+ * This permits qdisc->running owner to get the lock more
* often and dequeue packets faster.
*/
contended = qdisc_is_running(q);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 4573d81093fe..cad8e791f28e 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -84,6 +84,7 @@ struct gen_estimator
struct gnet_stats_basic_packed *bstats;
struct gnet_stats_rate_est64 *rate_est;
spinlock_t *stats_lock;
+ seqcount_t *running;
int ewma_log;
u32 last_packets;
unsigned long avpps;
@@ -121,26 +122,28 @@ static void est_timer(unsigned long arg)
unsigned long rate;
u64 brate;
- spin_lock(e->stats_lock);
+ if (e->stats_lock)
+ spin_lock(e->stats_lock);
read_lock(&est_lock);
if (e->bstats == NULL)
goto skip;
- __gnet_stats_copy_basic(&b, e->cpu_bstats, e->bstats);
+ __gnet_stats_copy_basic(e->running, &b, e->cpu_bstats, e->bstats);
brate = (b.bytes - e->last_bytes)<<(7 - idx);
e->last_bytes = b.bytes;
e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
- e->rate_est->bps = (e->avbps+0xF)>>5;
+ WRITE_ONCE(e->rate_est->bps, (e->avbps + 0xF) >> 5);
rate = b.packets - e->last_packets;
rate <<= (7 - idx);
e->last_packets = b.packets;
e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
- e->rate_est->pps = (e->avpps + 0xF) >> 5;
+ WRITE_ONCE(e->rate_est->pps, (e->avpps + 0xF) >> 5);
skip:
read_unlock(&est_lock);
- spin_unlock(e->stats_lock);
+ if (e->stats_lock)
+ spin_unlock(e->stats_lock);
}
if (!list_empty(&elist[idx].list))
@@ -194,6 +197,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
+ * @running: qdisc running seqcount
* @opt: rate estimator configuration TLV
*
* Creates a new rate estimator with &bstats as source and &rate_est
@@ -209,6 +213,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock,
+ seqcount_t *running,
struct nlattr *opt)
{
struct gen_estimator *est;
@@ -226,12 +231,13 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
if (est == NULL)
return -ENOBUFS;
- __gnet_stats_copy_basic(&b, cpu_bstats, bstats);
+ __gnet_stats_copy_basic(running, &b, cpu_bstats, bstats);
idx = parm->interval + 2;
est->bstats = bstats;
est->rate_est = rate_est;
est->stats_lock = stats_lock;
+ est->running = running;
est->ewma_log = parm->ewma_log;
est->last_bytes = b.bytes;
est->avbps = rate_est->bps<<5;
@@ -291,6 +297,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
+ * @running: qdisc running seqcount (might be NULL)
* @opt: rate estimator configuration TLV
*
* Replaces the configuration of a rate estimator by calling
@@ -301,10 +308,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
- spinlock_t *stats_lock, struct nlattr *opt)
+ spinlock_t *stats_lock,
+ seqcount_t *running, struct nlattr *opt)
{
gen_kill_estimator(bstats, rate_est);
- return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, opt);
+ return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index f96ee8b9478d..d9c210caff32 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -32,10 +32,11 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
return 0;
nla_put_failure:
+ if (d->lock)
+ spin_unlock_bh(d->lock);
kfree(d->xstats);
d->xstats = NULL;
d->xstats_len = 0;
- spin_unlock_bh(d->lock);
return -1;
}
@@ -65,15 +66,16 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
{
memset(d, 0, sizeof(*d));
- spin_lock_bh(lock);
- d->lock = lock;
if (type)
d->tail = (struct nlattr *)skb_tail_pointer(skb);
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
d->padattr = padattr;
-
+ if (lock) {
+ d->lock = lock;
+ spin_lock_bh(lock);
+ }
if (d->tail)
return gnet_stats_copy(d, type, NULL, 0, padattr);
@@ -126,16 +128,23 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
}
void
-__gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
+__gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b)
{
+ unsigned int seq;
+
if (cpu) {
__gnet_stats_copy_basic_cpu(bstats, cpu);
- } else {
+ return;
+ }
+ do {
+ if (running)
+ seq = read_seqcount_begin(running);
bstats->bytes = b->bytes;
bstats->packets = b->packets;
- }
+ } while (running && read_seqcount_retry(running, seq));
}
EXPORT_SYMBOL(__gnet_stats_copy_basic);
@@ -152,13 +161,14 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic);
* if the room in the socket buffer was not sufficient.
*/
int
-gnet_stats_copy_basic(struct gnet_dump *d,
+gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_dump *d,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b)
{
struct gnet_stats_basic_packed bstats = {0};
- __gnet_stats_copy_basic(&bstats, cpu, b);
+ __gnet_stats_copy_basic(running, &bstats, cpu, b);
if (d->compat_tc_stats) {
d->tc_stats.bytes = bstats.bytes;
@@ -328,8 +338,9 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
return 0;
err_out:
+ if (d->lock)
+ spin_unlock_bh(d->lock);
d->xstats_len = 0;
- spin_unlock_bh(d->lock);
return -1;
}
EXPORT_SYMBOL(gnet_stats_copy_app);
@@ -363,10 +374,11 @@ gnet_stats_finish_copy(struct gnet_dump *d)
return -1;
}
+ if (d->lock)
+ spin_unlock_bh(d->lock);
kfree(d->xstats);
d->xstats = NULL;
d->xstats_len = 0;
- spin_unlock_bh(d->lock);
return 0;
}
EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index dd085db8580e..14aa5effd29a 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -60,6 +60,7 @@ static struct header_ops lowpan_header_ops = {
static struct lock_class_key lowpan_tx_busylock;
static struct lock_class_key lowpan_netdev_xmit_lock_key;
+static struct lock_class_key lowpan_qdisc_running_key;
static void lowpan_set_lockdep_class_one(struct net_device *ldev,
struct netdev_queue *txq,
@@ -73,6 +74,8 @@ static int lowpan_dev_init(struct net_device *ldev)
{
netdev_for_each_tx_queue(ldev, lowpan_set_lockdep_class_one, NULL);
ldev->qdisc_tx_busylock = &lowpan_tx_busylock;
+ ldev->qdisc_running_key = &lowpan_qdisc_running_key;
+
return 0;
}
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index e253c26f31ac..c00d72d182fa 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -68,6 +68,8 @@ static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
}
static struct lock_class_key l2tp_eth_tx_busylock;
+static struct lock_class_key l2tp_qdisc_running_key;
+
static int l2tp_eth_dev_init(struct net_device *dev)
{
struct l2tp_eth *priv = netdev_priv(dev);
@@ -76,6 +78,8 @@ static int l2tp_eth_dev_init(struct net_device *dev)
eth_hw_addr_random(dev);
eth_broadcast_addr(dev->broadcast);
dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock;
+ dev->qdisc_running_key = &l2tp_qdisc_running_key;
+
return 0;
}
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 604df6fae6fc..515131f9e021 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -137,7 +137,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
cfg.est.ewma_log = info->ewma_log;
ret = gen_new_estimator(&est->bstats, NULL, &est->rstats,
- &est->lock, &cfg.opt);
+ &est->lock, NULL, &cfg.opt);
if (ret < 0)
goto err2;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 719bc2e85852..b6db56ec8117 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -287,7 +287,7 @@ err2:
if (est) {
err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats,
&p->tcfc_rate_est,
- &p->tcfc_lock, est);
+ &p->tcfc_lock, NULL, est);
if (err) {
free_percpu(p->cpu_qstats);
goto err2;
@@ -671,7 +671,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (err < 0)
goto errout;
- if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
+ if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
&p->tcfc_rate_est) < 0 ||
gnet_stats_copy_queue(&d, p->cpu_qstats,
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 820b11686f85..bcb31142556b 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -185,7 +185,8 @@ override:
if (est) {
err = gen_replace_estimator(&police->tcf_bstats, NULL,
&police->tcf_rate_est,
- &police->tcf_lock, est);
+ &police->tcf_lock,
+ NULL, est);
if (err)
goto failure_unlock;
} else if (tb[TCA_POLICE_AVRATE] &&
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ddf047df5361..d4a8bbfcc953 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -982,7 +982,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
rcu_assign_pointer(sch->stab, stab);
}
if (tca[TCA_RATE]) {
- spinlock_t *root_lock;
+ seqcount_t *running;
err = -EOPNOTSUPP;
if (sch->flags & TCQ_F_MQROOT)
@@ -991,14 +991,15 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
if ((sch->parent != TC_H_ROOT) &&
!(sch->flags & TCQ_F_INGRESS) &&
(!p || !(p->flags & TCQ_F_MQROOT)))
- root_lock = qdisc_root_sleeping_lock(sch);
+ running = qdisc_root_sleeping_running(sch);
else
- root_lock = qdisc_lock(sch);
+ running = &sch->running;
err = gen_new_estimator(&sch->bstats,
sch->cpu_bstats,
&sch->rate_est,
- root_lock,
+ NULL,
+ running,
tca[TCA_RATE]);
if (err)
goto err_out4;
@@ -1061,7 +1062,8 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
gen_replace_estimator(&sch->bstats,
sch->cpu_bstats,
&sch->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
}
out:
@@ -1369,8 +1371,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
goto nla_put_failure;
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- qdisc_root_sleeping_lock(q), &d,
- TCA_PAD) < 0)
+ NULL, &d, TCA_PAD) < 0)
goto nla_put_failure;
if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
@@ -1381,7 +1382,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
cpu_qstats = q->cpu_qstats;
}
- if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
+ &d, cpu_bstats, &q->bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
goto nla_put_failure;
@@ -1684,8 +1686,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
goto nla_put_failure;
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- qdisc_root_sleeping_lock(q), &d,
- TCA_PAD) < 0)
+ NULL, &d, TCA_PAD) < 0)
goto nla_put_failure;
if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 1911af3ca7c0..34f8f79e56d5 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -637,7 +637,8 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
{
struct atm_flow_data *flow = (struct atm_flow_data *)arg;
- if (gnet_stats_copy_basic(d, NULL, &flow->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &flow->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index baafddf229ce..1b8128fb845d 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1600,7 +1600,8 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (cl->undertime != PSCHED_PASTPERFECT)
cl->xstats.undertime = cl->undertime - q->now;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
return -1;
@@ -1755,7 +1756,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
qdisc_put_rtab(rtab);
@@ -1848,7 +1850,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
kfree(cl);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index a63e879e8975..1b7e1a27773d 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -91,7 +91,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
@@ -119,7 +120,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
qdisc_destroy(cl->qdisc);
@@ -279,7 +281,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (qlen)
xstats.deficit = cl->deficit;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0)
return -1;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 6883a8971562..1daa54237f4e 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -566,11 +566,13 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.qdisc_stats.memory_usage = q->memory_usage;
st.qdisc_stats.drop_overmemory = q->drop_overmemory;
+ sch_tree_lock(sch);
list_for_each(pos, &q->new_flows)
st.qdisc_stats.new_flows_len++;
list_for_each(pos, &q->old_flows)
st.qdisc_stats.old_flows_len++;
+ sch_tree_unlock(sch);
return gnet_stats_copy_app(d, &st, sizeof(st));
}
@@ -624,7 +626,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
if (idx < q->flows_cnt) {
const struct fq_codel_flow *flow = &q->flows[idx];
- const struct sk_buff *skb = flow->head;
+ const struct sk_buff *skb;
memset(&xstats, 0, sizeof(xstats));
xstats.type = TCA_FQ_CODEL_XSTATS_CLASS;
@@ -642,9 +644,14 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
codel_time_to_us(delta) :
-codel_time_to_us(-delta);
}
- while (skb) {
- qs.qlen++;
- skb = skb->next;
+ if (flow->head) {
+ sch_tree_lock(sch);
+ skb = flow->head;
+ while (skb) {
+ qs.qlen++;
+ skb = skb->next;
+ }
+ sch_tree_unlock(sch);
}
qs.backlog = q->backlogs[idx];
qs.drops = flow->dropped;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 269dd71b3828..cebea73e70ac 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -110,7 +110,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
/*
* Transmit possibly several skbs, and handle the return status as
- * required. Holding the __QDISC___STATE_RUNNING bit guarantees that
+ * required. Owning running seqcount bit guarantees that
* only one CPU can execute this function.
*
* Returns to the caller:
@@ -137,10 +137,10 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
HARD_TX_UNLOCK(dev, txq);
} else {
- spin_lock(root_lock);
+ spin_lock_nested(root_lock, SINGLE_DEPTH_NESTING);
return qdisc_qlen(q);
}
- spin_lock(root_lock);
+ spin_lock_nested(root_lock, SINGLE_DEPTH_NESTING);
if (dev_xmit_complete(ret)) {
/* Driver sent out skb successfully or skb was consumed */
@@ -163,7 +163,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
/*
* NOTE: Called under qdisc_lock(q) with locally disabled BH.
*
- * __QDISC___STATE_RUNNING guarantees only one CPU can process
+ * running seqcount guarantees only one CPU can process
* this qdisc at a time. qdisc_lock(q) serializes queue accesses for
* this queue.
*
@@ -379,6 +379,7 @@ struct Qdisc noop_qdisc = {
.list = LIST_HEAD_INIT(noop_qdisc.list),
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
+ .running = SEQCNT_ZERO(noop_qdisc.running),
.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
};
EXPORT_SYMBOL(noop_qdisc);
@@ -537,6 +538,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
EXPORT_SYMBOL(pfifo_fast_ops);
static struct lock_class_key qdisc_tx_busylock;
+static struct lock_class_key qdisc_running_key;
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
const struct Qdisc_ops *ops)
@@ -570,6 +572,10 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
lockdep_set_class(&sch->busylock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+ seqcount_init(&sch->running);
+ lockdep_set_class(&sch->running,
+ dev->qdisc_running_key ?: &qdisc_running_key);
+
sch->ops = ops;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d783d7cc3348..74813dd49053 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1015,11 +1015,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cur_time = psched_get_time();
if (tca[TCA_RATE]) {
- spinlock_t *lock = qdisc_root_sleeping_lock(sch);
-
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- lock,
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
@@ -1068,7 +1067,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
kfree(cl);
@@ -1373,7 +1373,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
xstats.work = cl->cl_total;
xstats.rtwork = cl->cl_cumul;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d4b4218af6b1..2b057649f24b 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1141,7 +1141,8 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
return -1;
@@ -1395,7 +1396,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (htb_rate_est || tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE] ? : &est.nla);
if (err) {
kfree(cl);
@@ -1457,11 +1459,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
parent->children++;
} else {
if (tca[TCA_RATE]) {
- spinlock_t *lock = qdisc_root_sleeping_lock(sch);
-
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- lock,
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 56a77b878eb3..b9439827c172 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -199,7 +199,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
return -1;
return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index b8002ce3d010..549c66359924 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -342,7 +342,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
* hold here is the look on dev_queue->qdisc_sleeping
* also acquired below.
*/
- spin_unlock_bh(d->lock);
+ if (d->lock)
+ spin_unlock_bh(d->lock);
for (i = tc.offset; i < tc.offset + tc.count; i++) {
struct netdev_queue *q = netdev_get_tx_queue(dev, i);
@@ -359,15 +360,17 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
spin_unlock_bh(qdisc_lock(qdisc));
}
/* Reclaim root sleeping lock before completing stats */
- spin_lock_bh(d->lock);
- if (gnet_stats_copy_basic(d, NULL, &bstats) < 0 ||
+ if (d->lock)
+ spin_lock_bh(d->lock);
+ if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
return -1;
} else {
struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &sch->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL,
&sch->qstats, sch->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index bcdd54bb101c..21e69d2e8347 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -356,7 +356,8 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl_q->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index fee1b15506b2..06eca7060683 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -319,7 +319,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl_q->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 8d2d8d953432..85d41979d825 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -460,7 +460,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
@@ -486,7 +487,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
goto destroy_class;
@@ -663,7 +665,8 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
xstats.weight = cl->agg->class_weight;
xstats.lmax = cl->agg->lmax;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL,
&cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)