From 6fe1c7a5556807e9d7154a2d2fb938d8a9e47e5f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 5 Jul 2008 23:21:31 -0700 Subject: net-sched: add dynamically sized qdisc class hash helpers Currently all qdiscs which allow to create classes uses a fixed sized hash table with size 16 to hash the classes. This causes a large bottleneck when using thousands of classes and unbound filters. Add helpers for dynamically sized class hashes to fix this. The following patches will convert the qdiscs to use them. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 10f01ad04380..e9ebc7af049e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -316,6 +316,110 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) } EXPORT_SYMBOL(qdisc_watchdog_cancel); +struct hlist_head *qdisc_class_hash_alloc(unsigned int n) +{ + unsigned int size = n * sizeof(struct hlist_head), i; + struct hlist_head *h; + + if (size <= PAGE_SIZE) + h = kmalloc(size, GFP_KERNEL); + else + h = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(size)); + + if (h != NULL) { + for (i = 0; i < n; i++) + INIT_HLIST_HEAD(&h[i]); + } + return h; +} + +static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n) +{ + unsigned int size = n * sizeof(struct hlist_head); + + if (size <= PAGE_SIZE) + kfree(h); + else + free_pages((unsigned long)h, get_order(size)); +} + +void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) +{ + struct Qdisc_class_common *cl; + struct hlist_node *n, *next; + struct hlist_head *nhash, *ohash; + unsigned int nsize, nmask, osize; + unsigned int i, h; + + /* Rehash when load factor exceeds 0.75 */ + if (clhash->hashelems * 4 <= clhash->hashsize * 3) + return; + nsize = clhash->hashsize * 2; + nmask = nsize - 1; + nhash = qdisc_class_hash_alloc(nsize); + if (nhash == NULL) + return; + + ohash = clhash->hash; + osize = clhash->hashsize; + + sch_tree_lock(sch); + for (i = 0; i < osize; i++) { + hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) { + h = qdisc_class_hash(cl->classid, nmask); + hlist_add_head(&cl->hnode, &nhash[h]); + } + } + clhash->hash = nhash; + clhash->hashsize = nsize; + clhash->hashmask = nmask; + sch_tree_unlock(sch); + + qdisc_class_hash_free(ohash, osize); +} +EXPORT_SYMBOL(qdisc_class_hash_grow); + +int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) +{ + unsigned int size = 4; + + clhash->hash = qdisc_class_hash_alloc(size); + if (clhash->hash == NULL) + return -ENOMEM; + clhash->hashsize = size; + clhash->hashmask = size - 1; + clhash->hashelems = 0; + return 0; +} +EXPORT_SYMBOL(qdisc_class_hash_init); + +void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) +{ + qdisc_class_hash_free(clhash->hash, clhash->hashsize); +} +EXPORT_SYMBOL(qdisc_class_hash_destroy); + +void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash, + struct Qdisc_class_common *cl) +{ + unsigned int h; + + INIT_HLIST_NODE(&cl->hnode); + h = qdisc_class_hash(cl->classid, clhash->hashmask); + hlist_add_head(&cl->hnode, &clhash->hash[h]); + clhash->hashelems++; +} +EXPORT_SYMBOL(qdisc_class_hash_insert); + +void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash, + struct Qdisc_class_common *cl) +{ + hlist_del(&cl->hnode); + clhash->hashelems--; +} +EXPORT_SYMBOL(qdisc_class_hash_remove); + /* Allocate an unique handle from space managed by kernel */ static u32 qdisc_alloc_handle(struct net_device *dev) -- cgit v1.2.3 From e65d22e18038eed7307276e46810d884c402d57d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 16:46:01 -0700 Subject: pkt_sched: Remove comment reference to old style TX locking. We haven't had netdev->tbusy in many years :) Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e9ebc7af049e..69e918bb4278 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -99,7 +99,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, ---requeue requeues once dequeued packet. It is used for non-standard or - just buggy devices, which can defer output even if dev->tbusy=0. + just buggy devices, which can defer output even if netif_queue_stopped()=0. ---reset -- cgit v1.2.3 From bb949fbd1878973c3539d9aecff52f284482a937 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 16:55:56 -0700 Subject: netdev: Create netdev_queue abstraction. A netdev_queue is an entity managed by a qdisc. Currently there is one RX and one TX queue, and a netdev_queue merely contains a backpointer to the net_device. The Qdisc struct is augmented with a netdev_queue pointer as well. Eventually the 'dev' Qdisc member will go away and we will have the resulting hierarchy: net_device --> netdev_queue --> Qdisc Also, qdisc_alloc() and qdisc_create_dflt() now take a netdev_queue pointer argument. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ include/net/sch_generic.h | 6 +++++- net/core/dev.c | 8 ++++++++ net/mac80211/wme.c | 6 ++++-- net/sched/sch_api.c | 12 +++++++----- net/sched/sch_atm.c | 6 ++++-- net/sched/sch_cbq.c | 9 ++++++--- net/sched/sch_dsmark.c | 6 ++++-- net/sched/sch_fifo.c | 3 ++- net/sched/sch_generic.c | 14 ++++++++++---- net/sched/sch_hfsc.c | 9 ++++++--- net/sched/sch_htb.c | 11 +++++++---- net/sched/sch_netem.c | 3 ++- net/sched/sch_prio.c | 3 ++- 14 files changed, 74 insertions(+), 29 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e009c6fbf5cd..515fd25bf0fc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -448,6 +448,10 @@ static inline void napi_synchronize(const struct napi_struct *n) # define napi_synchronize(n) barrier() #endif +struct netdev_queue { + struct net_device *dev; +}; + /* * The DEVICE structure. * Actually, this whole structure is a big mistake. It mixes I/O @@ -624,6 +628,9 @@ struct net_device unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ + struct netdev_queue rx_queue; + struct netdev_queue tx_queue; + /* ingress path synchronizer */ spinlock_t ingress_lock; struct Qdisc *qdisc_ingress; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 073f2580b83b..0ab53c575f87 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -37,6 +37,7 @@ struct Qdisc u32 parent; atomic_t refcnt; struct sk_buff_head q; + struct netdev_queue *dev_queue; struct net_device *dev; struct list_head list; @@ -216,8 +217,11 @@ extern void dev_deactivate(struct net_device *dev); extern void qdisc_reset(struct Qdisc *qdisc); extern void qdisc_destroy(struct Qdisc *qdisc); extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n); -extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops); +extern struct Qdisc *qdisc_alloc(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc_ops *ops); extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, + struct netdev_queue *dev_queue, struct Qdisc_ops *ops, u32 parentid); extern void tcf_destroy(struct tcf_proto *tp); extern void tcf_destroy_chain(struct tcf_proto **fl); diff --git a/net/core/dev.c b/net/core/dev.c index 75933932463d..9b281c906eb0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4072,6 +4072,12 @@ static struct net_device_stats *internal_stats(struct net_device *dev) return &dev->stats; } +static void netdev_init_queues(struct net_device *dev) +{ + dev->rx_queue.dev = dev; + dev->tx_queue.dev = dev; +} + /** * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -4124,6 +4130,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->egress_subqueue_count = queue_count; dev->gso_max_size = GSO_MAX_SIZE; + netdev_init_queues(dev); + dev->get_stats = internal_stats; netpoll_netdev_init(dev); setup(dev); diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 5c666f7eda8f..770f1c09b793 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -359,7 +359,8 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt) /* create child queues */ for (i = 0; i < QD_NUM(hw); i++) { skb_queue_head_init(&q->requeued[i]); - q->queues[i] = qdisc_create_dflt(qd->dev, &pfifo_qdisc_ops, + q->queues[i] = qdisc_create_dflt(qd->dev, qd->dev_queue, + &pfifo_qdisc_ops, qd->handle); if (!q->queues[i]) { q->queues[i] = &noop_qdisc; @@ -575,7 +576,8 @@ void ieee80211_install_qdisc(struct net_device *dev) { struct Qdisc *qdisc; - qdisc = qdisc_create_dflt(dev, &wme_qdisc_ops, TC_H_ROOT); + qdisc = qdisc_create_dflt(dev, &dev->tx_queue, + &wme_qdisc_ops, TC_H_ROOT); if (!qdisc) { printk(KERN_ERR "%s: qdisc installation failed\n", dev->name); return; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 69e918bb4278..b86c98bd06a3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -552,8 +552,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, */ static struct Qdisc * -qdisc_create(struct net_device *dev, u32 parent, u32 handle, - struct nlattr **tca, int *errp) +qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, + u32 parent, u32 handle, struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; @@ -593,7 +593,7 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, if (ops == NULL) goto err_out; - sch = qdisc_alloc(dev, ops); + sch = qdisc_alloc(dev, dev_queue, ops); if (IS_ERR(sch)) { err = PTR_ERR(sch); goto err_out2; @@ -892,10 +892,12 @@ create_n_graft: if (!(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; if (clid == TC_H_INGRESS) - q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent, + q = qdisc_create(dev, &dev->rx_queue, + tcm->tcm_parent, tcm->tcm_parent, tca, &err); else - q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle, + q = qdisc_create(dev, &dev->tx_queue, + tcm->tcm_parent, tcm->tcm_handle, tca, &err); if (q == NULL) { if (err == -EAGAIN) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index db0e23ae85f8..3dddab531d5a 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -296,7 +296,8 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, goto err_out; } flow->filter_list = NULL; - flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + flow->q = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, classid); if (!flow->q) flow->q = &noop_qdisc; pr_debug("atm_tc_change: qdisc %p\n", flow->q); @@ -555,7 +556,8 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); p->flows = &p->link; - p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); + p->link.q = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (!p->link.q) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 968b4c73c9c1..d360dcd0818b 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1401,7 +1401,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.sibling = &q->link; q->link.common.classid = sch->handle; q->link.qdisc = sch; - if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + if (!(q->link.q = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, sch->handle))) q->link.q = &noop_qdisc; @@ -1645,7 +1646,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl) { if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + new = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, cl->common.classid); if (new == NULL) return -ENOBUFS; @@ -1877,7 +1879,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; - if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) + if (!(cl->q = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, classid))) cl->q = &noop_qdisc; cl->common.classid = classid; cl->tparent = parent; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index c4c1317cd47d..c955ba24e5cf 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -60,7 +60,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, sch, p, new, old); if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + new = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (new == NULL) new = &noop_qdisc; @@ -390,7 +391,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) p->default_index = default_index; p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); + p->q = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 82d7d7bbbb16..779eae85faf0 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -137,7 +137,8 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, struct Qdisc *q; int err = -ENOMEM; - q = qdisc_create_dflt(sch->dev, ops, TC_H_MAKE(sch->handle, 1)); + q = qdisc_create_dflt(sch->dev, sch->dev_queue, + ops, TC_H_MAKE(sch->handle, 1)); if (q) { err = fifo_set_limit(q, limit); if (err < 0) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 13afa7214392..d97086480893 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -440,7 +440,9 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .owner = THIS_MODULE, }; -struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) +struct Qdisc *qdisc_alloc(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc_ops *ops) { void *p; struct Qdisc *sch; @@ -462,6 +464,7 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; + sch->dev_queue = dev_queue; sch->dev = dev; dev_hold(dev); atomic_set(&sch->refcnt, 1); @@ -471,12 +474,14 @@ errout: return ERR_PTR(err); } -struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, +struct Qdisc * qdisc_create_dflt(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc_ops *ops, unsigned int parentid) { struct Qdisc *sch; - sch = qdisc_alloc(dev, ops); + sch = qdisc_alloc(dev, dev_queue, ops); if (IS_ERR(sch)) goto errout; sch->stats_lock = &dev->queue_lock; @@ -545,7 +550,8 @@ void dev_activate(struct net_device *dev) if (dev->qdisc_sleeping == &noop_qdisc) { struct Qdisc *qdisc; if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops, + qdisc = qdisc_create_dflt(dev, &dev->tx_queue, + &pfifo_fast_ops, TC_H_ROOT); if (qdisc == NULL) { printk(KERN_INFO "%s: activation failed\n", dev->name); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3a8267246a4f..5a22fec4eadd 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1083,7 +1083,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->refcnt = 1; cl->sched = q; cl->cl_parent = parent; - cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + cl->qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; INIT_LIST_HEAD(&cl->children); @@ -1201,7 +1202,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level > 0) return -EINVAL; if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + new = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, cl->cl_common.classid); if (new == NULL) new = &noop_qdisc; @@ -1443,7 +1445,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; q->root.sched = q; - q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + q->root.qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index ee8b4ffe110c..956a67f66b9c 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1129,7 +1129,8 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl && !cl->level) { if (new == NULL && - (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + (new = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, cl->common.classid)) == NULL) return -ENOBUFS; @@ -1256,8 +1257,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) return -EBUSY; if (!cl->level && htb_parent_last_child(cl)) { - new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->parent->common.classid); + new_q = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, + cl->parent->common.classid); last_child = 1; } @@ -1376,7 +1378,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + new_q = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, classid); sch_tree_lock(sch); if (parent && !parent->level) { unsigned int qlen = parent->un.leaf.q->q.qlen; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 24697667247c..aa7a04e32ae9 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -536,7 +536,8 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); - q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops, + q->qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue, + &tfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { pr_debug("netem: qdisc create failed\n"); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 5532f1031ab5..ca58a039208e 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -281,7 +281,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) for (i=0; ibands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child; - child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + child = qdisc_create_dflt(sch->dev, sch->dev_queue, + &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); -- cgit v1.2.3 From 5ce2d488fe039ddd86a638496cf704df86c74eeb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 17:06:30 -0700 Subject: pkt_sched: Remove 'dev' member of struct Qdisc. It can be obtained via the netdev_queue. So create a helper routine, qdisc_dev(), to make the transformations nicer looking. Now, qdisc_alloc() now no longer needs a net_device pointer argument. Signed-off-by: David S. Miller --- include/net/sch_generic.h | 16 +++++++++------- net/mac80211/wme.c | 28 ++++++++++++++-------------- net/sched/cls_api.c | 2 +- net/sched/cls_route.c | 4 ++-- net/sched/sch_api.c | 10 +++++----- net/sched/sch_atm.c | 4 ++-- net/sched/sch_cbq.c | 22 +++++++++++----------- net/sched/sch_dsmark.c | 4 ++-- net/sched/sch_fifo.c | 6 +++--- net/sched/sch_generic.c | 12 +++++------- net/sched/sch_gred.c | 2 +- net/sched/sch_hfsc.c | 10 +++++----- net/sched/sch_htb.c | 24 ++++++++++++------------ net/sched/sch_netem.c | 10 +++++----- net/sched/sch_prio.c | 15 ++++++++------- net/sched/sch_sfq.c | 4 ++-- net/sched/sch_teql.c | 12 ++++++------ 17 files changed, 93 insertions(+), 92 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0ab53c575f87..66ec36d8ac97 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -38,7 +38,6 @@ struct Qdisc atomic_t refcnt; struct sk_buff_head q; struct netdev_queue *dev_queue; - struct net_device *dev; struct list_head list; struct gnet_stats_basic bstats; @@ -156,14 +155,18 @@ struct tcf_proto struct tcf_proto_ops *ops; }; +static inline struct net_device *qdisc_dev(struct Qdisc *qdisc) +{ + return qdisc->dev_queue->dev; +} extern void qdisc_lock_tree(struct net_device *dev); extern void qdisc_unlock_tree(struct net_device *dev); -#define sch_tree_lock(q) qdisc_lock_tree((q)->dev) -#define sch_tree_unlock(q) qdisc_unlock_tree((q)->dev) -#define tcf_tree_lock(tp) qdisc_lock_tree((tp)->q->dev) -#define tcf_tree_unlock(tp) qdisc_unlock_tree((tp)->q->dev) +#define sch_tree_lock(q) qdisc_lock_tree(qdisc_dev(q)) +#define sch_tree_unlock(q) qdisc_unlock_tree(qdisc_dev(q)) +#define tcf_tree_lock(tp) qdisc_lock_tree(qdisc_dev((tp)->q)) +#define tcf_tree_unlock(tp) qdisc_unlock_tree(qdisc_dev((tp)->q)) extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; @@ -217,8 +220,7 @@ extern void dev_deactivate(struct net_device *dev); extern void qdisc_reset(struct Qdisc *qdisc); extern void qdisc_destroy(struct Qdisc *qdisc); extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n); -extern struct Qdisc *qdisc_alloc(struct net_device *dev, - struct netdev_queue *dev_queue, +extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc_ops *ops); extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, struct netdev_queue *dev_queue, diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 770f1c09b793..2fbc171130bf 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -103,7 +103,7 @@ static inline int wme_downgrade_ac(struct sk_buff *skb) * negative return value indicates to drop the frame */ static int classify80211(struct sk_buff *skb, struct Qdisc *qd) { - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; if (!ieee80211_is_data(hdr->frame_control)) { @@ -140,7 +140,7 @@ static int classify80211(struct sk_buff *skb, struct Qdisc *qd) static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd) { - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; struct ieee80211_sched_data *q = qdisc_priv(qd); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -249,7 +249,7 @@ static int wme_qdiscop_requeue(struct sk_buff *skb, struct Qdisc* qd) static struct sk_buff *wme_qdiscop_dequeue(struct Qdisc* qd) { struct ieee80211_sched_data *q = qdisc_priv(qd); - struct net_device *dev = qd->dev; + struct net_device *dev = qdisc_dev(qd); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; struct sk_buff *skb; @@ -286,7 +286,7 @@ static struct sk_buff *wme_qdiscop_dequeue(struct Qdisc* qd) static void wme_qdiscop_reset(struct Qdisc* qd) { struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; int queue; @@ -303,7 +303,7 @@ static void wme_qdiscop_reset(struct Qdisc* qd) static void wme_qdiscop_destroy(struct Qdisc* qd) { struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; int queue; @@ -328,7 +328,7 @@ static int wme_qdiscop_tune(struct Qdisc *qd, struct nlattr *opt) static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt) { struct ieee80211_sched_data *q = qdisc_priv(qd); - struct net_device *dev = qd->dev; + struct net_device *dev = qdisc_dev(qd); struct ieee80211_local *local; struct ieee80211_hw *hw; int err = 0, i; @@ -359,7 +359,7 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt) /* create child queues */ for (i = 0; i < QD_NUM(hw); i++) { skb_queue_head_init(&q->requeued[i]); - q->queues[i] = qdisc_create_dflt(qd->dev, qd->dev_queue, + q->queues[i] = qdisc_create_dflt(qdisc_dev(qd), qd->dev_queue, &pfifo_qdisc_ops, qd->handle); if (!q->queues[i]) { @@ -386,7 +386,7 @@ static int wme_classop_graft(struct Qdisc *qd, unsigned long arg, struct Qdisc *new, struct Qdisc **old) { struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; unsigned long queue = arg - 1; @@ -410,7 +410,7 @@ static struct Qdisc * wme_classop_leaf(struct Qdisc *qd, unsigned long arg) { struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; unsigned long queue = arg - 1; @@ -423,7 +423,7 @@ wme_classop_leaf(struct Qdisc *qd, unsigned long arg) static unsigned long wme_classop_get(struct Qdisc *qd, u32 classid) { - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; unsigned long queue = TC_H_MIN(classid); @@ -450,7 +450,7 @@ static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent, struct nlattr **tca, unsigned long *arg) { unsigned long cl = *arg; - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; if (cl - 1 > QD_NUM(hw)) @@ -467,7 +467,7 @@ static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent, * when we add WMM-SA support - TSPECs may be deleted here */ static int wme_classop_delete(struct Qdisc *qd, unsigned long cl) { - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; if (cl - 1 > QD_NUM(hw)) @@ -480,7 +480,7 @@ static int wme_classop_dump_class(struct Qdisc *qd, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; if (cl - 1 > QD_NUM(hw)) @@ -494,7 +494,7 @@ static int wme_classop_dump_class(struct Qdisc *qd, unsigned long cl, static void wme_classop_walk(struct Qdisc *qd, struct qdisc_walker *arg) { - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); + struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; int queue; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 9360fc81e8c7..e2389f161e46 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -334,7 +334,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad1 = 0; - tcm->tcm_ifindex = tp->q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; tcm->tcm_parent = tp->classid; tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind); diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 784dcb870b98..5a16ca28aa3d 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -302,7 +302,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) *fp = f->next; tcf_tree_unlock(tp); - route4_reset_fastmap(tp->q->dev, head, f->id); + route4_reset_fastmap(qdisc_dev(tp->q), head, f->id); route4_delete_filter(tp, f); /* Strip tree */ @@ -500,7 +500,7 @@ reinsert: } tcf_tree_unlock(tp); - route4_reset_fastmap(tp->q->dev, head, f->id); + route4_reset_fastmap(qdisc_dev(tp->q), head, f->id); *arg = (unsigned long)f; return 0; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b86c98bd06a3..1f893082a4f6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -281,7 +281,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); - struct net_device *dev = wd->qdisc->dev; + struct net_device *dev = qdisc_dev(wd->qdisc); wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); @@ -493,7 +493,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) return; - sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); + sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { WARN_ON(parentid != TC_H_ROOT); return; @@ -593,7 +593,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (ops == NULL) goto err_out; - sch = qdisc_alloc(dev, dev_queue, ops); + sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) { err = PTR_ERR(sch); goto err_out2; @@ -940,7 +940,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = clid; tcm->tcm_handle = q->handle; tcm->tcm_info = atomic_read(&q->refcnt); @@ -1186,7 +1186,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; - tcm->tcm_ifindex = q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = q->handle; tcm->tcm_handle = q->handle; tcm->tcm_info = 0; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 3dddab531d5a..0de757e3be4a 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -296,7 +296,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, goto err_out; } flow->filter_list = NULL; - flow->q = qdisc_create_dflt(sch->dev, sch->dev_queue, + flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, classid); if (!flow->q) flow->q = &noop_qdisc; @@ -556,7 +556,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); p->flows = &p->link; - p->link.q = qdisc_create_dflt(sch->dev, sch->dev_queue, + p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (!p->link.q) p->link.q = &noop_qdisc; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index d360dcd0818b..9f2ace585fd6 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -650,7 +650,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) } sch->flags &= ~TCQ_F_THROTTLED; - netif_schedule(sch->dev); + netif_schedule(qdisc_dev(sch)); return HRTIMER_NORESTART; } @@ -1077,9 +1077,9 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ q->quanta[prio]; } - if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) { + if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum); - cl->quantum = cl->qdisc->dev->mtu/2 + 1; + cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; } } } @@ -1401,7 +1401,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.sibling = &q->link; q->link.common.classid = sch->handle; q->link.qdisc = sch; - if (!(q->link.q = qdisc_create_dflt(sch->dev, sch->dev_queue, + if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, sch->handle))) q->link.q = &noop_qdisc; @@ -1411,7 +1411,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.cpriority = TC_CBQ_MAXPRIO-1; q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; q->link.overlimit = cbq_ovl_classic; - q->link.allot = psched_mtu(sch->dev); + q->link.allot = psched_mtu(qdisc_dev(sch)); q->link.quantum = q->link.allot; q->link.weight = q->link.R_tab->rate.rate; @@ -1646,7 +1646,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl) { if (new == NULL) { - new = qdisc_create_dflt(sch->dev, sch->dev_queue, + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, cl->common.classid); if (new == NULL) @@ -1746,10 +1746,10 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) #ifdef CONFIG_NET_CLS_ACT struct cbq_sched_data *q = qdisc_priv(sch); - spin_lock_bh(&sch->dev->queue_lock); + spin_lock_bh(&qdisc_dev(sch)->queue_lock); if (q->rx_class == cl) q->rx_class = NULL; - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(&qdisc_dev(sch)->queue_lock); #endif cbq_destroy_class(sch, cl); @@ -1828,7 +1828,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]); return 0; } @@ -1879,7 +1879,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; - if (!(cl->q = qdisc_create_dflt(sch->dev, sch->dev_queue, + if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, classid))) cl->q = &noop_qdisc; cl->common.classid = classid; @@ -1919,7 +1919,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, tca[TCA_RATE]); + &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index c955ba24e5cf..3aafbd17393a 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -60,7 +60,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, sch, p, new, old); if (new == NULL) { - new = qdisc_create_dflt(sch->dev, sch->dev_queue, + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (new == NULL) @@ -391,7 +391,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) p->default_index = default_index; p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - p->q = qdisc_create_dflt(sch->dev, sch->dev_queue, + p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 779eae85faf0..1d97fa42c902 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -48,10 +48,10 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) struct fifo_sched_data *q = qdisc_priv(sch); if (opt == NULL) { - u32 limit = sch->dev->tx_queue_len ? : 1; + u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; if (sch->ops == &bfifo_qdisc_ops) - limit *= sch->dev->mtu; + limit *= qdisc_dev(sch)->mtu; q->limit = limit; } else { @@ -137,7 +137,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, struct Qdisc *q; int err = -ENOMEM; - q = qdisc_create_dflt(sch->dev, sch->dev_queue, + q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1)); if (q) { err = fifo_set_limit(q, limit); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d97086480893..b626a4f32b6b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -364,7 +364,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) { struct sk_buff_head *list = prio2list(skb, qdisc); - if (skb_queue_len(list) < qdisc->dev->tx_queue_len) { + if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { qdisc->q.qlen++; return __qdisc_enqueue_tail(skb, qdisc, list); } @@ -440,8 +440,7 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .owner = THIS_MODULE, }; -struct Qdisc *qdisc_alloc(struct net_device *dev, - struct netdev_queue *dev_queue, +struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc_ops *ops) { void *p; @@ -465,8 +464,7 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; - sch->dev = dev; - dev_hold(dev); + dev_hold(qdisc_dev(sch)); atomic_set(&sch->refcnt, 1); return sch; @@ -481,7 +479,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, { struct Qdisc *sch; - sch = qdisc_alloc(dev, dev_queue, ops); + sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) goto errout; sch->stats_lock = &dev->queue_lock; @@ -534,7 +532,7 @@ void qdisc_destroy(struct Qdisc *qdisc) ops->destroy(qdisc); module_put(ops->owner); - dev_put(qdisc->dev); + dev_put(qdisc_dev(qdisc)); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } EXPORT_SYMBOL(qdisc_destroy); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index c89fba56db56..39fa28511f07 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -164,7 +164,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) * if no default DP has been configured. This * allows for DP flows to be left untouched. */ - if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len) + if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len) return qdisc_enqueue_tail(skb, sch); else goto drop; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 5a22fec4eadd..333525422f45 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1045,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]); return 0; } @@ -1083,7 +1083,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->refcnt = 1; cl->sched = q; cl->cl_parent = parent; - cl->qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue, + cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; @@ -1104,7 +1104,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, tca[TCA_RATE]); + &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } @@ -1202,7 +1202,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level > 0) return -EINVAL; if (new == NULL) { - new = qdisc_create_dflt(sch->dev, sch->dev_queue, + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, cl->cl_common.classid); if (new == NULL) @@ -1445,7 +1445,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; q->root.sched = q; - q->root.qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue, + q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (q->root.qdisc == NULL) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 956a67f66b9c..31f7d1536e6d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1026,7 +1026,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); skb_queue_head_init(&q->direct_queue); - q->direct_qlen = sch->dev->tx_queue_len; + q->direct_qlen = qdisc_dev(sch)->tx_queue_len; if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; @@ -1043,7 +1043,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest; struct tc_htb_glob gopt; - spin_lock_bh(&sch->dev->queue_lock); + spin_lock_bh(&qdisc_dev(sch)->queue_lock); gopt.direct_pkts = q->direct_pkts; gopt.version = HTB_VER; @@ -1057,11 +1057,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); nla_nest_end(skb, nest); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(&qdisc_dev(sch)->queue_lock); return skb->len; nla_put_failure: - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(&qdisc_dev(sch)->queue_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1073,7 +1073,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct nlattr *nest; struct tc_htb_opt opt; - spin_lock_bh(&sch->dev->queue_lock); + spin_lock_bh(&qdisc_dev(sch)->queue_lock); tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT; tcm->tcm_handle = cl->common.classid; if (!cl->level && cl->un.leaf.q) @@ -1095,11 +1095,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); nla_nest_end(skb, nest); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(&qdisc_dev(sch)->queue_lock); return skb->len; nla_put_failure: - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(&qdisc_dev(sch)->queue_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1129,7 +1129,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl && !cl->level) { if (new == NULL && - (new = qdisc_create_dflt(sch->dev, sch->dev_queue, + (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, cl->common.classid)) == NULL) @@ -1257,7 +1257,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) return -EBUSY; if (!cl->level && htb_parent_last_child(cl)) { - new_q = qdisc_create_dflt(sch->dev, sch->dev_queue, + new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, cl->parent->common.classid); last_child = 1; @@ -1365,7 +1365,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + &qdisc_dev(sch)->queue_lock, tca[TCA_RATE] ? : &est.nla); cl->refcnt = 1; cl->children = 0; @@ -1378,7 +1378,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev, sch->dev_queue, + new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, classid); sch_tree_lock(sch); if (parent && !parent->level) { @@ -1420,7 +1420,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } else { if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]); sch_tree_lock(sch); } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index aa7a04e32ae9..790582960444 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -180,7 +180,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * skb will be queued. */ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = sch->dev->qdisc; + struct Qdisc *rootq = qdisc_dev(sch)->qdisc; u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; @@ -333,9 +333,9 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) for (i = 0; i < n; i++) d->table[i] = data[i]; - spin_lock_bh(&sch->dev->queue_lock); + spin_lock_bh(&qdisc_dev(sch)->queue_lock); d = xchg(&q->delay_dist, d); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(&qdisc_dev(sch)->queue_lock); kfree(d); return 0; @@ -495,7 +495,7 @@ static int tfifo_init(struct Qdisc *sch, struct nlattr *opt) q->limit = ctl->limit; } else - q->limit = max_t(u32, sch->dev->tx_queue_len, 1); + q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); q->oldest = PSCHED_PASTPERFECT; return 0; @@ -536,7 +536,7 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); - q->qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue, + q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &tfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index ca58a039208e..39157f7bc046 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -136,7 +136,8 @@ prio_dequeue(struct Qdisc* sch) * pulling an skb. This way we avoid excessive requeues * for slower queues. */ - if (!__netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) { + if (!__netif_subqueue_stopped(qdisc_dev(sch), + (q->mq ? prio : 0))) { qdisc = q->queues[prio]; skb = qdisc->dequeue(qdisc); if (skb) { @@ -165,8 +166,8 @@ static struct sk_buff *rr_dequeue(struct Qdisc* sch) * for slower queues. If the queue is stopped, try the * next queue. */ - if (!__netif_subqueue_stopped(sch->dev, - (q->mq ? q->curband : 0))) { + if (!__netif_subqueue_stopped(qdisc_dev(sch), + (q->mq ? q->curband : 0))) { qdisc = q->queues[q->curband]; skb = qdisc->dequeue(qdisc); if (skb) { @@ -249,10 +250,10 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) if (q->mq) { if (sch->parent != TC_H_ROOT) return -EINVAL; - if (netif_is_multiqueue(sch->dev)) { + if (netif_is_multiqueue(qdisc_dev(sch))) { if (q->bands == 0) - q->bands = sch->dev->egress_subqueue_count; - else if (q->bands != sch->dev->egress_subqueue_count) + q->bands = qdisc_dev(sch)->egress_subqueue_count; + else if (q->bands != qdisc_dev(sch)->egress_subqueue_count) return -EINVAL; } else return -EOPNOTSUPP; @@ -281,7 +282,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) for (i=0; ibands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child; - child = qdisc_create_dflt(sch->dev, sch->dev_queue, + child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 6a97afbfb952..8458f630fac4 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -461,7 +461,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; sch_tree_lock(sch); - q->quantum = ctl->quantum ? : psched_mtu(sch->dev); + q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); q->perturb_period = ctl->perturb_period * HZ; if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); @@ -502,7 +502,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->max_depth = 0; q->tail = SFQ_DEPTH; if (opt == NULL) { - q->quantum = psched_mtu(sch->dev); + q->quantum = psched_mtu(qdisc_dev(sch)); q->perturb_period = 0; q->perturbation = net_random(); } else { diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 0444fd0f0d22..b3fc82623fc6 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -78,7 +78,7 @@ struct teql_sched_data static int teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) { - struct net_device *dev = sch->dev; + struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); if (q->q.qlen < dev->tx_queue_len) { @@ -111,7 +111,7 @@ teql_dequeue(struct Qdisc* sch) skb = __skb_dequeue(&dat->q); if (skb == NULL) { - struct net_device *m = dat->m->dev->qdisc->dev; + struct net_device *m = qdisc_dev(dat->m->dev->qdisc); if (m) { dat->m->slaves = sch; netif_wake_queue(m); @@ -170,7 +170,7 @@ teql_destroy(struct Qdisc* sch) static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) { - struct net_device *dev = sch->dev; + struct net_device *dev = qdisc_dev(sch); struct teql_master *m = (struct teql_master*)sch->ops; struct teql_sched_data *q = qdisc_priv(sch); @@ -282,7 +282,7 @@ restart: goto drop; do { - struct net_device *slave = q->dev; + struct net_device *slave = qdisc_dev(q); if (slave->qdisc_sleeping != q) continue; @@ -352,7 +352,7 @@ static int teql_master_open(struct net_device *dev) q = m->slaves; do { - struct net_device *slave = q->dev; + struct net_device *slave = qdisc_dev(q); if (slave == NULL) return -EUNATCH; @@ -403,7 +403,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) q = m->slaves; if (q) { do { - if (new_mtu > q->dev->mtu) + if (new_mtu > qdisc_dev(q)->mtu) return -EINVAL; } while ((q=NEXT_SLAVE(q)) != m->slaves); } -- cgit v1.2.3 From dc2b48475a0a36f8b3bbb2da60d3a006dc5c2c84 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 17:18:23 -0700 Subject: netdev: Move queue_lock into struct netdev_queue. The lock is now an attribute of the device queue. One thing to notice is that "suspicious" places emerge which will need specific training about multiple queue handling. They are so marked with explicit "netdev->rx_queue" and "netdev->tx_queue" references. Signed-off-by: David S. Miller --- drivers/net/ifb.c | 8 ++++---- include/linux/netdevice.h | 4 ++-- net/core/dev.c | 33 ++++++++++++++++++++++----------- net/mac80211/main.c | 10 +++++----- net/mac80211/wme.c | 2 +- net/sched/sch_api.c | 2 +- net/sched/sch_cbq.c | 8 ++++---- net/sched/sch_generic.c | 40 ++++++++++++++++++++-------------------- net/sched/sch_hfsc.c | 4 ++-- net/sched/sch_htb.c | 16 ++++++++-------- net/sched/sch_netem.c | 4 ++-- net/sched/sch_teql.c | 4 ++-- 12 files changed, 73 insertions(+), 62 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index af233b591534..bc3de272a829 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -229,12 +229,12 @@ module_param(numifbs, int, 0); MODULE_PARM_DESC(numifbs, "Number of ifb devices"); /* - * dev_ifb->queue_lock is usually taken after dev->ingress_lock, + * dev_ifb->tx_queue.lock is usually taken after dev->ingress_lock, * reversely to e.g. qdisc_lock_tree(). It should be safe until - * ifb doesn't take dev->queue_lock with dev_ifb->ingress_lock. + * ifb doesn't take dev->tx_queue.lock with dev_ifb->ingress_lock. * But lockdep should know that ifb has different locks from dev. */ -static struct lock_class_key ifb_queue_lock_key; +static struct lock_class_key ifb_tx_queue_lock_key; static struct lock_class_key ifb_ingress_lock_key; @@ -258,7 +258,7 @@ static int __init ifb_init_one(int index) if (err < 0) goto err; - lockdep_set_class(&dev_ifb->queue_lock, &ifb_queue_lock_key); + lockdep_set_class(&dev_ifb->tx_queue.lock, &ifb_tx_queue_lock_key); lockdep_set_class(&dev_ifb->ingress_lock, &ifb_ingress_lock_key); return 0; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 515fd25bf0fc..e835acacb479 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -449,6 +449,7 @@ static inline void napi_synchronize(const struct napi_struct *n) #endif struct netdev_queue { + spinlock_t lock; struct net_device *dev; }; @@ -629,7 +630,7 @@ struct net_device unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ struct netdev_queue rx_queue; - struct netdev_queue tx_queue; + struct netdev_queue tx_queue ____cacheline_aligned_in_smp; /* ingress path synchronizer */ spinlock_t ingress_lock; @@ -639,7 +640,6 @@ struct net_device * Cache line mostly used on queue transmit path (qdisc) */ /* device queue lock */ - spinlock_t queue_lock ____cacheline_aligned_in_smp; struct Qdisc *qdisc; struct Qdisc *qdisc_sleeping; struct list_head qdisc_list; diff --git a/net/core/dev.c b/net/core/dev.c index 9b281c906eb0..05011048b86c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1667,6 +1667,7 @@ out_kfree_skb: int dev_queue_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; + struct netdev_queue *txq; struct Qdisc *q; int rc = -ENOMEM; @@ -1699,14 +1700,15 @@ int dev_queue_xmit(struct sk_buff *skb) } gso: - spin_lock_prefetch(&dev->queue_lock); + txq = &dev->tx_queue; + spin_lock_prefetch(&txq->lock); /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ rcu_read_lock_bh(); - /* Updates of qdisc are serialized by queue_lock. + /* Updates of qdisc are serialized by queue->lock. * The struct Qdisc which is pointed to by qdisc is now a * rcu structure - it may be accessed without acquiring * a lock (but the structure may be stale.) The freeing of the @@ -1714,7 +1716,7 @@ gso: * more references to it. * * If the qdisc has an enqueue function, we still need to - * hold the queue_lock before calling it, since queue_lock + * hold the queue->lock before calling it, since queue->lock * also serializes access to the device queue. */ @@ -1724,19 +1726,19 @@ gso: #endif if (q->enqueue) { /* Grab device queue */ - spin_lock(&dev->queue_lock); + spin_lock(&txq->lock); q = dev->qdisc; if (q->enqueue) { /* reset queue_mapping to zero */ skb_set_queue_mapping(skb, 0); rc = q->enqueue(skb, q); qdisc_run(dev); - spin_unlock(&dev->queue_lock); + spin_unlock(&txq->lock); rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; goto out; } - spin_unlock(&dev->queue_lock); + spin_unlock(&txq->lock); } /* The device has no queue. Common case for software devices: @@ -1919,14 +1921,17 @@ static void net_tx_action(struct softirq_action *h) while (head) { struct net_device *dev = head; + struct netdev_queue *txq; head = head->next_sched; + txq = &dev->tx_queue; + smp_mb__before_clear_bit(); clear_bit(__LINK_STATE_SCHED, &dev->state); - if (spin_trylock(&dev->queue_lock)) { + if (spin_trylock(&txq->lock)) { qdisc_run(dev); - spin_unlock(&dev->queue_lock); + spin_unlock(&txq->lock); } else { netif_schedule(dev); } @@ -3787,7 +3792,6 @@ int register_netdevice(struct net_device *dev) BUG_ON(!dev_net(dev)); net = dev_net(dev); - spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); netdev_set_lockdep_class(&dev->_xmit_lock, dev->type); dev->xmit_lock_owner = -1; @@ -4072,10 +4076,17 @@ static struct net_device_stats *internal_stats(struct net_device *dev) return &dev->stats; } +static void netdev_init_one_queue(struct net_device *dev, + struct netdev_queue *queue) +{ + spin_lock_init(&queue->lock); + queue->dev = dev; +} + static void netdev_init_queues(struct net_device *dev) { - dev->rx_queue.dev = dev; - dev->tx_queue.dev = dev; + netdev_init_one_queue(dev, &dev->rx_queue); + netdev_init_one_queue(dev, &dev->tx_queue); } /** diff --git a/net/mac80211/main.c b/net/mac80211/main.c index cf477ad39dac..12aeaf78ae75 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -636,7 +636,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) /* ensure that TX flow won't interrupt us * until the end of the call to requeue function */ - spin_lock_bh(&local->mdev->queue_lock); + spin_lock_bh(&local->mdev->tx_queue.lock); /* create a new queue for this aggregation */ ret = ieee80211_ht_agg_queue_add(local, sta, tid); @@ -675,7 +675,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) /* Will put all the packets in the new SW queue */ ieee80211_requeue(local, ieee802_1d_to_ac[tid]); - spin_unlock_bh(&local->mdev->queue_lock); + spin_unlock_bh(&local->mdev->tx_queue.lock); spin_unlock_bh(&sta->lock); /* send an addBA request */ @@ -701,7 +701,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) err_unlock_queue: kfree(sta->ampdu_mlme.tid_tx[tid]); sta->ampdu_mlme.tid_tx[tid] = NULL; - spin_unlock_bh(&local->mdev->queue_lock); + spin_unlock_bh(&local->mdev->tx_queue.lock); ret = -EBUSY; err_unlock_sta: spin_unlock_bh(&sta->lock); @@ -875,10 +875,10 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) /* avoid ordering issues: we are the only one that can modify * the content of the qdiscs */ - spin_lock_bh(&local->mdev->queue_lock); + spin_lock_bh(&local->mdev->tx_queue.lock); /* remove the queue for this aggregation */ ieee80211_ht_agg_queue_remove(local, sta, tid, 1); - spin_unlock_bh(&local->mdev->queue_lock); + spin_unlock_bh(&local->mdev->tx_queue.lock); /* we just requeued the all the frames that were in the removed * queue, and since we might miss a softirq we do netif_schedule. diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 2fbc171130bf..59ed9cae66b9 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -648,7 +648,7 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, } /** - * the caller needs to hold local->mdev->queue_lock + * the caller needs to hold local->mdev->tx_queue.lock */ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, struct sta_info *sta, u16 tid, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1f893082a4f6..2a1834f8c7d8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -606,7 +606,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, sch->stats_lock = &dev->ingress_lock; handle = TC_H_MAKE(TC_H_INGRESS, 0); } else { - sch->stats_lock = &dev->queue_lock; + sch->stats_lock = &dev_queue->lock; if (handle == 0) { handle = qdisc_alloc_handle(dev); err = -ENOMEM; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 9f2ace585fd6..99ce3da2b0a4 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1746,10 +1746,10 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) #ifdef CONFIG_NET_CLS_ACT struct cbq_sched_data *q = qdisc_priv(sch); - spin_lock_bh(&qdisc_dev(sch)->queue_lock); + spin_lock_bh(&sch->dev_queue->lock); if (q->rx_class == cl) q->rx_class = NULL; - spin_unlock_bh(&qdisc_dev(sch)->queue_lock); + spin_unlock_bh(&sch->dev_queue->lock); #endif cbq_destroy_class(sch, cl); @@ -1828,7 +1828,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &qdisc_dev(sch)->queue_lock, + &sch->dev_queue->lock, tca[TCA_RATE]); return 0; } @@ -1919,7 +1919,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]); + &sch->dev_queue->lock, tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b626a4f32b6b..ee8f9f78a095 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -29,31 +29,31 @@ /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with - * dev->queue_lock spinlock. + * queue->lock spinlock. * * The idea is the following: * - enqueue, dequeue are serialized via top level device - * spinlock dev->queue_lock. + * spinlock queue->lock. * - ingress filtering is serialized via top level device * spinlock dev->ingress_lock. * - updates to tree and tree walking are only done under the rtnl mutex. */ void qdisc_lock_tree(struct net_device *dev) - __acquires(dev->queue_lock) + __acquires(dev->tx_queue.lock) __acquires(dev->ingress_lock) { - spin_lock_bh(&dev->queue_lock); + spin_lock_bh(&dev->tx_queue.lock); spin_lock(&dev->ingress_lock); } EXPORT_SYMBOL(qdisc_lock_tree); void qdisc_unlock_tree(struct net_device *dev) __releases(dev->ingress_lock) - __releases(dev->queue_lock) + __releases(dev->tx_queue.lock) { spin_unlock(&dev->ingress_lock); - spin_unlock_bh(&dev->queue_lock); + spin_unlock_bh(&dev->tx_queue.lock); } EXPORT_SYMBOL(qdisc_unlock_tree); @@ -118,15 +118,15 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, } /* - * NOTE: Called under dev->queue_lock with locally disabled BH. + * NOTE: Called under queue->lock with locally disabled BH. * * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this - * device at a time. dev->queue_lock serializes queue accesses for + * device at a time. queue->lock serializes queue accesses for * this device AND dev->qdisc pointer itself. * * netif_tx_lock serializes accesses to device driver. * - * dev->queue_lock and netif_tx_lock are mutually exclusive, + * queue->lock and netif_tx_lock are mutually exclusive, * if one is grabbed, another must be free. * * Note, that this procedure can be called by a watchdog timer @@ -148,14 +148,14 @@ static inline int qdisc_restart(struct net_device *dev) /* And release queue */ - spin_unlock(&dev->queue_lock); + spin_unlock(&q->dev_queue->lock); HARD_TX_LOCK(dev, smp_processor_id()); if (!netif_subqueue_stopped(dev, skb)) ret = dev_hard_start_xmit(skb, dev); HARD_TX_UNLOCK(dev); - spin_lock(&dev->queue_lock); + spin_lock(&q->dev_queue->lock); q = dev->qdisc; switch (ret) { @@ -482,7 +482,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) goto errout; - sch->stats_lock = &dev->queue_lock; + sch->stats_lock = &dev_queue->lock; sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) @@ -494,7 +494,7 @@ errout: } EXPORT_SYMBOL(qdisc_create_dflt); -/* Under dev->queue_lock and BH! */ +/* Under queue->lock and BH! */ void qdisc_reset(struct Qdisc *qdisc) { @@ -514,7 +514,7 @@ static void __qdisc_destroy(struct rcu_head *head) kfree((char *) qdisc - qdisc->padded); } -/* Under dev->queue_lock and BH! */ +/* Under queue->lock and BH! */ void qdisc_destroy(struct Qdisc *qdisc) { @@ -566,13 +566,13 @@ void dev_activate(struct net_device *dev) /* Delay activation until next carrier-on event */ return; - spin_lock_bh(&dev->queue_lock); + spin_lock_bh(&dev->tx_queue.lock); rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping); if (dev->qdisc != &noqueue_qdisc) { dev->trans_start = jiffies; dev_watchdog_up(dev); } - spin_unlock_bh(&dev->queue_lock); + spin_unlock_bh(&dev->tx_queue.lock); } void dev_deactivate(struct net_device *dev) @@ -581,7 +581,7 @@ void dev_deactivate(struct net_device *dev) struct sk_buff *skb; int running; - spin_lock_bh(&dev->queue_lock); + spin_lock_bh(&dev->tx_queue.lock); qdisc = dev->qdisc; dev->qdisc = &noop_qdisc; @@ -589,7 +589,7 @@ void dev_deactivate(struct net_device *dev) skb = dev->gso_skb; dev->gso_skb = NULL; - spin_unlock_bh(&dev->queue_lock); + spin_unlock_bh(&dev->tx_queue.lock); kfree_skb(skb); @@ -607,9 +607,9 @@ void dev_deactivate(struct net_device *dev) * Double-check inside queue lock to ensure that all effects * of the queue run are visible when we return. */ - spin_lock_bh(&dev->queue_lock); + spin_lock_bh(&dev->tx_queue.lock); running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); - spin_unlock_bh(&dev->queue_lock); + spin_unlock_bh(&dev->tx_queue.lock); /* * The running flag should never be set at this point because diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 333525422f45..997d520ca580 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1045,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &qdisc_dev(sch)->queue_lock, + &sch->dev_queue->lock, tca[TCA_RATE]); return 0; } @@ -1104,7 +1104,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]); + &sch->dev_queue->lock, tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 31f7d1536e6d..c8ca54cc26b0 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1043,7 +1043,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest; struct tc_htb_glob gopt; - spin_lock_bh(&qdisc_dev(sch)->queue_lock); + spin_lock_bh(&sch->dev_queue->lock); gopt.direct_pkts = q->direct_pkts; gopt.version = HTB_VER; @@ -1057,11 +1057,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); nla_nest_end(skb, nest); - spin_unlock_bh(&qdisc_dev(sch)->queue_lock); + spin_unlock_bh(&sch->dev_queue->lock); return skb->len; nla_put_failure: - spin_unlock_bh(&qdisc_dev(sch)->queue_lock); + spin_unlock_bh(&sch->dev_queue->lock); nla_nest_cancel(skb, nest); return -1; } @@ -1073,7 +1073,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct nlattr *nest; struct tc_htb_opt opt; - spin_lock_bh(&qdisc_dev(sch)->queue_lock); + spin_lock_bh(&sch->dev_queue->lock); tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT; tcm->tcm_handle = cl->common.classid; if (!cl->level && cl->un.leaf.q) @@ -1095,11 +1095,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); nla_nest_end(skb, nest); - spin_unlock_bh(&qdisc_dev(sch)->queue_lock); + spin_unlock_bh(&sch->dev_queue->lock); return skb->len; nla_put_failure: - spin_unlock_bh(&qdisc_dev(sch)->queue_lock); + spin_unlock_bh(&sch->dev_queue->lock); nla_nest_cancel(skb, nest); return -1; } @@ -1365,7 +1365,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; gen_new_estimator(&cl->bstats, &cl->rate_est, - &qdisc_dev(sch)->queue_lock, + &sch->dev_queue->lock, tca[TCA_RATE] ? : &est.nla); cl->refcnt = 1; cl->children = 0; @@ -1420,7 +1420,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } else { if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &qdisc_dev(sch)->queue_lock, + &sch->dev_queue->lock, tca[TCA_RATE]); sch_tree_lock(sch); } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 790582960444..71b73c528f9b 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -333,9 +333,9 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) for (i = 0; i < n; i++) d->table[i] = data[i]; - spin_lock_bh(&qdisc_dev(sch)->queue_lock); + spin_lock_bh(&sch->dev_queue->lock); d = xchg(&q->delay_dist, d); - spin_unlock_bh(&qdisc_dev(sch)->queue_lock); + spin_unlock_bh(&sch->dev_queue->lock); kfree(d); return 0; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index b3fc82623fc6..4f3054e8e1ab 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -154,9 +154,9 @@ teql_destroy(struct Qdisc* sch) master->slaves = NEXT_SLAVE(q); if (q == master->slaves) { master->slaves = NULL; - spin_lock_bh(&master->dev->queue_lock); + spin_lock_bh(&master->dev->tx_queue.lock); qdisc_reset(master->dev->qdisc); - spin_unlock_bh(&master->dev->queue_lock); + spin_unlock_bh(&master->dev->tx_queue.lock); } } skb_queue_purge(&dat->q); -- cgit v1.2.3 From 555353cfa1aee293de445bfa6de43276138ddd82 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 17:33:13 -0700 Subject: netdev: The ingress_lock member is no longer needed. Every qdisc is assosciated with a queue, and in the case of ingress qdiscs that will now be netdev->rx_queue so using that queue's lock is the thing to do. Signed-off-by: David S. Miller --- drivers/net/ifb.c | 8 ++++---- include/linux/netdevice.h | 2 -- net/core/dev.c | 12 +++++++----- net/sched/sch_api.c | 3 +-- net/sched/sch_generic.c | 10 +++++----- 5 files changed, 17 insertions(+), 18 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index bc3de272a829..ccbd6554f6eb 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -229,13 +229,13 @@ module_param(numifbs, int, 0); MODULE_PARM_DESC(numifbs, "Number of ifb devices"); /* - * dev_ifb->tx_queue.lock is usually taken after dev->ingress_lock, + * dev_ifb->tx_queue.lock is usually taken after dev->rx_queue.lock, * reversely to e.g. qdisc_lock_tree(). It should be safe until - * ifb doesn't take dev->tx_queue.lock with dev_ifb->ingress_lock. + * ifb doesn't take dev->tx_queue.lock with dev_ifb->rx_queue.lock. * But lockdep should know that ifb has different locks from dev. */ static struct lock_class_key ifb_tx_queue_lock_key; -static struct lock_class_key ifb_ingress_lock_key; +static struct lock_class_key ifb_rx_queue_lock_key; static int __init ifb_init_one(int index) @@ -259,7 +259,7 @@ static int __init ifb_init_one(int index) goto err; lockdep_set_class(&dev_ifb->tx_queue.lock, &ifb_tx_queue_lock_key); - lockdep_set_class(&dev_ifb->ingress_lock, &ifb_ingress_lock_key); + lockdep_set_class(&dev_ifb->rx_queue.lock, &ifb_rx_queue_lock_key); return 0; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e835acacb479..633a44c6fa5e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -632,8 +632,6 @@ struct net_device struct netdev_queue rx_queue; struct netdev_queue tx_queue ____cacheline_aligned_in_smp; - /* ingress path synchronizer */ - spinlock_t ingress_lock; struct Qdisc *qdisc_ingress; /* diff --git a/net/core/dev.c b/net/core/dev.c index 05011048b86c..2322fb69fd53 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2014,10 +2014,11 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, */ static int ing_filter(struct sk_buff *skb) { - struct Qdisc *q; struct net_device *dev = skb->dev; - int result = TC_ACT_OK; u32 ttl = G_TC_RTTL(skb->tc_verd); + struct netdev_queue *rxq; + int result = TC_ACT_OK; + struct Qdisc *q; if (MAX_RED_LOOP < ttl++) { printk(KERN_WARNING @@ -2029,10 +2030,12 @@ static int ing_filter(struct sk_buff *skb) skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - spin_lock(&dev->ingress_lock); + rxq = &dev->rx_queue; + + spin_lock(&rxq->lock); if ((q = dev->qdisc_ingress) != NULL) result = q->enqueue(skb, q); - spin_unlock(&dev->ingress_lock); + spin_unlock(&rxq->lock); return result; } @@ -3795,7 +3798,6 @@ int register_netdevice(struct net_device *dev) spin_lock_init(&dev->_xmit_lock); netdev_set_lockdep_class(&dev->_xmit_lock, dev->type); dev->xmit_lock_owner = -1; - spin_lock_init(&dev->ingress_lock); dev->iflink = -1; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2a1834f8c7d8..570cef2a9c5f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -601,12 +601,11 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, sch->parent = parent; + sch->stats_lock = &dev_queue->lock; if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; - sch->stats_lock = &dev->ingress_lock; handle = TC_H_MAKE(TC_H_INGRESS, 0); } else { - sch->stats_lock = &dev_queue->lock; if (handle == 0) { handle = qdisc_alloc_handle(dev); err = -ENOMEM; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ee8f9f78a095..804d44b00348 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -35,24 +35,24 @@ * - enqueue, dequeue are serialized via top level device * spinlock queue->lock. * - ingress filtering is serialized via top level device - * spinlock dev->ingress_lock. + * spinlock dev->rx_queue.lock. * - updates to tree and tree walking are only done under the rtnl mutex. */ void qdisc_lock_tree(struct net_device *dev) __acquires(dev->tx_queue.lock) - __acquires(dev->ingress_lock) + __acquires(dev->rx_queue.lock) { spin_lock_bh(&dev->tx_queue.lock); - spin_lock(&dev->ingress_lock); + spin_lock(&dev->rx_queue.lock); } EXPORT_SYMBOL(qdisc_lock_tree); void qdisc_unlock_tree(struct net_device *dev) - __releases(dev->ingress_lock) + __releases(dev->rx_queue.lock) __releases(dev->tx_queue.lock) { - spin_unlock(&dev->ingress_lock); + spin_unlock(&dev->rx_queue.lock); spin_unlock_bh(&dev->tx_queue.lock); } EXPORT_SYMBOL(qdisc_unlock_tree); -- cgit v1.2.3 From b0e1e6462df3c5944010b3328a546d8fe5d932cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 17:42:10 -0700 Subject: netdev: Move rest of qdisc state into struct netdev_queue Now qdisc, qdisc_sleeping, and qdisc_list also live there. Signed-off-by: David S. Miller --- drivers/isdn/i4l/isdn_net.c | 2 +- include/linux/netdevice.h | 10 ++--- include/net/irda/irda_device.h | 2 +- net/core/dev.c | 4 +- net/core/link_watch.c | 8 +++- net/core/rtnetlink.c | 6 ++- net/ipv6/addrconf.c | 3 +- net/mac80211/wme.c | 20 ++++++---- net/sched/cls_api.c | 7 +++- net/sched/sch_api.c | 34 ++++++++++------ net/sched/sch_generic.c | 90 ++++++++++++++++++++++++++---------------- net/sched/sch_netem.c | 2 +- net/sched/sch_teql.c | 14 ++++--- 13 files changed, 125 insertions(+), 77 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index ef1a300068dc..457bbd119f9b 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -287,7 +287,7 @@ isdn_net_unbind_channel(isdn_net_local * lp) BEWARE! This chunk of code cannot be called from hardware interrupt handler. I hope it is true. --ANK */ - qdisc_reset(lp->netdev->dev->qdisc); + qdisc_reset(lp->netdev->dev->tx_queue.qdisc); } lp->dialstate = 0; dev->rx_netdev[isdn_dc2minor(lp->isdn_device, lp->isdn_channel)] = NULL; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 633a44c6fa5e..df702a7b3db5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -451,6 +451,9 @@ static inline void napi_synchronize(const struct napi_struct *n) struct netdev_queue { spinlock_t lock; struct net_device *dev; + struct Qdisc *qdisc; + struct Qdisc *qdisc_sleeping; + struct list_head qdisc_list; }; /* @@ -634,13 +637,6 @@ struct net_device struct Qdisc *qdisc_ingress; -/* - * Cache line mostly used on queue transmit path (qdisc) - */ - /* device queue lock */ - struct Qdisc *qdisc; - struct Qdisc *qdisc_sleeping; - struct list_head qdisc_list; unsigned long tx_queue_len; /* Max frames per queue allowed */ /* Partially transmitted GSO packet. */ diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h index f70e9b39ebaf..16fbf672e0b2 100644 --- a/include/net/irda/irda_device.h +++ b/include/net/irda/irda_device.h @@ -223,7 +223,7 @@ int irda_device_is_receiving(struct net_device *dev); /* Interface for internal use */ static inline int irda_device_txqueue_empty(const struct net_device *dev) { - return skb_queue_empty(&dev->qdisc->q); + return skb_queue_empty(&dev->tx_queue.qdisc->q); } int irda_device_set_raw_mode(struct net_device* self, int status); struct net_device *alloc_irdadev(int sizeof_priv); diff --git a/net/core/dev.c b/net/core/dev.c index 2322fb69fd53..ce79c28d739d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1720,14 +1720,14 @@ gso: * also serializes access to the device queue. */ - q = rcu_dereference(dev->qdisc); + q = rcu_dereference(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); #endif if (q->enqueue) { /* Grab device queue */ spin_lock(&txq->lock); - q = dev->qdisc; + q = txq->qdisc; if (q->enqueue) { /* reset queue_mapping to zero */ skb_set_queue_mapping(skb, 0); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index a5e372b9ec4d..50218218445b 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -79,8 +79,10 @@ static void rfc2863_policy(struct net_device *dev) static int linkwatch_urgent_event(struct net_device *dev) { + struct netdev_queue *txq = &dev->tx_queue; + return netif_running(dev) && netif_carrier_ok(dev) && - dev->qdisc != dev->qdisc_sleeping; + txq->qdisc != txq->qdisc_sleeping; } @@ -181,7 +183,9 @@ static void __linkwatch_run_queue(int urgent_only) rfc2863_policy(dev); if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) { - WARN_ON(dev->qdisc_sleeping == &noop_qdisc); + struct netdev_queue *txq = &dev->tx_queue; + + WARN_ON(txq->qdisc_sleeping == &noop_qdisc); dev_activate(dev); } else dev_deactivate(dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6c8d7f0ea01a..8ef9f1db610e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -605,6 +605,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags) { + struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; struct net_device_stats *stats; @@ -635,8 +636,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->master) NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); - if (dev->qdisc_sleeping) - NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id); + txq = &dev->tx_queue; + if (txq->qdisc_sleeping) + NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); if (1) { struct rtnl_link_ifmap map = { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8572cb05fc21..5c84c798331d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -231,7 +231,8 @@ const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTER /* Check if a valid qdisc is available */ static inline int addrconf_qdisc_ok(struct net_device *dev) { - return (dev->qdisc != &noop_qdisc); + struct netdev_queue *txq = &dev->tx_queue; + return (txq->qdisc != &noop_qdisc); } /* Check if a route is valid prefix route */ diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 59ed9cae66b9..6ae43a3c7726 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -574,9 +574,10 @@ static struct Qdisc_ops wme_qdisc_ops __read_mostly = void ieee80211_install_qdisc(struct net_device *dev) { + struct netdev_queue *txq = &dev->tx_queue; struct Qdisc *qdisc; - qdisc = qdisc_create_dflt(dev, &dev->tx_queue, + qdisc = qdisc_create_dflt(dev, txq, &wme_qdisc_ops, TC_H_ROOT); if (!qdisc) { printk(KERN_ERR "%s: qdisc installation failed\n", dev->name); @@ -587,15 +588,17 @@ void ieee80211_install_qdisc(struct net_device *dev) qdisc->handle = 0x80010000; qdisc_lock_tree(dev); - list_add_tail(&qdisc->list, &dev->qdisc_list); - dev->qdisc_sleeping = qdisc; + list_add_tail(&qdisc->list, &txq->qdisc_list); + txq->qdisc_sleeping = qdisc; qdisc_unlock_tree(dev); } int ieee80211_qdisc_installed(struct net_device *dev) { - return dev->qdisc_sleeping->ops == &wme_qdisc_ops; + struct netdev_queue *txq = &dev->tx_queue; + + return txq->qdisc_sleeping->ops == &wme_qdisc_ops; } @@ -614,8 +617,9 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { int i; + struct netdev_queue *txq = &local->mdev->tx_queue; struct ieee80211_sched_data *q = - qdisc_priv(local->mdev->qdisc_sleeping); + qdisc_priv(txq->qdisc_sleeping); DECLARE_MAC_BUF(mac); /* prepare the filter and save it for the SW queue @@ -655,8 +659,9 @@ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, u8 requeue) { struct ieee80211_hw *hw = &local->hw; + struct netdev_queue *txq = &local->mdev->tx_queue; struct ieee80211_sched_data *q = - qdisc_priv(local->mdev->qdisc_sleeping); + qdisc_priv(txq->qdisc_sleeping); int agg_queue = sta->tid_to_tx_q[tid]; /* return the qdisc to the pool */ @@ -671,7 +676,8 @@ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, void ieee80211_requeue(struct ieee80211_local *local, int queue) { - struct Qdisc *root_qd = local->mdev->qdisc_sleeping; + struct netdev_queue *txq = &local->mdev->tx_queue; + struct Qdisc *root_qd = txq->qdisc_sleeping; struct ieee80211_sched_data *q = qdisc_priv(root_qd); struct Qdisc *qdisc = q->queues[queue]; struct sk_buff *skb = NULL; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e2389f161e46..b483bbea6118 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -166,7 +166,8 @@ replay: /* Find qdisc */ if (!parent) { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue = &dev->tx_queue; + q = dev_queue->qdisc_sleeping; parent = q->handle; } else { q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); @@ -390,6 +391,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct netdev_queue *dev_queue; int t; int s_t; struct net_device *dev; @@ -408,8 +410,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; + dev_queue = &dev->tx_queue; if (!tcm->tcm_parent) - q = dev->qdisc_sleeping; + q = dev_queue->qdisc_sleeping; else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 570cef2a9c5f..2313fa7c97be 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -185,9 +185,10 @@ EXPORT_SYMBOL(unregister_qdisc); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { + struct netdev_queue *dev_queue = &dev->tx_queue; struct Qdisc *q; - list_for_each_entry(q, &dev->qdisc_list, list) { + list_for_each_entry(q, &dev_queue->qdisc_list, list) { if (q->handle == handle) return q; } @@ -441,6 +442,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev) static struct Qdisc * dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) { + struct netdev_queue *dev_queue; struct Qdisc *oqdisc; if (dev->flags & IFF_UP) @@ -459,8 +461,8 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) } } else { - - oqdisc = dev->qdisc_sleeping; + dev_queue = &dev->tx_queue; + oqdisc = dev_queue->qdisc_sleeping; /* Prune old scheduler */ if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) @@ -469,8 +471,8 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) /* ... and graft new one */ if (qdisc == NULL) qdisc = &noop_qdisc; - dev->qdisc_sleeping = qdisc; - dev->qdisc = &noop_qdisc; + dev_queue->qdisc_sleeping = qdisc; + dev_queue->qdisc = &noop_qdisc; } qdisc_unlock_tree(dev); @@ -633,7 +635,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, } } qdisc_lock_tree(dev); - list_add_tail(&sch->list, &dev->qdisc_list); + list_add_tail(&sch->list, &dev_queue->qdisc_list); qdisc_unlock_tree(dev); return sch; @@ -740,7 +742,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) q = dev->qdisc_ingress; } } else { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue = &dev->tx_queue; + q = dev_queue->qdisc_sleeping; } if (!q) return -ENOENT; @@ -814,7 +817,8 @@ replay: q = dev->qdisc_ingress; } } else { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue = &dev->tx_queue; + q = dev_queue->qdisc_sleeping; } /* It may be default qdisc, ignore it */ @@ -1015,12 +1019,14 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) read_lock(&dev_base_lock); idx = 0; for_each_netdev(&init_net, dev) { + struct netdev_queue *dev_queue; if (idx < s_idx) goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { + dev_queue = &dev->tx_queue; + list_for_each_entry(q, &dev_queue->qdisc_list, list) { if (q_idx < s_q_idx) { q_idx++; continue; @@ -1054,6 +1060,7 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); + struct netdev_queue *dev_queue; struct tcmsg *tcm = NLMSG_DATA(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; @@ -1091,6 +1098,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ + dev_queue = &dev->tx_queue; if (pid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(pid); @@ -1101,7 +1109,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } else if (qid1) { qid = qid1; } else if (qid == 0) - qid = dev->qdisc_sleeping->handle; + qid = dev_queue->qdisc_sleeping->handle; /* Now qid is genuine qdisc handle consistent both with parent and child. @@ -1112,7 +1120,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) pid = TC_H_MAKE(qid, pid); } else { if (qid == 0) - qid = dev->qdisc_sleeping->handle; + qid = dev_queue->qdisc_sleeping->handle; } /* OK. Locate qdisc */ @@ -1248,6 +1256,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct netdev_queue *dev_queue; int t; int s_t; struct net_device *dev; @@ -1266,7 +1275,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { + dev_queue = &dev->tx_queue; + list_for_each_entry(q, &dev_queue->qdisc_list, list) { if (t < s_t || !q->ops->cl_ops || (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 804d44b00348..3223e5ba76aa 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -122,7 +122,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this * device at a time. queue->lock serializes queue accesses for - * this device AND dev->qdisc pointer itself. + * this device AND txq->qdisc pointer itself. * * netif_tx_lock serializes accesses to device driver. * @@ -138,7 +138,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, */ static inline int qdisc_restart(struct net_device *dev) { - struct Qdisc *q = dev->qdisc; + struct netdev_queue *txq = &dev->tx_queue; + struct Qdisc *q = txq->qdisc; struct sk_buff *skb; int ret = NETDEV_TX_BUSY; @@ -148,15 +149,15 @@ static inline int qdisc_restart(struct net_device *dev) /* And release queue */ - spin_unlock(&q->dev_queue->lock); + spin_unlock(&txq->lock); HARD_TX_LOCK(dev, smp_processor_id()); if (!netif_subqueue_stopped(dev, skb)) ret = dev_hard_start_xmit(skb, dev); HARD_TX_UNLOCK(dev); - spin_lock(&q->dev_queue->lock); - q = dev->qdisc; + spin_lock(&txq->lock); + q = txq->qdisc; switch (ret) { case NETDEV_TX_OK: @@ -207,9 +208,10 @@ void __qdisc_run(struct net_device *dev) static void dev_watchdog(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; + struct netdev_queue *txq = &dev->tx_queue; netif_tx_lock(dev); - if (dev->qdisc != &noop_qdisc) { + if (txq->qdisc != &noop_qdisc) { if (netif_device_present(dev) && netif_running(dev) && netif_carrier_ok(dev)) { @@ -539,53 +541,63 @@ EXPORT_SYMBOL(qdisc_destroy); void dev_activate(struct net_device *dev) { + struct netdev_queue *txq = &dev->tx_queue; + /* No queueing discipline is attached to device; create default one i.e. pfifo_fast for devices, which need queueing and noqueue_qdisc for virtual interfaces */ - if (dev->qdisc_sleeping == &noop_qdisc) { + if (txq->qdisc_sleeping == &noop_qdisc) { struct Qdisc *qdisc; if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, &dev->tx_queue, + qdisc = qdisc_create_dflt(dev, txq, &pfifo_fast_ops, TC_H_ROOT); if (qdisc == NULL) { printk(KERN_INFO "%s: activation failed\n", dev->name); return; } - list_add_tail(&qdisc->list, &dev->qdisc_list); + list_add_tail(&qdisc->list, &txq->qdisc_list); } else { qdisc = &noqueue_qdisc; } - dev->qdisc_sleeping = qdisc; + txq->qdisc_sleeping = qdisc; } if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ return; - spin_lock_bh(&dev->tx_queue.lock); - rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping); - if (dev->qdisc != &noqueue_qdisc) { + spin_lock_bh(&txq->lock); + rcu_assign_pointer(txq->qdisc, txq->qdisc_sleeping); + if (txq->qdisc != &noqueue_qdisc) { dev->trans_start = jiffies; dev_watchdog_up(dev); } - spin_unlock_bh(&dev->tx_queue.lock); + spin_unlock_bh(&txq->lock); +} + +static void dev_deactivate_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc *qdisc_default) +{ + struct Qdisc *qdisc = dev_queue->qdisc; + + if (qdisc) { + dev_queue->qdisc = qdisc_default; + qdisc_reset(qdisc); + } } void dev_deactivate(struct net_device *dev) { - struct Qdisc *qdisc; struct sk_buff *skb; int running; spin_lock_bh(&dev->tx_queue.lock); - qdisc = dev->qdisc; - dev->qdisc = &noop_qdisc; - - qdisc_reset(qdisc); + dev_deactivate_queue(dev, &dev->tx_queue, &noop_qdisc); skb = dev->gso_skb; dev->gso_skb = NULL; @@ -622,32 +634,44 @@ void dev_deactivate(struct net_device *dev) } while (WARN_ON_ONCE(running)); } +static void dev_init_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc *qdisc) +{ + dev_queue->qdisc = qdisc; + dev_queue->qdisc_sleeping = qdisc; + INIT_LIST_HEAD(&dev_queue->qdisc_list); +} + void dev_init_scheduler(struct net_device *dev) { qdisc_lock_tree(dev); - dev->qdisc = &noop_qdisc; - dev->qdisc_sleeping = &noop_qdisc; - INIT_LIST_HEAD(&dev->qdisc_list); + dev_init_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc); + dev_init_scheduler_queue(dev, &dev->rx_queue, NULL); qdisc_unlock_tree(dev); setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); } -void dev_shutdown(struct net_device *dev) +static void dev_shutdown_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc *qdisc_default) { - struct Qdisc *qdisc; + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + + if (qdisc) { + dev_queue->qdisc = qdisc_default; + dev_queue->qdisc_sleeping = qdisc_default; - qdisc_lock_tree(dev); - qdisc = dev->qdisc_sleeping; - dev->qdisc = &noop_qdisc; - dev->qdisc_sleeping = &noop_qdisc; - qdisc_destroy(qdisc); -#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE) - if ((qdisc = dev->qdisc_ingress) != NULL) { - dev->qdisc_ingress = NULL; qdisc_destroy(qdisc); } -#endif +} + +void dev_shutdown(struct net_device *dev) +{ + qdisc_lock_tree(dev); + dev_shutdown_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc); + dev_shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); BUG_TRAP(!timer_pending(&dev->watchdog_timer)); qdisc_unlock_tree(dev); } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 71b73c528f9b..4093f1eaaf60 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -180,7 +180,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * skb will be queued. */ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = qdisc_dev(sch)->qdisc; + struct Qdisc *rootq = qdisc_dev(sch)->tx_queue.qdisc; u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 4f3054e8e1ab..8ac05981be20 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -107,17 +107,19 @@ static struct sk_buff * teql_dequeue(struct Qdisc* sch) { struct teql_sched_data *dat = qdisc_priv(sch); + struct netdev_queue *dat_queue; struct sk_buff *skb; skb = __skb_dequeue(&dat->q); + dat_queue = &dat->m->dev->tx_queue; if (skb == NULL) { - struct net_device *m = qdisc_dev(dat->m->dev->qdisc); + struct net_device *m = qdisc_dev(dat_queue->qdisc); if (m) { dat->m->slaves = sch; netif_wake_queue(m); } } - sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen; + sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; return skb; } @@ -155,7 +157,7 @@ teql_destroy(struct Qdisc* sch) if (q == master->slaves) { master->slaves = NULL; spin_lock_bh(&master->dev->tx_queue.lock); - qdisc_reset(master->dev->qdisc); + qdisc_reset(master->dev->tx_queue.qdisc); spin_unlock_bh(&master->dev->tx_queue.lock); } } @@ -216,7 +218,7 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { - struct teql_sched_data *q = qdisc_priv(dev->qdisc); + struct teql_sched_data *q = qdisc_priv(dev->tx_queue.qdisc); struct neighbour *mn = skb->dst->neighbour; struct neighbour *n = q->ncache; @@ -252,7 +254,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * static inline int teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { - if (dev->qdisc == &noop_qdisc) + if (dev->tx_queue.qdisc == &noop_qdisc) return -ENODEV; if (dev->header_ops == NULL || @@ -284,7 +286,7 @@ restart: do { struct net_device *slave = qdisc_dev(q); - if (slave->qdisc_sleeping != q) + if (slave->tx_queue.qdisc_sleeping != q) continue; if (netif_queue_stopped(slave) || __netif_subqueue_stopped(slave, subq) || -- cgit v1.2.3 From 816f3258e70db38d6d92c8d871377179fd69160f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 22:49:00 -0700 Subject: netdev: Kill qdisc_ingress, use netdev->rx_queue.qdisc instead. Now that our qdisc management is bi-directional, per-queue, and fully orthogonal, there is no reason to have a special ingress qdisc pointer in struct net_device. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 --- net/core/dev.c | 4 ++-- net/sched/sch_api.c | 11 ++++++----- 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index df702a7b3db5..e7c49246fd88 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -634,9 +634,6 @@ struct net_device struct netdev_queue rx_queue; struct netdev_queue tx_queue ____cacheline_aligned_in_smp; - - struct Qdisc *qdisc_ingress; - unsigned long tx_queue_len; /* Max frames per queue allowed */ /* Partially transmitted GSO packet. */ diff --git a/net/core/dev.c b/net/core/dev.c index ce79c28d739d..ab760a954d99 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2033,7 +2033,7 @@ static int ing_filter(struct sk_buff *skb) rxq = &dev->rx_queue; spin_lock(&rxq->lock); - if ((q = dev->qdisc_ingress) != NULL) + if ((q = rxq->qdisc) != NULL) result = q->enqueue(skb, q); spin_unlock(&rxq->lock); @@ -2044,7 +2044,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - if (!skb->dev->qdisc_ingress) + if (!skb->dev->rx_queue.qdisc) goto out; if (*pt_prev) { diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2313fa7c97be..4003c280b69f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -450,14 +450,15 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) qdisc_lock_tree(dev); if (qdisc && qdisc->flags&TCQ_F_INGRESS) { - oqdisc = dev->qdisc_ingress; + dev_queue = &dev->rx_queue; + oqdisc = dev_queue->qdisc; /* Prune old scheduler */ if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) { /* delete */ qdisc_reset(oqdisc); - dev->qdisc_ingress = NULL; + dev_queue->qdisc = NULL; } else { /* new */ - dev->qdisc_ingress = qdisc; + dev_queue->qdisc = qdisc; } } else { @@ -739,7 +740,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -ENOENT; q = qdisc_leaf(p, clid); } else { /* ingress */ - q = dev->qdisc_ingress; + q = dev->rx_queue.qdisc; } } else { struct netdev_queue *dev_queue = &dev->tx_queue; @@ -814,7 +815,7 @@ replay: return -ENOENT; q = qdisc_leaf(p, clid); } else { /*ingress */ - q = dev->qdisc_ingress; + q = dev->rx_queue.qdisc; } } else { struct netdev_queue *dev_queue = &dev->tx_queue; -- cgit v1.2.3 From 68dfb42798e1eb2d42acbf872925cc75f1487d9b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 22:57:31 -0700 Subject: pkt_sched: Kill stats_lock member of struct Qdisc. It is always equal to qdisc->dev_queue->lock Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 - net/sched/sch_api.c | 9 ++++----- net/sched/sch_generic.c | 1 - 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 66ec36d8ac97..ea71705e9c77 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -43,7 +43,6 @@ struct Qdisc struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; - spinlock_t *stats_lock; struct rcu_head q_rcu; int (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 4003c280b69f..e73bd68aa7ae 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -604,7 +604,6 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, sch->parent = parent; - sch->stats_lock = &dev_queue->lock; if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; handle = TC_H_MAKE(TC_H_INGRESS, 0); @@ -622,7 +621,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { if (tca[TCA_RATE]) { err = gen_new_estimator(&sch->bstats, &sch->rate_est, - sch->stats_lock, + &sch->dev_queue->lock, tca[TCA_RATE]); if (err) { /* @@ -664,7 +663,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) } if (tca[TCA_RATE]) gen_replace_estimator(&sch->bstats, &sch->rate_est, - sch->stats_lock, tca[TCA_RATE]); + &sch->dev_queue->lock, tca[TCA_RATE]); return 0; } @@ -954,7 +953,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, q->qstats.qlen = q->q.qlen; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, q->stats_lock, &d) < 0) + TCA_XSTATS, &q->dev_queue->lock, &d) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -1203,7 +1202,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, q->stats_lock, &d) < 0) + TCA_XSTATS, &q->dev_queue->lock, &d) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3223e5ba76aa..dda78ee314ec 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -484,7 +484,6 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) goto errout; - sch->stats_lock = &dev_queue->lock; sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) -- cgit v1.2.3 From 86d804e10a37cd86f16bf72386c37e843a98a74b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 23:11:25 -0700 Subject: netdev: Make netif_schedule() routines work with netdev_queue objects. Only plain netif_schedule() remains taking a net_device, mostly as a compatability item while we transition the rest of these interfaces. Everything else calls netif_schedule_queue() or __netif_schedule(), both of which take a netdev_queue pointer. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 ++++++++++++----- net/core/dev.c | 9 +++++---- net/mac80211/main.c | 4 ++-- net/sched/sch_api.c | 4 ++-- net/sched/sch_cbq.c | 2 +- net/sched/sch_generic.c | 10 +++++----- 6 files changed, 27 insertions(+), 19 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aae6c6d153f2..28aa8e77cee9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -952,12 +952,19 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data); #define HAVE_NETIF_QUEUE -extern void __netif_schedule(struct net_device *dev); +extern void __netif_schedule(struct netdev_queue *txq); -static inline void netif_schedule(struct net_device *dev) +static inline void netif_schedule_queue(struct netdev_queue *txq) { + struct net_device *dev = txq->dev; + if (!test_bit(__LINK_STATE_XOFF, &dev->state)) - __netif_schedule(dev); + __netif_schedule(txq); +} + +static inline void netif_schedule(struct net_device *dev) +{ + netif_schedule_queue(&dev->tx_queue); } /** @@ -987,7 +994,7 @@ static inline void netif_wake_queue(struct net_device *dev) } #endif if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) - __netif_schedule(dev); + __netif_schedule(&dev->tx_queue); } /** @@ -1103,7 +1110,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) #endif if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state)) - __netif_schedule(dev); + __netif_schedule(&dev->tx_queue); #endif } diff --git a/net/core/dev.c b/net/core/dev.c index d6b8d3c3e6ec..0dc888ad4217 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1320,12 +1320,13 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) } -void __netif_schedule(struct net_device *dev) +void __netif_schedule(struct netdev_queue *txq) { + struct net_device *dev = txq->dev; + if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { - struct netdev_queue *txq = &dev->tx_queue; - unsigned long flags; struct softnet_data *sd; + unsigned long flags; local_irq_save(flags); sd = &__get_cpu_var(softnet_data); @@ -1932,7 +1933,7 @@ static void net_tx_action(struct softirq_action *h) qdisc_run(dev); spin_unlock(&txq->lock); } else { - netif_schedule(dev); + netif_schedule_queue(txq); } } } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 2968baa66b91..1c4d3ba6b878 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -885,10 +885,10 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) spin_unlock_bh(&txq->lock); /* we just requeued the all the frames that were in the removed - * queue, and since we might miss a softirq we do netif_schedule. + * queue, and since we might miss a softirq we do netif_schedule_queue. * ieee80211_wake_queue is not used here as this queue is not * necessarily stopped */ - netif_schedule(local->mdev); + netif_schedule_queue(txq); spin_lock_bh(&sta->lock); *state = HT_AGG_STATE_IDLE; sta->ampdu_mlme.addba_req_num[tid] = 0; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e73bd68aa7ae..95873f8dd37c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -282,11 +282,11 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); - struct net_device *dev = qdisc_dev(wd->qdisc); + struct netdev_queue *txq = wd->qdisc->dev_queue; wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - netif_schedule(dev); + netif_schedule_queue(txq); return HRTIMER_NORESTART; } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 99ce3da2b0a4..4efc836cbf38 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -650,7 +650,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) } sch->flags &= ~TCQ_F_THROTTLED; - netif_schedule(qdisc_dev(sch)); + netif_schedule_queue(sch->dev_queue); return HRTIMER_NORESTART; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 8247a406a401..407dfdb142a4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -62,7 +62,7 @@ static inline int qdisc_qlen(struct Qdisc *q) return q->q.qlen; } -static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, +static inline int dev_requeue_skb(struct sk_buff *skb, struct netdev_queue *dev_queue, struct Qdisc *q) { @@ -71,7 +71,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, else q->ops->requeue(skb, q); - netif_schedule(dev); + netif_schedule_queue(dev_queue); return 0; } @@ -114,7 +114,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * some time. */ __get_cpu_var(netdev_rx_stat).cpu_collision++; - ret = dev_requeue_skb(skb, dev, dev_queue, q); + ret = dev_requeue_skb(skb, dev_queue, q); } return ret; @@ -179,7 +179,7 @@ static inline int qdisc_restart(struct net_device *dev) printk(KERN_WARNING "BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); - ret = dev_requeue_skb(skb, dev, txq, q); + ret = dev_requeue_skb(skb, txq, q); break; } @@ -200,7 +200,7 @@ void __qdisc_run(struct net_device *dev) * 2. we've been doing it for too long. */ if (need_resched() || jiffies != start_time) { - netif_schedule(dev); + netif_schedule_queue(&dev->tx_queue); break; } } -- cgit v1.2.3 From e8a0464cc950972824e2e128028ae3db666ec1ed Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 00:34:19 -0700 Subject: netdev: Allocate multiple queues for TX. alloc_netdev_mq() now allocates an array of netdev_queue structures for TX, based upon the queue_count argument. Furthermore, all accesses to the TX queues are now vectored through the netdev_get_tx_queue() and netdev_for_each_tx_queue() interfaces. This makes it easy to grep the tree for all things that want to get to a TX queue of a net device. Problem spots which are not really multiqueue aware yet, and only work with one queue, can easily be spotted by grepping for all netdev_get_tx_queue() calls that pass in a zero index. Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +- drivers/net/hamradio/bpqether.c | 6 +- drivers/net/ifb.c | 12 ++- drivers/net/macvlan.c | 6 +- drivers/net/wireless/hostap/hostap_hw.c | 6 +- include/linux/netdevice.h | 69 ++++++++----- include/net/sch_generic.h | 37 +++++-- net/8021q/vlan_dev.c | 10 +- net/core/dev.c | 40 +++++-- net/core/rtnetlink.c | 2 +- net/mac80211/main.c | 4 +- net/mac80211/wme.c | 12 +-- net/netrom/af_netrom.c | 6 +- net/rose/af_rose.c | 6 +- net/sched/cls_api.c | 4 +- net/sched/sch_api.c | 32 ++++-- net/sched/sch_generic.c | 178 +++++++++++++++++++++++--------- net/sched/sch_teql.c | 21 ++-- 18 files changed, 320 insertions(+), 137 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fd87dbe7999a..9737c06045d6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5042,7 +5042,9 @@ static int bond_check_params(struct bond_params *params) static struct lock_class_key bonding_netdev_xmit_lock_key; -static void bond_set_lockdep_class_one(struct netdev_queue *txq) +static void bond_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) { lockdep_set_class(&txq->_xmit_lock, &bonding_netdev_xmit_lock_key); @@ -5050,7 +5052,7 @@ static void bond_set_lockdep_class_one(struct netdev_queue *txq) static void bond_set_lockdep_class(struct net_device *dev) { - bond_set_lockdep_class_one(&dev->tx_queue); + netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL); } /* Create a new bond based on the specified name and bonding parameters. diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index fb186b8c3d4d..b6500b2aacf2 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -124,14 +124,16 @@ static LIST_HEAD(bpq_devices); */ static struct lock_class_key bpq_netdev_xmit_lock_key; -static void bpq_set_lockdep_class_one(struct netdev_queue *txq) +static void bpq_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) { lockdep_set_class(&txq->_xmit_lock, &bpq_netdev_xmit_lock_key); } static void bpq_set_lockdep_class(struct net_device *dev) { - bpq_set_lockdep_class_one(&dev->tx_queue); + netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL); } /* ------------------------------------------------------------------------ */ diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index ccbd6554f6eb..897b05e79ed0 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -229,14 +229,20 @@ module_param(numifbs, int, 0); MODULE_PARM_DESC(numifbs, "Number of ifb devices"); /* - * dev_ifb->tx_queue.lock is usually taken after dev->rx_queue.lock, + * dev_ifb's TX queue lock is usually taken after dev->rx_queue.lock, * reversely to e.g. qdisc_lock_tree(). It should be safe until - * ifb doesn't take dev->tx_queue.lock with dev_ifb->rx_queue.lock. + * ifb doesn't take dev's TX queue lock with dev_ifb->rx_queue.lock. * But lockdep should know that ifb has different locks from dev. */ static struct lock_class_key ifb_tx_queue_lock_key; static struct lock_class_key ifb_rx_queue_lock_key; +static void set_tx_lockdep_key(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->lock, &ifb_tx_queue_lock_key); +} static int __init ifb_init_one(int index) { @@ -258,7 +264,7 @@ static int __init ifb_init_one(int index) if (err < 0) goto err; - lockdep_set_class(&dev_ifb->tx_queue.lock, &ifb_tx_queue_lock_key); + netdev_for_each_tx_queue(dev_ifb, set_tx_lockdep_key, NULL); lockdep_set_class(&dev_ifb->rx_queue.lock, &ifb_rx_queue_lock_key); return 0; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 980001c2cf96..72745ce588c6 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -285,7 +285,9 @@ static struct lock_class_key macvlan_netdev_xmit_lock_key; #define MACVLAN_STATE_MASK \ ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) -static void macvlan_set_lockdep_class_one(struct netdev_queue *txq) +static void macvlan_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) { lockdep_set_class(&txq->_xmit_lock, &macvlan_netdev_xmit_lock_key); @@ -293,7 +295,7 @@ static void macvlan_set_lockdep_class_one(struct netdev_queue *txq) static void macvlan_set_lockdep_class(struct net_device *dev) { - macvlan_set_lockdep_class_one(&dev->tx_queue); + netdev_for_each_tx_queue(dev, macvlan_set_lockdep_class_one, NULL); } static int macvlan_init(struct net_device *dev) diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index c1f4bb005d92..13d5882f1f21 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -3102,7 +3102,9 @@ static void prism2_clear_set_tim_queue(local_info_t *local) */ static struct lock_class_key hostap_netdev_xmit_lock_key; -static void prism2_set_lockdep_class_one(struct netdev_queue *txq) +static void prism2_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) { lockdep_set_class(&txq->_xmit_lock, &hostap_netdev_xmit_lock_key); @@ -3110,7 +3112,7 @@ static void prism2_set_lockdep_class_one(struct netdev_queue *txq) static void prism2_set_lockdep_class(struct net_device *dev) { - prism2_set_lockdep_class_one(&dev->tx_queue); + netdev_for_each_tx_queue(dev, prism2_set_lockdep_class_one, NULL); } static struct net_device * diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 570cf7affa72..f25d4f5a31b0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -463,7 +463,7 @@ struct netdev_queue { struct Qdisc *qdisc_sleeping; struct list_head qdisc_list; struct netdev_queue *next_sched; -}; +} ____cacheline_aligned_in_smp; /* * The DEVICE structure. @@ -641,7 +641,9 @@ struct net_device unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ struct netdev_queue rx_queue; - struct netdev_queue tx_queue ____cacheline_aligned_in_smp; + + struct netdev_queue *_tx ____cacheline_aligned_in_smp; + unsigned int num_tx_queues; unsigned long tx_queue_len; /* Max frames per queue allowed */ /* @@ -764,6 +766,25 @@ struct net_device #define NETDEV_ALIGN 32 #define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) +static inline +struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, + unsigned int index) +{ + return &dev->_tx[index]; +} + +static inline void netdev_for_each_tx_queue(struct net_device *dev, + void (*f)(struct net_device *, + struct netdev_queue *, + void *), + void *arg) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) + f(dev, &dev->_tx[i], arg); +} + /* * Net namespace inlines */ @@ -977,7 +998,7 @@ static inline void netif_schedule_queue(struct netdev_queue *txq) static inline void netif_schedule(struct net_device *dev) { - netif_schedule_queue(&dev->tx_queue); + netif_schedule_queue(netdev_get_tx_queue(dev, 0)); } /** @@ -993,7 +1014,7 @@ static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) static inline void netif_start_queue(struct net_device *dev) { - netif_tx_start_queue(&dev->tx_queue); + netif_tx_start_queue(netdev_get_tx_queue(dev, 0)); } /** @@ -1017,7 +1038,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) static inline void netif_wake_queue(struct net_device *dev) { - netif_tx_wake_queue(&dev->tx_queue); + netif_tx_wake_queue(netdev_get_tx_queue(dev, 0)); } /** @@ -1034,7 +1055,7 @@ static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) static inline void netif_stop_queue(struct net_device *dev) { - netif_tx_stop_queue(&dev->tx_queue); + netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); } /** @@ -1050,7 +1071,7 @@ static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue) static inline int netif_queue_stopped(const struct net_device *dev) { - return netif_tx_queue_stopped(&dev->tx_queue); + return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); } /** @@ -1134,7 +1155,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) #endif if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state)) - __netif_schedule(&dev->tx_queue); + __netif_schedule(netdev_get_tx_queue(dev, 0)); } /** @@ -1430,18 +1451,19 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) static inline void netif_tx_lock(struct net_device *dev) { - __netif_tx_lock(&dev->tx_queue, smp_processor_id()); -} + int cpu = smp_processor_id(); + unsigned int i; -static inline void __netif_tx_lock_bh(struct netdev_queue *txq) -{ - spin_lock_bh(&txq->_xmit_lock); - txq->xmit_lock_owner = smp_processor_id(); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + __netif_tx_lock(txq, cpu); + } } static inline void netif_tx_lock_bh(struct net_device *dev) { - __netif_tx_lock_bh(&dev->tx_queue); + local_bh_disable(); + netif_tx_lock(dev); } static inline int __netif_tx_trylock(struct netdev_queue *txq) @@ -1454,7 +1476,7 @@ static inline int __netif_tx_trylock(struct netdev_queue *txq) static inline int netif_tx_trylock(struct net_device *dev) { - return __netif_tx_trylock(&dev->tx_queue); + return __netif_tx_trylock(netdev_get_tx_queue(dev, 0)); } static inline void __netif_tx_unlock(struct netdev_queue *txq) @@ -1465,18 +1487,19 @@ static inline void __netif_tx_unlock(struct netdev_queue *txq) static inline void netif_tx_unlock(struct net_device *dev) { - __netif_tx_unlock(&dev->tx_queue); -} + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + __netif_tx_unlock(txq); + } -static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) -{ - txq->xmit_lock_owner = -1; - spin_unlock_bh(&txq->_xmit_lock); } static inline void netif_tx_unlock_bh(struct net_device *dev) { - __netif_tx_unlock_bh(&dev->tx_queue); + netif_tx_unlock(dev); + local_bh_enable(); } #define HARD_TX_LOCK(dev, txq, cpu) { \ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 5ba66b555578..b47f556c66f8 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -230,32 +230,47 @@ extern void tcf_destroy_chain(struct tcf_proto **fl); /* Reset all TX qdiscs of a device. */ static inline void qdisc_reset_all_tx(struct net_device *dev) { - qdisc_reset(dev->tx_queue.qdisc); + unsigned int i; + for (i = 0; i < dev->num_tx_queues; i++) + qdisc_reset(netdev_get_tx_queue(dev, i)->qdisc); } /* Are all TX queues of the device empty? */ static inline bool qdisc_all_tx_empty(const struct net_device *dev) { - const struct netdev_queue *txq = &dev->tx_queue; - const struct Qdisc *q = txq->qdisc; + unsigned int i; + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + const struct Qdisc *q = txq->qdisc; - return (q->q.qlen == 0); + if (q->q.qlen) + return false; + } + return true; } /* Are any of the TX qdiscs changing? */ static inline bool qdisc_tx_changing(struct net_device *dev) { - struct netdev_queue *txq = &dev->tx_queue; - - return (txq->qdisc != txq->qdisc_sleeping); + unsigned int i; + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + if (txq->qdisc != txq->qdisc_sleeping) + return true; + } + return false; } -/* Is the device using the noop qdisc? */ +/* Is the device using the noop qdisc on all queues? */ static inline bool qdisc_tx_is_noop(const struct net_device *dev) { - const struct netdev_queue *txq = &dev->tx_queue; - - return (txq->qdisc == &noop_qdisc); + unsigned int i; + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + if (txq->qdisc != &noop_qdisc) + return false; + } + return true; } static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 6b985f23fd9f..f42bc2b26b85 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -570,16 +570,18 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) */ static struct lock_class_key vlan_netdev_xmit_lock_key; -static void vlan_dev_set_lockdep_one(struct netdev_queue *txq, - int subclass) +static void vlan_dev_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_subclass) { lockdep_set_class_and_subclass(&txq->_xmit_lock, - &vlan_netdev_xmit_lock_key, subclass); + &vlan_netdev_xmit_lock_key, + *(int *)_subclass); } static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) { - vlan_dev_set_lockdep_one(&dev->tx_queue, subclass); + netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass); } static const struct header_ops vlan_header_ops = { diff --git a/net/core/dev.c b/net/core/dev.c index 9b49f74a9820..69378f250695 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1666,6 +1666,12 @@ out_kfree_skb: * --BLG */ +static struct netdev_queue *dev_pick_tx(struct net_device *dev, + struct sk_buff *skb) +{ + return netdev_get_tx_queue(dev, 0); +} + int dev_queue_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -1702,7 +1708,7 @@ int dev_queue_xmit(struct sk_buff *skb) } gso: - txq = &dev->tx_queue; + txq = dev_pick_tx(dev, skb); spin_lock_prefetch(&txq->lock); /* Disable soft irqs for various locks below. Also @@ -3788,8 +3794,9 @@ static void rollback_registered(struct net_device *dev) dev_put(dev); } -static void __netdev_init_queue_locks_one(struct netdev_queue *dev_queue, - struct net_device *dev) +static void __netdev_init_queue_locks_one(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) { spin_lock_init(&dev_queue->_xmit_lock); netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type); @@ -3798,8 +3805,8 @@ static void __netdev_init_queue_locks_one(struct netdev_queue *dev_queue, static void netdev_init_queue_locks(struct net_device *dev) { - __netdev_init_queue_locks_one(&dev->tx_queue, dev); - __netdev_init_queue_locks_one(&dev->rx_queue, dev); + netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); + __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); } /** @@ -4119,7 +4126,8 @@ static struct net_device_stats *internal_stats(struct net_device *dev) } static void netdev_init_one_queue(struct net_device *dev, - struct netdev_queue *queue) + struct netdev_queue *queue, + void *_unused) { spin_lock_init(&queue->lock); queue->dev = dev; @@ -4127,8 +4135,8 @@ static void netdev_init_one_queue(struct net_device *dev, static void netdev_init_queues(struct net_device *dev) { - netdev_init_one_queue(dev, &dev->rx_queue); - netdev_init_one_queue(dev, &dev->tx_queue); + netdev_init_one_queue(dev, &dev->rx_queue, NULL); + netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); } /** @@ -4145,9 +4153,10 @@ static void netdev_init_queues(struct net_device *dev) struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, void (*setup)(struct net_device *), unsigned int queue_count) { - void *p; + struct netdev_queue *tx; struct net_device *dev; int alloc_size; + void *p; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -4167,11 +4176,22 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, return NULL; } + tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL); + if (!tx) { + printk(KERN_ERR "alloc_netdev: Unable to allocate " + "tx qdiscs.\n"); + kfree(p); + return NULL; + } + dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; dev_net_set(dev, &init_net); + dev->_tx = tx; + dev->num_tx_queues = queue_count; + if (sizeof_priv) { dev->priv = ((char *)dev + ((sizeof(struct net_device) + @@ -4205,6 +4225,8 @@ void free_netdev(struct net_device *dev) { release_net(dev_net(dev)); + kfree(dev->_tx); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8ef9f1db610e..71edb8b36341 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -636,7 +636,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->master) NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); - txq = &dev->tx_queue; + txq = netdev_get_tx_queue(dev, 0); if (txq->qdisc_sleeping) NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index af0056e7e5b3..b486e634f4fe 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -621,7 +621,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) /* ensure that TX flow won't interrupt us * until the end of the call to requeue function */ - txq = &local->mdev->tx_queue; + txq = netdev_get_tx_queue(local->mdev, 0); spin_lock_bh(&txq->lock); /* create a new queue for this aggregation */ @@ -862,7 +862,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) /* avoid ordering issues: we are the only one that can modify * the content of the qdiscs */ - txq = &local->mdev->tx_queue; + txq = netdev_get_tx_queue(local->mdev, 0); spin_lock_bh(&txq->lock); /* remove the queue for this aggregation */ ieee80211_ht_agg_queue_remove(local, sta, tid, 1); diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 6ae43a3c7726..f014cd38c2d0 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -574,7 +574,7 @@ static struct Qdisc_ops wme_qdisc_ops __read_mostly = void ieee80211_install_qdisc(struct net_device *dev) { - struct netdev_queue *txq = &dev->tx_queue; + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); struct Qdisc *qdisc; qdisc = qdisc_create_dflt(dev, txq, @@ -596,7 +596,7 @@ void ieee80211_install_qdisc(struct net_device *dev) int ieee80211_qdisc_installed(struct net_device *dev) { - struct netdev_queue *txq = &dev->tx_queue; + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); return txq->qdisc_sleeping->ops == &wme_qdisc_ops; } @@ -617,7 +617,7 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { int i; - struct netdev_queue *txq = &local->mdev->tx_queue; + struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, 0); struct ieee80211_sched_data *q = qdisc_priv(txq->qdisc_sleeping); DECLARE_MAC_BUF(mac); @@ -652,14 +652,14 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, } /** - * the caller needs to hold local->mdev->tx_queue.lock + * the caller needs to hold netdev_get_tx_queue(local->mdev, X)->lock */ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, struct sta_info *sta, u16 tid, u8 requeue) { struct ieee80211_hw *hw = &local->hw; - struct netdev_queue *txq = &local->mdev->tx_queue; + struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, 0); struct ieee80211_sched_data *q = qdisc_priv(txq->qdisc_sleeping); int agg_queue = sta->tid_to_tx_q[tid]; @@ -676,7 +676,7 @@ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, void ieee80211_requeue(struct ieee80211_local *local, int queue) { - struct netdev_queue *txq = &local->mdev->tx_queue; + struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, 0); struct Qdisc *root_qd = txq->qdisc_sleeping; struct ieee80211_sched_data *q = qdisc_priv(root_qd); struct Qdisc *qdisc = q->queues[queue]; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 819afc449e1e..d41be0d66eb0 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -74,14 +74,16 @@ static const struct proto_ops nr_proto_ops; */ static struct lock_class_key nr_netdev_xmit_lock_key; -static void nr_set_lockdep_one(struct netdev_queue *txq) +static void nr_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) { lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key); } static void nr_set_lockdep_key(struct net_device *dev) { - nr_set_lockdep_one(&dev->tx_queue); + netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL); } /* diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 7dbbc0891623..f3a691f34909 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -75,14 +75,16 @@ ax25_address rose_callsign; */ static struct lock_class_key rose_netdev_xmit_lock_key; -static void rose_set_lockdep_one(struct netdev_queue *txq) +static void rose_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) { lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key); } static void rose_set_lockdep_key(struct net_device *dev) { - rose_set_lockdep_one(&dev->tx_queue); + netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL); } /* diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b483bbea6118..d0b0a9b14394 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -166,7 +166,7 @@ replay: /* Find qdisc */ if (!parent) { - struct netdev_queue *dev_queue = &dev->tx_queue; + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); q = dev_queue->qdisc_sleeping; parent = q->handle; } else { @@ -410,7 +410,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; - dev_queue = &dev->tx_queue; + dev_queue = netdev_get_tx_queue(dev, 0); if (!tcm->tcm_parent) q = dev_queue->qdisc_sleeping; else diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 95873f8dd37c..830ccc544a15 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -183,9 +183,8 @@ EXPORT_SYMBOL(unregister_qdisc); (root qdisc, all its children, children of children etc.) */ -struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +static struct Qdisc *__qdisc_lookup(struct netdev_queue *dev_queue, u32 handle) { - struct netdev_queue *dev_queue = &dev->tx_queue; struct Qdisc *q; list_for_each_entry(q, &dev_queue->qdisc_list, list) { @@ -195,6 +194,19 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) return NULL; } +struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + struct Qdisc *q = __qdisc_lookup(txq, handle); + if (q) + return q; + } + return NULL; +} + static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; @@ -462,7 +474,7 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) } } else { - dev_queue = &dev->tx_queue; + dev_queue = netdev_get_tx_queue(dev, 0); oqdisc = dev_queue->qdisc_sleeping; /* Prune old scheduler */ @@ -742,7 +754,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) q = dev->rx_queue.qdisc; } } else { - struct netdev_queue *dev_queue = &dev->tx_queue; + struct netdev_queue *dev_queue; + dev_queue = netdev_get_tx_queue(dev, 0); q = dev_queue->qdisc_sleeping; } if (!q) @@ -817,7 +830,8 @@ replay: q = dev->rx_queue.qdisc; } } else { - struct netdev_queue *dev_queue = &dev->tx_queue; + struct netdev_queue *dev_queue; + dev_queue = netdev_get_tx_queue(dev, 0); q = dev_queue->qdisc_sleeping; } @@ -899,7 +913,7 @@ create_n_graft: tcm->tcm_parent, tcm->tcm_parent, tca, &err); else - q = qdisc_create(dev, &dev->tx_queue, + q = qdisc_create(dev, netdev_get_tx_queue(dev, 0), tcm->tcm_parent, tcm->tcm_handle, tca, &err); if (q == NULL) { @@ -1025,7 +1039,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) if (idx > s_idx) s_q_idx = 0; q_idx = 0; - dev_queue = &dev->tx_queue; + dev_queue = netdev_get_tx_queue(dev, 0); list_for_each_entry(q, &dev_queue->qdisc_list, list) { if (q_idx < s_q_idx) { q_idx++; @@ -1098,7 +1112,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ - dev_queue = &dev->tx_queue; + dev_queue = netdev_get_tx_queue(dev, 0); if (pid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(pid); @@ -1275,7 +1289,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - dev_queue = &dev->tx_queue; + dev_queue = netdev_get_tx_queue(dev, 0); list_for_each_entry(q, &dev_queue->qdisc_list, list) { if (t < s_t || !q->ops->cl_ops || (tcm->tcm_parent && diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 243de935b182..4e2b865cbba0 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -40,20 +40,30 @@ */ void qdisc_lock_tree(struct net_device *dev) - __acquires(dev->tx_queue.lock) __acquires(dev->rx_queue.lock) { - spin_lock_bh(&dev->tx_queue.lock); + unsigned int i; + + local_bh_disable(); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + spin_lock(&txq->lock); + } spin_lock(&dev->rx_queue.lock); } EXPORT_SYMBOL(qdisc_lock_tree); void qdisc_unlock_tree(struct net_device *dev) __releases(dev->rx_queue.lock) - __releases(dev->tx_queue.lock) { + unsigned int i; + spin_unlock(&dev->rx_queue.lock); - spin_unlock_bh(&dev->tx_queue.lock); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + spin_unlock(&txq->lock); + } + local_bh_enable(); } EXPORT_SYMBOL(qdisc_unlock_tree); @@ -212,22 +222,37 @@ void __qdisc_run(struct netdev_queue *txq) static void dev_watchdog(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; - struct netdev_queue *txq = &dev->tx_queue; netif_tx_lock(dev); - if (txq->qdisc != &noop_qdisc) { + if (!qdisc_tx_is_noop(dev)) { if (netif_device_present(dev) && netif_running(dev) && netif_carrier_ok(dev)) { - if (netif_queue_stopped(dev) && - time_after(jiffies, dev->trans_start + dev->watchdog_timeo)) { + int some_queue_stopped = 0; + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(dev, i); + if (netif_tx_queue_stopped(txq)) { + some_queue_stopped = 1; + break; + } + } - printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", + if (some_queue_stopped && + time_after(jiffies, (dev->trans_start + + dev->watchdog_timeo))) { + printk(KERN_INFO "NETDEV WATCHDOG: %s: " + "transmit timed out\n", dev->name); dev->tx_timeout(dev); WARN_ON_ONCE(1); } - if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) + if (!mod_timer(&dev->watchdog_timer, + round_jiffies(jiffies + + dev->watchdog_timeo))) dev_hold(dev); } } @@ -542,9 +567,55 @@ void qdisc_destroy(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_destroy); +static bool dev_all_qdisc_sleeping_noop(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + if (txq->qdisc_sleeping != &noop_qdisc) + return false; + } + return true; +} + +static void attach_one_default_qdisc(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + + if (dev->tx_queue_len) { + qdisc = qdisc_create_dflt(dev, dev_queue, + &pfifo_fast_ops, TC_H_ROOT); + if (!qdisc) { + printk(KERN_INFO "%s: activation failed\n", dev->name); + return; + } + list_add_tail(&qdisc->list, &dev_queue->qdisc_list); + } else { + qdisc = &noqueue_qdisc; + } + dev_queue->qdisc_sleeping = qdisc; +} + +static void transition_one_qdisc(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_need_watchdog) +{ + int *need_watchdog_p = _need_watchdog; + + spin_lock_bh(&dev_queue->lock); + rcu_assign_pointer(dev_queue->qdisc, dev_queue->qdisc_sleeping); + if (dev_queue->qdisc != &noqueue_qdisc) + *need_watchdog_p = 1; + spin_unlock_bh(&dev_queue->lock); +} + void dev_activate(struct net_device *dev) { - struct netdev_queue *txq = &dev->tx_queue; + int need_watchdog; /* No queueing discipline is attached to device; create default one i.e. pfifo_fast for devices, @@ -552,39 +623,27 @@ void dev_activate(struct net_device *dev) virtual interfaces */ - if (txq->qdisc_sleeping == &noop_qdisc) { - struct Qdisc *qdisc; - if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, txq, - &pfifo_fast_ops, - TC_H_ROOT); - if (qdisc == NULL) { - printk(KERN_INFO "%s: activation failed\n", dev->name); - return; - } - list_add_tail(&qdisc->list, &txq->qdisc_list); - } else { - qdisc = &noqueue_qdisc; - } - txq->qdisc_sleeping = qdisc; - } + if (dev_all_qdisc_sleeping_noop(dev)) + netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ return; - spin_lock_bh(&txq->lock); - rcu_assign_pointer(txq->qdisc, txq->qdisc_sleeping); - if (txq->qdisc != &noqueue_qdisc) { + need_watchdog = 0; + netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); + + if (need_watchdog) { dev->trans_start = jiffies; dev_watchdog_up(dev); } - spin_unlock_bh(&txq->lock); } -static void dev_deactivate_queue(struct netdev_queue *dev_queue, - struct Qdisc *qdisc_default) +static void dev_deactivate_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) { + struct Qdisc *qdisc_default = _qdisc_default; struct Qdisc *qdisc; struct sk_buff *skb; @@ -603,12 +662,35 @@ static void dev_deactivate_queue(struct netdev_queue *dev_queue, kfree_skb(skb); } +static bool some_qdisc_is_running(struct net_device *dev, int lock) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *dev_queue; + int val; + + dev_queue = netdev_get_tx_queue(dev, i); + + if (lock) + spin_lock_bh(&dev_queue->lock); + + val = test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state); + + if (lock) + spin_unlock_bh(&dev_queue->lock); + + if (val) + return true; + } + return false; +} + void dev_deactivate(struct net_device *dev) { - struct netdev_queue *dev_queue = &dev->tx_queue; - int running; + bool running; - dev_deactivate_queue(dev_queue, &noop_qdisc); + netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); dev_watchdog_down(dev); @@ -617,17 +699,14 @@ void dev_deactivate(struct net_device *dev) /* Wait for outstanding qdisc_run calls. */ do { - while (test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state)) + while (some_qdisc_is_running(dev, 0)) yield(); /* * Double-check inside queue lock to ensure that all effects * of the queue run are visible when we return. */ - spin_lock_bh(&dev_queue->lock); - running = test_bit(__QUEUE_STATE_QDISC_RUNNING, - &dev_queue->state); - spin_unlock_bh(&dev_queue->lock); + running = some_qdisc_is_running(dev, 1); /* * The running flag should never be set at this point because @@ -642,8 +721,10 @@ void dev_deactivate(struct net_device *dev) static void dev_init_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, - struct Qdisc *qdisc) + void *_qdisc) { + struct Qdisc *qdisc = _qdisc; + dev_queue->qdisc = qdisc; dev_queue->qdisc_sleeping = qdisc; INIT_LIST_HEAD(&dev_queue->qdisc_list); @@ -652,18 +733,19 @@ static void dev_init_scheduler_queue(struct net_device *dev, void dev_init_scheduler(struct net_device *dev) { qdisc_lock_tree(dev); - dev_init_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc); + netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); dev_init_scheduler_queue(dev, &dev->rx_queue, NULL); qdisc_unlock_tree(dev); setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); } -static void dev_shutdown_scheduler_queue(struct net_device *dev, - struct netdev_queue *dev_queue, - struct Qdisc *qdisc_default) +static void shutdown_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) { struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { dev_queue->qdisc = qdisc_default; @@ -676,8 +758,8 @@ static void dev_shutdown_scheduler_queue(struct net_device *dev, void dev_shutdown(struct net_device *dev) { qdisc_lock_tree(dev); - dev_shutdown_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc); - dev_shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); + netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); + shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); BUG_TRAP(!timer_pending(&dev->watchdog_timer)); qdisc_unlock_tree(dev); } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 8ac05981be20..44a2c3451f4d 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -111,7 +111,7 @@ teql_dequeue(struct Qdisc* sch) struct sk_buff *skb; skb = __skb_dequeue(&dat->q); - dat_queue = &dat->m->dev->tx_queue; + dat_queue = netdev_get_tx_queue(dat->m->dev, 0); if (skb == NULL) { struct net_device *m = qdisc_dev(dat_queue->qdisc); if (m) { @@ -155,10 +155,13 @@ teql_destroy(struct Qdisc* sch) if (q == master->slaves) { master->slaves = NEXT_SLAVE(q); if (q == master->slaves) { + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - spin_lock_bh(&master->dev->tx_queue.lock); - qdisc_reset(master->dev->tx_queue.qdisc); - spin_unlock_bh(&master->dev->tx_queue.lock); + spin_lock_bh(&txq->lock); + qdisc_reset(txq->qdisc); + spin_unlock_bh(&txq->lock); } } skb_queue_purge(&dat->q); @@ -218,7 +221,8 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { - struct teql_sched_data *q = qdisc_priv(dev->tx_queue.qdisc); + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); + struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); struct neighbour *mn = skb->dst->neighbour; struct neighbour *n = q->ncache; @@ -254,7 +258,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * static inline int teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { - if (dev->tx_queue.qdisc == &noop_qdisc) + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); + if (txq->qdisc == &noop_qdisc) return -ENODEV; if (dev->header_ops == NULL || @@ -285,8 +290,10 @@ restart: do { struct net_device *slave = qdisc_dev(q); + struct netdev_queue *slave_txq; - if (slave->tx_queue.qdisc_sleeping != q) + slave_txq = netdev_get_tx_queue(slave, 0); + if (slave_txq->qdisc_sleeping != q) continue; if (netif_queue_stopped(slave) || __netif_subqueue_stopped(slave, subq) || -- cgit v1.2.3 From 7698b4fcabcd790efc4f226bada1e7b5870653af Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Jul 2008 01:42:40 -0700 Subject: pkt_sched: Add and use qdisc_root() and qdisc_root_lock(). When code wants to lock the qdisc tree state, the logic operation it's doing is locking the top-level qdisc that sits of the root of the netdev_queue. Add qdisc_root_lock() to represent this and convert the easiest cases. In order for this to work out in all cases, we have to hook up the noop_qdisc to a dummy netdev_queue. Signed-off-by: David S. Miller --- include/net/sch_generic.h | 12 ++++++++++++ net/sched/sch_api.c | 8 ++++---- net/sched/sch_cbq.c | 9 +++++---- net/sched/sch_generic.c | 21 ++++++++++++++++----- net/sched/sch_hfsc.c | 4 ++-- net/sched/sch_htb.c | 18 ++++++++++-------- net/sched/sch_netem.c | 9 ++++++--- 7 files changed, 55 insertions(+), 26 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index bc2a09da21b1..92417825d387 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -161,6 +161,18 @@ struct tcf_proto struct tcf_proto_ops *ops; }; +static inline struct Qdisc *qdisc_root(struct Qdisc *qdisc) +{ + return qdisc->dev_queue->qdisc; +} + +static inline spinlock_t *qdisc_root_lock(struct Qdisc *qdisc) +{ + struct Qdisc *root = qdisc_root(qdisc); + + return &root->dev_queue->lock; +} + static inline struct net_device *qdisc_dev(struct Qdisc *qdisc) { return qdisc->dev_queue->dev; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 830ccc544a15..19c244a00839 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -633,7 +633,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { if (tca[TCA_RATE]) { err = gen_new_estimator(&sch->bstats, &sch->rate_est, - &sch->dev_queue->lock, + qdisc_root_lock(sch), tca[TCA_RATE]); if (err) { /* @@ -675,7 +675,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) } if (tca[TCA_RATE]) gen_replace_estimator(&sch->bstats, &sch->rate_est, - &sch->dev_queue->lock, tca[TCA_RATE]); + qdisc_root_lock(sch), tca[TCA_RATE]); return 0; } @@ -967,7 +967,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, q->qstats.qlen = q->q.qlen; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, &q->dev_queue->lock, &d) < 0) + TCA_XSTATS, qdisc_root_lock(q), &d) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -1216,7 +1216,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, &q->dev_queue->lock, &d) < 0) + TCA_XSTATS, qdisc_root_lock(q), &d) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4efc836cbf38..37ae653db683 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1744,12 +1744,13 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) if (--cl->refcnt == 0) { #ifdef CONFIG_NET_CLS_ACT + spinlock_t *root_lock = qdisc_root_lock(sch); struct cbq_sched_data *q = qdisc_priv(sch); - spin_lock_bh(&sch->dev_queue->lock); + spin_lock_bh(root_lock); if (q->rx_class == cl) q->rx_class = NULL; - spin_unlock_bh(&sch->dev_queue->lock); + spin_unlock_bh(root_lock); #endif cbq_destroy_class(sch, cl); @@ -1828,7 +1829,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev_queue->lock, + qdisc_root_lock(sch), tca[TCA_RATE]); return 0; } @@ -1919,7 +1920,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev_queue->lock, tca[TCA_RATE]); + qdisc_root_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ac208c2b2d10..739a8711ab30 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -151,14 +151,17 @@ static inline int qdisc_restart(struct netdev_queue *txq, { int ret = NETDEV_TX_BUSY; struct net_device *dev; + spinlock_t *root_lock; struct sk_buff *skb; /* Dequeue packet */ if (unlikely((skb = dequeue_skb(q)) == NULL)) return 0; - /* And release queue */ - spin_unlock(&txq->lock); + root_lock = qdisc_root_lock(q); + + /* And release qdisc */ + spin_unlock(root_lock); dev = txq->dev; @@ -167,7 +170,7 @@ static inline int qdisc_restart(struct netdev_queue *txq, ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); - spin_lock(&txq->lock); + spin_lock(root_lock); switch (ret) { case NETDEV_TX_OK: @@ -345,12 +348,18 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct netdev_queue noop_netdev_queue = { + .lock = __SPIN_LOCK_UNLOCKED(noop_netdev_queue.lock), + .qdisc = &noop_qdisc, +}; + struct Qdisc noop_qdisc = { .enqueue = noop_enqueue, .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), + .dev_queue = &noop_netdev_queue, }; EXPORT_SYMBOL(noop_qdisc); @@ -666,19 +675,21 @@ static bool some_qdisc_is_running(struct net_device *dev, int lock) for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *dev_queue; + spinlock_t *root_lock; struct Qdisc *q; int val; dev_queue = netdev_get_tx_queue(dev, i); q = dev_queue->qdisc; + root_lock = qdisc_root_lock(q); if (lock) - spin_lock_bh(&dev_queue->lock); + spin_lock_bh(root_lock); val = test_bit(__QDISC_STATE_RUNNING, &q->state); if (lock) - spin_unlock_bh(&dev_queue->lock); + spin_unlock_bh(root_lock); if (val) return true; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 997d520ca580..5090708ba384 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1045,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev_queue->lock, + qdisc_root_lock(sch), tca[TCA_RATE]); return 0; } @@ -1104,7 +1104,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev_queue->lock, tca[TCA_RATE]); + qdisc_root_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c8ca54cc26b0..ee48457eaa4a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1039,11 +1039,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { + spinlock_t *root_lock = qdisc_root_lock(sch); struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_glob gopt; - spin_lock_bh(&sch->dev_queue->lock); + spin_lock_bh(root_lock); gopt.direct_pkts = q->direct_pkts; gopt.version = HTB_VER; @@ -1057,11 +1058,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); nla_nest_end(skb, nest); - spin_unlock_bh(&sch->dev_queue->lock); + spin_unlock_bh(root_lock); return skb->len; nla_put_failure: - spin_unlock_bh(&sch->dev_queue->lock); + spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1070,10 +1071,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; + spinlock_t *root_lock = qdisc_root_lock(sch); struct nlattr *nest; struct tc_htb_opt opt; - spin_lock_bh(&sch->dev_queue->lock); + spin_lock_bh(root_lock); tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT; tcm->tcm_handle = cl->common.classid; if (!cl->level && cl->un.leaf.q) @@ -1095,11 +1097,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); nla_nest_end(skb, nest); - spin_unlock_bh(&sch->dev_queue->lock); + spin_unlock_bh(root_lock); return skb->len; nla_put_failure: - spin_unlock_bh(&sch->dev_queue->lock); + spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1365,7 +1367,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev_queue->lock, + qdisc_root_lock(sch), tca[TCA_RATE] ? : &est.nla); cl->refcnt = 1; cl->children = 0; @@ -1420,7 +1422,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } else { if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev_queue->lock, + qdisc_root_lock(sch), tca[TCA_RATE]); sch_tree_lock(sch); } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bc585f2089ff..c5ea40c9eb21 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -180,7 +180,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * skb will be queued. */ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = sch->dev_queue->qdisc; + struct Qdisc *rootq = qdisc_root(sch); u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; @@ -319,6 +319,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) struct netem_sched_data *q = qdisc_priv(sch); unsigned long n = nla_len(attr)/sizeof(__s16); const __s16 *data = nla_data(attr); + spinlock_t *root_lock; struct disttable *d; int i; @@ -333,9 +334,11 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) for (i = 0; i < n; i++) d->table[i] = data[i]; - spin_lock_bh(&sch->dev_queue->lock); + root_lock = qdisc_root_lock(sch); + + spin_lock_bh(root_lock); d = xchg(&q->delay_dist, d); - spin_unlock_bh(&sch->dev_queue->lock); + spin_unlock_bh(root_lock); kfree(d); return 0; -- cgit v1.2.3 From 37437bb2e1ae8af470dfcd5b4ff454110894ccaf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Jul 2008 02:15:04 -0700 Subject: pkt_sched: Schedule qdiscs instead of netdev_queue. When we have shared qdiscs, packets come out of the qdiscs for multiple transmit queues. Therefore it doesn't make any sense to schedule the transmit queue when logically we cannot know ahead of time the TX queue of the SKB that the qdisc->dequeue() will give us. Just for sanity I added a BUG check to make sure we never get into a state where the noop_qdisc is scheduled. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++----- include/net/pkt_sched.h | 11 +++----- include/net/sch_generic.h | 2 ++ net/core/dev.c | 68 +++++++++++++++++++---------------------------- net/sched/sch_api.c | 3 +-- net/sched/sch_cbq.c | 2 +- net/sched/sch_generic.c | 30 ++++++++++----------- 7 files changed, 55 insertions(+), 73 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9240a95793be..1e839fa01434 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -275,7 +275,6 @@ enum netdev_state_t { __LINK_STATE_START, __LINK_STATE_PRESENT, - __LINK_STATE_SCHED, __LINK_STATE_NOCARRIER, __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, @@ -452,7 +451,6 @@ struct netdev_queue { int xmit_lock_owner; struct Qdisc *qdisc_sleeping; struct list_head qdisc_list; - struct netdev_queue *next_sched; } ____cacheline_aligned_in_smp; /* @@ -969,7 +967,7 @@ static inline int unregister_gifconf(unsigned int family) */ struct softnet_data { - struct netdev_queue *output_queue; + struct Qdisc *output_queue; struct sk_buff_head input_pkt_queue; struct list_head poll_list; struct sk_buff *completion_queue; @@ -984,12 +982,12 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data); #define HAVE_NETIF_QUEUE -extern void __netif_schedule(struct netdev_queue *txq); +extern void __netif_schedule(struct Qdisc *q); static inline void netif_schedule_queue(struct netdev_queue *txq) { if (!test_bit(__QUEUE_STATE_XOFF, &txq->state)) - __netif_schedule(txq); + __netif_schedule(txq->qdisc); } static inline void netif_tx_schedule_all(struct net_device *dev) @@ -1042,7 +1040,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) } #endif if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state)) - __netif_schedule(dev_queue); + __netif_schedule(dev_queue->qdisc); } static inline void netif_wake_queue(struct net_device *dev) @@ -1186,7 +1184,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) return; #endif if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state)) - __netif_schedule(txq); + __netif_schedule(txq->qdisc); } /** diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 06a442d85186..e4e30052e4e2 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -84,15 +84,12 @@ extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab); extern void qdisc_put_rtab(struct qdisc_rate_table *tab); -extern void __qdisc_run(struct netdev_queue *txq); +extern void __qdisc_run(struct Qdisc *q); -static inline void qdisc_run(struct netdev_queue *txq) +static inline void qdisc_run(struct Qdisc *q) { - struct Qdisc *q = txq->qdisc; - - if (!netif_tx_queue_stopped(txq) && - !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) - __qdisc_run(txq); + if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) + __qdisc_run(q); } extern int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 92417825d387..3cc4b5cd8c6a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -26,6 +26,7 @@ struct qdisc_rate_table enum qdisc_state_t { __QDISC_STATE_RUNNING, + __QDISC_STATE_SCHED, }; struct Qdisc @@ -45,6 +46,7 @@ struct Qdisc struct sk_buff *gso_skb; struct sk_buff_head q; struct netdev_queue *dev_queue; + struct Qdisc *next_sched; struct list_head list; struct gnet_stats_basic bstats; diff --git a/net/core/dev.c b/net/core/dev.c index 467bfb325123..0b909b74f698 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1323,18 +1323,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) } -void __netif_schedule(struct netdev_queue *txq) +void __netif_schedule(struct Qdisc *q) { - struct net_device *dev = txq->dev; + BUG_ON(q == &noop_qdisc); - if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { + if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) { struct softnet_data *sd; unsigned long flags; local_irq_save(flags); sd = &__get_cpu_var(softnet_data); - txq->next_sched = sd->output_queue; - sd->output_queue = txq; + q->next_sched = sd->output_queue; + sd->output_queue = q; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } @@ -1771,37 +1771,23 @@ gso: rcu_read_lock_bh(); txq = dev_pick_tx(dev, skb); - spin_lock_prefetch(&txq->lock); - - /* Updates of qdisc are serialized by queue->lock. - * The struct Qdisc which is pointed to by qdisc is now a - * rcu structure - it may be accessed without acquiring - * a lock (but the structure may be stale.) The freeing of the - * qdisc will be deferred until it's known that there are no - * more references to it. - * - * If the qdisc has an enqueue function, we still need to - * hold the queue->lock before calling it, since queue->lock - * also serializes access to the device queue. - */ - q = rcu_dereference(txq->qdisc); + #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); #endif if (q->enqueue) { - /* Grab device queue */ - spin_lock(&txq->lock); - q = txq->qdisc; - if (q->enqueue) { - rc = q->enqueue(skb, q); - qdisc_run(txq); - spin_unlock(&txq->lock); - - rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; - goto out; - } - spin_unlock(&txq->lock); + spinlock_t *root_lock = qdisc_root_lock(q); + + spin_lock(root_lock); + + rc = q->enqueue(skb, q); + qdisc_run(q); + + spin_unlock(root_lock); + + rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; + goto out; } /* The device has no queue. Common case for software devices: @@ -1974,7 +1960,7 @@ static void net_tx_action(struct softirq_action *h) } if (sd->output_queue) { - struct netdev_queue *head; + struct Qdisc *head; local_irq_disable(); head = sd->output_queue; @@ -1982,18 +1968,20 @@ static void net_tx_action(struct softirq_action *h) local_irq_enable(); while (head) { - struct netdev_queue *txq = head; - struct net_device *dev = txq->dev; + struct Qdisc *q = head; + spinlock_t *root_lock; + head = head->next_sched; smp_mb__before_clear_bit(); - clear_bit(__LINK_STATE_SCHED, &dev->state); + clear_bit(__QDISC_STATE_SCHED, &q->state); - if (spin_trylock(&txq->lock)) { - qdisc_run(txq); - spin_unlock(&txq->lock); + root_lock = qdisc_root_lock(q); + if (spin_trylock(root_lock)) { + qdisc_run(q); + spin_unlock(root_lock); } else { - netif_schedule_queue(txq); + __netif_schedule(q); } } } @@ -4459,7 +4447,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, void *ocpu) { struct sk_buff **list_skb; - struct netdev_queue **list_net; + struct Qdisc **list_net; struct sk_buff *skb; unsigned int cpu, oldcpu = (unsigned long)ocpu; struct softnet_data *sd, *oldsd; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 19c244a00839..8e8c5becc348 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -294,11 +294,10 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); - struct netdev_queue *txq = wd->qdisc->dev_queue; wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - netif_schedule_queue(txq); + __netif_schedule(wd->qdisc); return HRTIMER_NORESTART; } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 37ae653db683..a3953bbe2d79 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -650,7 +650,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) } sch->flags &= ~TCQ_F_THROTTLED; - netif_schedule_queue(sch->dev_queue); + __netif_schedule(sch); return HRTIMER_NORESTART; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 739a8711ab30..dd5c4e70abe4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -72,16 +72,14 @@ static inline int qdisc_qlen(struct Qdisc *q) return q->q.qlen; } -static inline int dev_requeue_skb(struct sk_buff *skb, - struct netdev_queue *dev_queue, - struct Qdisc *q) +static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { if (unlikely(skb->next)) q->gso_skb = skb; else q->ops->requeue(skb, q); - netif_schedule_queue(dev_queue); + __netif_schedule(q); return 0; } @@ -121,7 +119,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * some time. */ __get_cpu_var(netdev_rx_stat).cpu_collision++; - ret = dev_requeue_skb(skb, dev_queue, q); + ret = dev_requeue_skb(skb, q); } return ret; @@ -146,9 +144,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * >0 - queue is not empty. * */ -static inline int qdisc_restart(struct netdev_queue *txq, - struct Qdisc *q) +static inline int qdisc_restart(struct Qdisc *q) { + struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; struct net_device *dev; spinlock_t *root_lock; @@ -163,7 +161,8 @@ static inline int qdisc_restart(struct netdev_queue *txq, /* And release qdisc */ spin_unlock(root_lock); - dev = txq->dev; + dev = qdisc_dev(q); + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_subqueue_stopped(dev, skb)) @@ -189,29 +188,28 @@ static inline int qdisc_restart(struct netdev_queue *txq, printk(KERN_WARNING "BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); - ret = dev_requeue_skb(skb, txq, q); + ret = dev_requeue_skb(skb, q); break; } + if (ret && netif_tx_queue_stopped(txq)) + ret = 0; + return ret; } -void __qdisc_run(struct netdev_queue *txq) +void __qdisc_run(struct Qdisc *q) { unsigned long start_time = jiffies; - struct Qdisc *q = txq->qdisc; - - while (qdisc_restart(txq, q)) { - if (netif_tx_queue_stopped(txq)) - break; + while (qdisc_restart(q)) { /* * Postpone processing if * 1. another process needs the CPU; * 2. we've been doing it for too long. */ if (need_resched() || jiffies != start_time) { - netif_schedule_queue(txq); + __netif_schedule(q); break; } } -- cgit v1.2.3 From ead81cc5fc6d996db6afb20f211241612610a07a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 00:50:32 -0700 Subject: netdevice: Move qdisc_list back into net_device proper. And give it it's own lock. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- net/core/dev.c | 2 ++ net/sched/sch_api.c | 31 +++++++------------------------ net/sched/sch_generic.c | 9 +++++++-- 4 files changed, 18 insertions(+), 27 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1e839fa01434..3170bcef734b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -450,7 +450,6 @@ struct netdev_queue { spinlock_t _xmit_lock; int xmit_lock_owner; struct Qdisc *qdisc_sleeping; - struct list_head qdisc_list; } ____cacheline_aligned_in_smp; /* @@ -638,6 +637,8 @@ struct net_device unsigned int real_num_tx_queues; unsigned long tx_queue_len; /* Max frames per queue allowed */ + spinlock_t qdisc_list_lock; + struct list_head qdisc_list; /* * One part is mostly used on xmit path (device) diff --git a/net/core/dev.c b/net/core/dev.c index 0b909b74f698..6741e344ac59 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3886,6 +3886,8 @@ int register_netdevice(struct net_device *dev) net = dev_net(dev); spin_lock_init(&dev->addr_list_lock); + spin_lock_init(&dev->qdisc_list_lock); + INIT_LIST_HEAD(&dev->qdisc_list); netdev_init_queue_locks(dev); dev->iflink = -1; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 8e8c5becc348..6958fe7c9a77 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -183,30 +183,17 @@ EXPORT_SYMBOL(unregister_qdisc); (root qdisc, all its children, children of children etc.) */ -static struct Qdisc *__qdisc_lookup(struct netdev_queue *dev_queue, u32 handle) +struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; - list_for_each_entry(q, &dev_queue->qdisc_list, list) { + list_for_each_entry(q, &dev->qdisc_list, list) { if (q->handle == handle) return q; } return NULL; } -struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - struct Qdisc *q = __qdisc_lookup(txq, handle); - if (q) - return q; - } - return NULL; -} - static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; @@ -645,9 +632,9 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out3; } } - qdisc_lock_tree(dev); - list_add_tail(&sch->list, &dev_queue->qdisc_list); - qdisc_unlock_tree(dev); + spin_lock_bh(&dev->qdisc_list_lock); + list_add_tail(&sch->list, &dev->qdisc_list); + spin_unlock_bh(&dev->qdisc_list_lock); return sch; } @@ -1032,14 +1019,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) read_lock(&dev_base_lock); idx = 0; for_each_netdev(&init_net, dev) { - struct netdev_queue *dev_queue; if (idx < s_idx) goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; - dev_queue = netdev_get_tx_queue(dev, 0); - list_for_each_entry(q, &dev_queue->qdisc_list, list) { + list_for_each_entry(q, &dev->qdisc_list, list) { if (q_idx < s_q_idx) { q_idx++; continue; @@ -1269,7 +1254,6 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - struct netdev_queue *dev_queue; int t; int s_t; struct net_device *dev; @@ -1288,8 +1272,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - dev_queue = netdev_get_tx_queue(dev, 0); - list_for_each_entry(q, &dev_queue->qdisc_list, list) { + list_for_each_entry(q, &dev->qdisc_list, list) { if (t < s_t || !q->ops->cl_ops || (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index efa418a1b34e..8cdf0b4a6a5a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -563,11 +563,15 @@ static void __qdisc_destroy(struct rcu_head *head) void qdisc_destroy(struct Qdisc *qdisc) { + struct net_device *dev = qdisc_dev(qdisc); + if (qdisc->flags & TCQ_F_BUILTIN || !atomic_dec_and_test(&qdisc->refcnt)) return; + spin_lock_bh(&dev->qdisc_list_lock); list_del(&qdisc->list); + spin_unlock_bh(&dev->qdisc_list_lock); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } @@ -599,7 +603,9 @@ static void attach_one_default_qdisc(struct net_device *dev, printk(KERN_INFO "%s: activation failed\n", dev->name); return; } - list_add_tail(&qdisc->list, &dev_queue->qdisc_list); + spin_lock_bh(&dev->qdisc_list_lock); + list_add_tail(&qdisc->list, &dev->qdisc_list); + spin_unlock_bh(&dev->qdisc_list_lock); } else { qdisc = &noqueue_qdisc; } @@ -738,7 +744,6 @@ static void dev_init_scheduler_queue(struct net_device *dev, dev_queue->qdisc = qdisc; dev_queue->qdisc_sleeping = qdisc; - INIT_LIST_HEAD(&dev_queue->qdisc_list); } void dev_init_scheduler(struct net_device *dev) -- cgit v1.2.3 From 53049978df1d9ae55bf397c9879e6b33218352db Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Jul 2008 03:00:19 -0700 Subject: pkt_sched: Make qdisc grafting locking more specific. Lock the root of the qdisc being operated upon. All explicit references to qdisc_tree_lock() are now gone. The only remaining uses are via the sch_tree_{lock,unlock}() and tcf_tree_{lock,unlock}() macros. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6958fe7c9a77..74924893ef7f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -441,15 +441,29 @@ static struct Qdisc * dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) { struct netdev_queue *dev_queue; + spinlock_t *root_lock; struct Qdisc *oqdisc; + int ingress; if (dev->flags & IFF_UP) dev_deactivate(dev); - qdisc_lock_tree(dev); - if (qdisc && qdisc->flags&TCQ_F_INGRESS) { + ingress = 0; + if (qdisc && qdisc->flags&TCQ_F_INGRESS) + ingress = 1; + + if (ingress) { dev_queue = &dev->rx_queue; oqdisc = dev_queue->qdisc; + } else { + dev_queue = netdev_get_tx_queue(dev, 0); + oqdisc = dev_queue->qdisc_sleeping; + } + + root_lock = qdisc_root_lock(oqdisc); + spin_lock_bh(root_lock); + + if (ingress) { /* Prune old scheduler */ if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) { /* delete */ @@ -460,9 +474,6 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) } } else { - dev_queue = netdev_get_tx_queue(dev, 0); - oqdisc = dev_queue->qdisc_sleeping; - /* Prune old scheduler */ if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) qdisc_reset(oqdisc); @@ -474,7 +485,7 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) dev_queue->qdisc = &noop_qdisc; } - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); if (dev->flags & IFF_UP) dev_activate(dev); @@ -765,10 +776,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0) return err; if (q) { + spinlock_t *root_lock = qdisc_root_lock(q); + qdisc_notify(skb, n, clid, q, NULL); - qdisc_lock_tree(dev); + spin_unlock_bh(root_lock); qdisc_destroy(q); - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } } else { qdisc_notify(skb, n, clid, NULL, q); @@ -911,20 +924,24 @@ create_n_graft: graft: if (1) { struct Qdisc *old_q = NULL; + spinlock_t *root_lock; + err = qdisc_graft(dev, p, clid, q, &old_q); if (err) { if (q) { - qdisc_lock_tree(dev); + root_lock = qdisc_root_lock(q); + spin_lock_bh(root_lock); qdisc_destroy(q); - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } return err; } qdisc_notify(skb, n, clid, old_q, q); if (old_q) { - qdisc_lock_tree(dev); + root_lock = qdisc_root_lock(old_q); + spin_lock_bh(root_lock); qdisc_destroy(old_q); - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } } return 0; -- cgit v1.2.3 From 99194cff398d056e5ee469647c294466c246c88a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 04:54:10 -0700 Subject: pkt_sched: Add multiqueue handling to qdisc_graft(). Move the destruction of the old queue into qdisc_graft(). When operating on a root qdisc (ie. "parent == NULL"), apply the operation to all queues. The caller has grabbed a single implicit reference for this graft, therefore when we apply the change to more than one queue we must grab additional qdisc references. Otherwise, we are operating on a class of a specific parent qdisc, and therefore no multiqueue handling is necessary. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 101 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 42 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 74924893ef7f..b3ef8307204e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -435,28 +435,22 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return i>0 ? autohandle : 0; } -/* Attach toplevel qdisc to device dev */ +/* Attach toplevel qdisc to device queue. */ -static struct Qdisc * -dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) +static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, + struct Qdisc *qdisc) { - struct netdev_queue *dev_queue; spinlock_t *root_lock; struct Qdisc *oqdisc; int ingress; - if (dev->flags & IFF_UP) - dev_deactivate(dev); - ingress = 0; if (qdisc && qdisc->flags&TCQ_F_INGRESS) ingress = 1; if (ingress) { - dev_queue = &dev->rx_queue; oqdisc = dev_queue->qdisc; } else { - dev_queue = netdev_get_tx_queue(dev, 0); oqdisc = dev_queue->qdisc_sleeping; } @@ -487,9 +481,6 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) spin_unlock_bh(root_lock); - if (dev->flags & IFF_UP) - dev_activate(dev); - return oqdisc; } @@ -521,26 +512,66 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); -/* Graft qdisc "new" to class "classid" of qdisc "parent" or - to device "dev". +static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) +{ + if (new || old) + qdisc_notify(skb, n, clid, old, new); - Old qdisc is not destroyed but returned in *old. + if (old) { + spin_lock_bh(&old->q.lock); + qdisc_destroy(old); + spin_unlock_bh(&old->q.lock); + } +} + +/* Graft qdisc "new" to class "classid" of qdisc "parent" or + * to device "dev". + * + * When appropriate send a netlink notification using 'skb' + * and "n". + * + * On success, destroy old qdisc. */ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, - u32 classid, - struct Qdisc *new, struct Qdisc **old) + struct sk_buff *skb, struct nlmsghdr *n, u32 classid, + struct Qdisc *new, struct Qdisc *old) { + struct Qdisc *q = old; int err = 0; - struct Qdisc *q = *old; - if (parent == NULL) { - if (q && q->flags&TCQ_F_INGRESS) { - *old = dev_graft_qdisc(dev, q); - } else { - *old = dev_graft_qdisc(dev, new); + unsigned int i, num_q, ingress; + + ingress = 0; + num_q = dev->num_tx_queues; + if (q && q->flags & TCQ_F_INGRESS) { + num_q = 1; + ingress = 1; } + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + for (i = 0; i < num_q; i++) { + struct netdev_queue *dev_queue = &dev->rx_queue; + + if (!ingress) + dev_queue = netdev_get_tx_queue(dev, i); + + if (ingress) { + old = dev_graft_qdisc(dev_queue, q); + } else { + old = dev_graft_qdisc(dev_queue, new); + if (new && i > 0) + atomic_inc(&new->refcnt); + } + notify_and_destroy(skb, n, classid, old, new); + } + + if (dev->flags & IFF_UP) + dev_activate(dev); } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; @@ -549,10 +580,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (cops) { unsigned long cl = cops->get(parent, classid); if (cl) { - err = cops->graft(parent, cl, new, old); + err = cops->graft(parent, cl, new, &old); cops->put(parent, cl); } } + if (!err) + notify_and_destroy(skb, n, classid, old, new); } return err; } @@ -773,16 +806,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -EINVAL; if (q->handle == 0) return -ENOENT; - if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0) + if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) return err; - if (q) { - spinlock_t *root_lock = qdisc_root_lock(q); - - qdisc_notify(skb, n, clid, q, NULL); - spin_unlock_bh(root_lock); - qdisc_destroy(q); - spin_unlock_bh(root_lock); - } } else { qdisc_notify(skb, n, clid, NULL, q); } @@ -923,10 +948,9 @@ create_n_graft: graft: if (1) { - struct Qdisc *old_q = NULL; spinlock_t *root_lock; - err = qdisc_graft(dev, p, clid, q, &old_q); + err = qdisc_graft(dev, p, skb, n, clid, q, NULL); if (err) { if (q) { root_lock = qdisc_root_lock(q); @@ -936,13 +960,6 @@ graft: } return err; } - qdisc_notify(skb, n, clid, old_q, q); - if (old_q) { - root_lock = qdisc_root_lock(old_q); - spin_lock_bh(root_lock); - qdisc_destroy(old_q); - spin_unlock_bh(root_lock); - } } return 0; } -- cgit v1.2.3 From 3072367300aa8c779e3a14ee8e89de079e90f3ad Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Jul 2008 22:50:15 -0700 Subject: pkt_sched: Manage qdisc list inside of root qdisc. Idea is from Patrick McHardy. Instead of managing the list of qdiscs on the device level, manage it in the root qdisc of a netdev_queue. This solves all kinds of visibility issues during qdisc destruction. The way to iterate over all qdiscs of a netdev_queue is to visit the netdev_queue->qdisc, and then traverse it's list. The only special case is to ignore builting qdiscs at the root when dumping or doing a qdisc_lookup(). That was not needed previously because builtin qdiscs were not added to the device's qdisc_list. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 - net/core/dev.c | 2 - net/sched/sch_api.c | 175 ++++++++++++++++++++++++++++++++++------------ net/sched/sch_generic.c | 10 +-- 4 files changed, 133 insertions(+), 56 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9c5a68850114..812bcd8b4363 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -636,8 +636,6 @@ struct net_device unsigned int real_num_tx_queues; unsigned long tx_queue_len; /* Max frames per queue allowed */ - spinlock_t qdisc_list_lock; - struct list_head qdisc_list; /* * One part is mostly used on xmit path (device) diff --git a/net/core/dev.c b/net/core/dev.c index e54acde839da..065b9817e209 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3888,8 +3888,6 @@ int register_netdevice(struct net_device *dev) net = dev_net(dev); spin_lock_init(&dev->addr_list_lock); - spin_lock_init(&dev->qdisc_list_lock); - INIT_LIST_HEAD(&dev->qdisc_list); netdev_init_queue_locks(dev); dev->iflink = -1; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b3ef8307204e..fb43731c9860 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -185,11 +185,20 @@ EXPORT_SYMBOL(unregister_qdisc); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { - struct Qdisc *q; + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + struct Qdisc *q, *txq_root = txq->qdisc; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (q->handle == handle) - return q; + if (!(txq_root->flags & TCQ_F_BUILTIN) && + txq_root->handle == handle) + return txq_root; + + list_for_each_entry(q, &txq_root->list, list) { + if (q->handle == handle) + return q; + } } return NULL; } @@ -676,9 +685,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out3; } } - spin_lock_bh(&dev->qdisc_list_lock); - list_add_tail(&sch->list, &dev->qdisc_list); - spin_unlock_bh(&dev->qdisc_list_lock); + if (parent) + list_add_tail(&sch->list, &dev_queue->qdisc->list); return sch; } @@ -1037,13 +1045,57 @@ err_out: return -EINVAL; } +static bool tc_qdisc_dump_ignore(struct Qdisc *q) +{ + return (q->flags & TCQ_F_BUILTIN) ? true : false; +} + +static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, + struct netlink_callback *cb, + int *q_idx_p, int s_q_idx) +{ + int ret = 0, q_idx = *q_idx_p; + struct Qdisc *q; + + if (!root) + return 0; + + q = root; + if (q_idx < s_q_idx) { + q_idx++; + } else { + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + list_for_each_entry(q, &root->list, list) { + if (q_idx < s_q_idx) { + q_idx++; + continue; + } + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + +out: + *q_idx_p = q_idx; + return ret; +done: + ret = -1; + goto out; +} + static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; - struct Qdisc *q; if (net != &init_net) return 0; @@ -1053,21 +1105,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) read_lock(&dev_base_lock); idx = 0; for_each_netdev(&init_net, dev) { + struct netdev_queue *dev_queue; + if (idx < s_idx) goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (q_idx < s_q_idx) { - q_idx++; - continue; - } - if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) - goto done; - q_idx++; - } + + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + + dev_queue = &dev->rx_queue; + if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + cont: idx++; } @@ -1285,15 +1338,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } +static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct qdisc_dump_args arg; + + if (tc_qdisc_dump_ignore(q) || + *t_p < s_t || !q->ops->cl_ops || + (tcm->tcm_parent && + TC_H_MAJ(tcm->tcm_parent) != q->handle)) { + (*t_p)++; + return 0; + } + if (*t_p > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); + arg.w.fn = qdisc_class_dump; + arg.skb = skb; + arg.cb = cb; + arg.w.stop = 0; + arg.w.skip = cb->args[1]; + arg.w.count = 0; + q->ops->cl_ops->walk(q, &arg.w); + cb->args[1] = arg.w.count; + if (arg.w.stop) + return -1; + (*t_p)++; + return 0; +} + +static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct Qdisc *q; + + if (!root) + return 0; + + if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) + return -1; + + list_for_each_entry(q, &root->list, list) { + if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + return -1; + } + + return 0; +} + static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); struct net *net = sock_net(skb->sk); - int t; - int s_t; + struct netdev_queue *dev_queue; struct net_device *dev; - struct Qdisc *q; - struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); - struct qdisc_dump_args arg; + int t, s_t; if (net != &init_net) return 0; @@ -1306,28 +1406,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (t < s_t || !q->ops->cl_ops || - (tcm->tcm_parent && - TC_H_MAJ(tcm->tcm_parent) != q->handle)) { - t++; - continue; - } - if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); - arg.w.fn = qdisc_class_dump; - arg.skb = skb; - arg.cb = cb; - arg.w.stop = 0; - arg.w.skip = cb->args[1]; - arg.w.count = 0; - q->ops->cl_ops->walk(q, &arg.w); - cb->args[1] = arg.w.count; - if (arg.w.stop) - break; - t++; - } + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + goto done; + + dev_queue = &dev->rx_queue; + if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + goto done; +done: cb->args[0] = t; dev_put(dev); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e244c462e6bd..14cc443d0490 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -480,15 +480,12 @@ static void __qdisc_destroy(struct rcu_head *head) void qdisc_destroy(struct Qdisc *qdisc) { - struct net_device *dev = qdisc_dev(qdisc); - if (qdisc->flags & TCQ_F_BUILTIN || !atomic_dec_and_test(&qdisc->refcnt)) return; - spin_lock_bh(&dev->qdisc_list_lock); - list_del(&qdisc->list); - spin_unlock_bh(&dev->qdisc_list_lock); + if (qdisc->parent) + list_del(&qdisc->list); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } @@ -520,9 +517,6 @@ static void attach_one_default_qdisc(struct net_device *dev, printk(KERN_INFO "%s: activation failed\n", dev->name); return; } - spin_lock_bh(&dev->qdisc_list_lock); - list_add_tail(&qdisc->list, &dev->qdisc_list); - spin_unlock_bh(&dev->qdisc_list_lock); } else { qdisc = &noqueue_qdisc; } -- cgit v1.2.3 From 175f9c1bba9b825d22b142d183c9e175488b260c Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 20 Jul 2008 00:08:47 -0700 Subject: net_sched: Add size table for qdiscs Add size table functions for qdiscs and calculate packet size in qdisc_enqueue(). Based on patch by Patrick McHardy http://marc.info/?l=linux-netdev&m=115201979221729&w=2 Signed-off-by: Jussi Kivilinna Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 20 ++++++ include/linux/rtnetlink.h | 1 + include/net/pkt_sched.h | 1 + include/net/sch_generic.h | 25 +++++++- net/sched/sch_api.c | 151 +++++++++++++++++++++++++++++++++++++++++++++- net/sched/sch_generic.c | 1 + net/sched/sch_netem.c | 5 +- 7 files changed, 199 insertions(+), 5 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 87f4e0fa8f27..e5de421ac7b4 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -85,6 +85,26 @@ struct tc_ratespec #define TC_RTAB_SIZE 1024 +struct tc_sizespec { + unsigned char cell_log; + unsigned char size_log; + short cell_align; + int overhead; + unsigned int linklayer; + unsigned int mpu; + unsigned int mtu; + unsigned int tsize; +}; + +enum { + TCA_STAB_UNSPEC, + TCA_STAB_BASE, + TCA_STAB_DATA, + __TCA_STAB_MAX +}; + +#define TCA_STAB_MAX (__TCA_STAB_MAX - 1) + /* FIFO section */ struct tc_fifo_qopt diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index b358c704d102..f4d386c191f5 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -482,6 +482,7 @@ enum TCA_RATE, TCA_FCNT, TCA_STATS2, + TCA_STAB, __TCA_MAX }; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index e4e30052e4e2..6affcfaa123e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -83,6 +83,7 @@ extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab); extern void qdisc_put_rtab(struct qdisc_rate_table *tab); +extern void qdisc_put_stab(struct qdisc_size_table *tab); extern void __qdisc_run(struct Qdisc *q); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 8229520e088a..db9ad655eb8a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -29,6 +29,13 @@ enum qdisc_state_t __QDISC_STATE_SCHED, }; +struct qdisc_size_table { + struct list_head list; + struct tc_sizespec szopts; + int refcnt; + u16 data[]; +}; + struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); @@ -39,6 +46,7 @@ struct Qdisc #define TCQ_F_INGRESS 4 int padded; struct Qdisc_ops *ops; + struct qdisc_size_table *stab; u32 handle; u32 parent; atomic_t refcnt; @@ -165,6 +173,16 @@ struct tcf_proto struct tcf_proto_ops *ops; }; +struct qdisc_skb_cb { + unsigned int pkt_len; + char data[]; +}; + +static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb) +{ + return (struct qdisc_skb_cb *)skb->cb; +} + static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc) { return &qdisc->q.lock; @@ -257,6 +275,8 @@ extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, struct netdev_queue *dev_queue, struct Qdisc_ops *ops, u32 parentid); +extern void qdisc_calculate_pkt_len(struct sk_buff *skb, + struct qdisc_size_table *stab); extern void tcf_destroy(struct tcf_proto *tp); extern void tcf_destroy_chain(struct tcf_proto **fl); @@ -308,16 +328,19 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev) static inline unsigned int qdisc_pkt_len(struct sk_buff *skb) { - return skb->len; + return qdisc_skb_cb(skb)->pkt_len; } static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { + if (sch->stab) + qdisc_calculate_pkt_len(skb, sch->stab); return sch->enqueue(skb, sch); } static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch) { + qdisc_skb_cb(skb)->pkt_len = skb->len; return qdisc_enqueue(skb, sch); } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index fb43731c9860..5219d5f9d754 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -286,6 +286,129 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) } EXPORT_SYMBOL(qdisc_put_rtab); +static LIST_HEAD(qdisc_stab_list); +static DEFINE_SPINLOCK(qdisc_stab_lock); + +static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { + [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, + [TCA_STAB_DATA] = { .type = NLA_BINARY }, +}; + +static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) +{ + struct nlattr *tb[TCA_STAB_MAX + 1]; + struct qdisc_size_table *stab; + struct tc_sizespec *s; + unsigned int tsize = 0; + u16 *tab = NULL; + int err; + + err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy); + if (err < 0) + return ERR_PTR(err); + if (!tb[TCA_STAB_BASE]) + return ERR_PTR(-EINVAL); + + s = nla_data(tb[TCA_STAB_BASE]); + + if (s->tsize > 0) { + if (!tb[TCA_STAB_DATA]) + return ERR_PTR(-EINVAL); + tab = nla_data(tb[TCA_STAB_DATA]); + tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); + } + + if (!s || tsize != s->tsize || (!tab && tsize > 0)) + return ERR_PTR(-EINVAL); + + spin_lock(&qdisc_stab_lock); + + list_for_each_entry(stab, &qdisc_stab_list, list) { + if (memcmp(&stab->szopts, s, sizeof(*s))) + continue; + if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) + continue; + stab->refcnt++; + spin_unlock(&qdisc_stab_lock); + return stab; + } + + spin_unlock(&qdisc_stab_lock); + + stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); + if (!stab) + return ERR_PTR(-ENOMEM); + + stab->refcnt = 1; + stab->szopts = *s; + if (tsize > 0) + memcpy(stab->data, tab, tsize * sizeof(u16)); + + spin_lock(&qdisc_stab_lock); + list_add_tail(&stab->list, &qdisc_stab_list); + spin_unlock(&qdisc_stab_lock); + + return stab; +} + +void qdisc_put_stab(struct qdisc_size_table *tab) +{ + if (!tab) + return; + + spin_lock(&qdisc_stab_lock); + + if (--tab->refcnt == 0) { + list_del(&tab->list); + kfree(tab); + } + + spin_unlock(&qdisc_stab_lock); +} +EXPORT_SYMBOL(qdisc_put_stab); + +static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_STAB); + NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); + nla_nest_end(skb, nest); + + return skb->len; + +nla_put_failure: + return -1; +} + +void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + int pkt_len, slot; + + pkt_len = skb->len + stab->szopts.overhead; + if (unlikely(!stab->szopts.tsize)) + goto out; + + slot = pkt_len + stab->szopts.cell_align; + if (unlikely(slot < 0)) + slot = 0; + + slot >>= stab->szopts.cell_log; + if (likely(slot < stab->szopts.tsize)) + pkt_len = stab->data[slot]; + else + pkt_len = stab->data[stab->szopts.tsize - 1] * + (slot / stab->szopts.tsize) + + stab->data[slot % stab->szopts.tsize]; + + pkt_len <<= stab->szopts.size_log; +out: + if (unlikely(pkt_len < 1)) + pkt_len = 1; + qdisc_skb_cb(skb)->pkt_len = pkt_len; +} +EXPORT_SYMBOL(qdisc_calculate_pkt_len); + static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, @@ -613,6 +736,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, struct nlattr *kind = tca[TCA_KIND]; struct Qdisc *sch; struct Qdisc_ops *ops; + struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); #ifdef CONFIG_KMOD @@ -670,6 +794,14 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, sch->handle = handle; if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) { + err = PTR_ERR(stab); + goto err_out3; + } + sch->stab = stab; + } if (tca[TCA_RATE]) { err = gen_new_estimator(&sch->bstats, &sch->rate_est, qdisc_root_lock(sch), @@ -691,6 +823,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, return sch; } err_out3: + qdisc_put_stab(sch->stab); dev_put(dev); kfree((char *) sch - sch->padded); err_out2: @@ -702,15 +835,26 @@ err_out: static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) { - if (tca[TCA_OPTIONS]) { - int err; + struct qdisc_size_table *stab = NULL; + int err = 0; + if (tca[TCA_OPTIONS]) { if (sch->ops->change == NULL) return -EINVAL; err = sch->ops->change(sch, tca[TCA_OPTIONS]); if (err) return err; } + + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) + return PTR_ERR(stab); + } + + qdisc_put_stab(sch->stab); + sch->stab = stab; + if (tca[TCA_RATE]) gen_replace_estimator(&sch->bstats, &sch->rate_est, qdisc_root_lock(sch), tca[TCA_RATE]); @@ -994,6 +1138,9 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; q->qstats.qlen = q->q.qlen; + if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) + goto nla_put_failure; + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, qdisc_root_lock(q), &d) < 0) goto nla_put_failure; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 522a41a9f904..27a51f04db49 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -469,6 +469,7 @@ static void __qdisc_destroy(struct rcu_head *head) struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); const struct Qdisc_ops *ops = qdisc->ops; + qdisc_put_stab(qdisc->stab); gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); if (ops->reset) ops->reset(qdisc); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index ae49be00022f..a59085700678 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -84,8 +84,9 @@ struct netem_skb_cb { static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct netem_skb_cb)); - return (struct netem_skb_cb *)skb->cb; + BUILD_BUG_ON(sizeof(skb->cb) < + sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); + return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; } /* init_crandom - initialize correlated random number generator -- cgit v1.2.3 From a94f779f9d82eb2d758a8715eaae5df98e8dcb21 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 22 Jul 2008 14:20:11 -0700 Subject: pkt_sched: make qdisc_class_hash_alloc() static This patch makes the needlessly global qdisc_class_hash_alloc() static. Signed-off-by: Adrian Bunk Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 5219d5f9d754..b0601642e227 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -447,7 +447,7 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) } EXPORT_SYMBOL(qdisc_watchdog_cancel); -struct hlist_head *qdisc_class_hash_alloc(unsigned int n) +static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) { unsigned int size = n * sizeof(struct hlist_head), i; struct hlist_head *h; -- cgit v1.2.3 From 8d50b53d66a8a6ae41bafbdcabe401467803f33a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 30 Jul 2008 02:37:46 -0700 Subject: pkt_sched: Fix OOPS on ingress qdisc add. Bug report from Steven Jan Springl: Issuing the following command causes a kernel oops: tc qdisc add dev eth0 handle ffff: ingress The problem mostly stems from all of the special case handling of ingress qdiscs. So, to fix this, do the grafting operation the same way we do for TX qdiscs. Which means that dev_activate() and dev_deactivate() now do the "qdisc_sleeping <--> qdisc" transitions on dev->rx_queue too. Future simplifications are possible now, mainly because it is impossible for dev_queue->{qdisc,qdisc_sleeping} to be NULL. There are NULL checks all over to handle the ingress qdisc special case that used to exist before this commit. Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- net/sched/sch_api.c | 57 ++++++++++++++----------------------------------- net/sched/sch_generic.c | 8 ++++--- 3 files changed, 23 insertions(+), 46 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/core/dev.c b/net/core/dev.c index 8d13a9b9f1df..63d6bcddbf46 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2100,7 +2100,7 @@ static int ing_filter(struct sk_buff *skb) rxq = &dev->rx_queue; q = rxq->qdisc; - if (q) { + if (q != &noop_qdisc) { spin_lock(qdisc_lock(q)); result = qdisc_enqueue_root(skb, q); spin_unlock(qdisc_lock(q)); @@ -2113,7 +2113,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - if (!skb->dev->rx_queue.qdisc) + if (skb->dev->rx_queue.qdisc == &noop_qdisc) goto out; if (*pt_prev) { diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b0601642e227..4840aff47256 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -572,44 +572,21 @@ static u32 qdisc_alloc_handle(struct net_device *dev) static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc) { + struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; spinlock_t *root_lock; - struct Qdisc *oqdisc; - int ingress; - - ingress = 0; - if (qdisc && qdisc->flags&TCQ_F_INGRESS) - ingress = 1; - - if (ingress) { - oqdisc = dev_queue->qdisc; - } else { - oqdisc = dev_queue->qdisc_sleeping; - } root_lock = qdisc_root_lock(oqdisc); spin_lock_bh(root_lock); - if (ingress) { - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) { - /* delete */ - qdisc_reset(oqdisc); - dev_queue->qdisc = NULL; - } else { /* new */ - dev_queue->qdisc = qdisc; - } + /* Prune old scheduler */ + if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) + qdisc_reset(oqdisc); - } else { - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) - qdisc_reset(oqdisc); - - /* ... and graft new one */ - if (qdisc == NULL) - qdisc = &noop_qdisc; - dev_queue->qdisc_sleeping = qdisc; - dev_queue->qdisc = &noop_qdisc; - } + /* ... and graft new one */ + if (qdisc == NULL) + qdisc = &noop_qdisc; + dev_queue->qdisc_sleeping = qdisc; + dev_queue->qdisc = &noop_qdisc; spin_unlock_bh(root_lock); @@ -678,7 +655,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, ingress = 0; num_q = dev->num_tx_queues; - if (q && q->flags & TCQ_F_INGRESS) { + if ((q && q->flags & TCQ_F_INGRESS) || + (new && new->flags & TCQ_F_INGRESS)) { num_q = 1; ingress = 1; } @@ -692,13 +670,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (!ingress) dev_queue = netdev_get_tx_queue(dev, i); - if (ingress) { - old = dev_graft_qdisc(dev_queue, q); - } else { - old = dev_graft_qdisc(dev_queue, new); - if (new && i > 0) - atomic_inc(&new->refcnt); - } + old = dev_graft_qdisc(dev_queue, new); + if (new && i > 0) + atomic_inc(&new->refcnt); + notify_and_destroy(skb, n, classid, old, new); } @@ -817,7 +792,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out3; } } - if (parent) + if (parent && !(sch->flags & TCQ_F_INGRESS)) list_add_tail(&sch->list, &dev_queue->qdisc->list); return sch; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index fd2a6cadb115..345838a2e369 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -596,7 +596,7 @@ static void transition_one_qdisc(struct net_device *dev, int *need_watchdog_p = _need_watchdog; rcu_assign_pointer(dev_queue->qdisc, new_qdisc); - if (new_qdisc != &noqueue_qdisc) + if (need_watchdog_p && new_qdisc != &noqueue_qdisc) *need_watchdog_p = 1; } @@ -619,6 +619,7 @@ void dev_activate(struct net_device *dev) need_watchdog = 0; netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); + transition_one_qdisc(dev, &dev->rx_queue, NULL); if (need_watchdog) { dev->trans_start = jiffies; @@ -677,6 +678,7 @@ void dev_deactivate(struct net_device *dev) bool running; netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); + dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc); dev_watchdog_down(dev); @@ -718,7 +720,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, void dev_init_scheduler(struct net_device *dev) { netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); - dev_init_scheduler_queue(dev, &dev->rx_queue, NULL); + dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); } @@ -745,6 +747,6 @@ static void shutdown_scheduler_queue(struct net_device *dev, void dev_shutdown(struct net_device *dev) { netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); - shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); + shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); WARN_ON(timer_pending(&dev->watchdog_timer)); } -- cgit v1.2.3 From ee7af8264dafa0c8c76a8dc596803966c2e29ebc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 6 Aug 2008 23:35:59 -0700 Subject: pkt_sched: Fix "parent is root" test in qdisc_create(). As noticed by Stephen Hemminger, the root qdisc is denoted by TC_H_ROOT, not zero. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 4840aff47256..83b23b55ce36 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -792,7 +792,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out3; } } - if (parent && !(sch->flags & TCQ_F_INGRESS)) + if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS)) list_add_tail(&sch->list, &dev_queue->qdisc->list); return sch; -- cgit v1.2.3 From 827ebd6410005b05b3c930ef6a116666c6986886 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 7 Aug 2008 20:26:40 -0700 Subject: pkt_sched: Fix qdisc config when link is down. Bug reported by Stephen Hemminger. We need to fetch the root from ->qdisc_sleeping not ->qdisc. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 83b23b55ce36..ba1d121f3127 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -189,7 +189,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - struct Qdisc *q, *txq_root = txq->qdisc; + struct Qdisc *q, *txq_root = txq->qdisc_sleeping; if (!(txq_root->flags & TCQ_F_BUILTIN) && txq_root->handle == handle) @@ -793,7 +793,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, } } if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS)) - list_add_tail(&sch->list, &dev_queue->qdisc->list); + list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list); return sch; } @@ -1236,11 +1236,11 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) q_idx = 0; dev_queue = netdev_get_tx_queue(dev, 0); - if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) goto done; dev_queue = &dev->rx_queue; - if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) goto done; cont: -- cgit v1.2.3 From 8123b421e8ed944671d7241323ed3198cccb4041 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 8 Aug 2008 23:23:39 -0700 Subject: pkt_sched: Fix ingress deletion and filter attachment. Based upon bug reports by Stephen Hemminger. We still had some cases using ->qdisc instead of ->qdisc_sleeping. Also, qdisc_lookup() should return ingress qdiscs. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ba1d121f3127..bbf149dd7818 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -183,6 +183,21 @@ EXPORT_SYMBOL(unregister_qdisc); (root qdisc, all its children, children of children etc.) */ +struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) +{ + struct Qdisc *q; + + if (!(root->flags & TCQ_F_BUILTIN) && + root->handle == handle) + return root; + + list_for_each_entry(q, &root->list, list) { + if (q->handle == handle) + return q; + } + return NULL; +} + struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { unsigned int i; @@ -191,16 +206,11 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) struct netdev_queue *txq = netdev_get_tx_queue(dev, i); struct Qdisc *q, *txq_root = txq->qdisc_sleeping; - if (!(txq_root->flags & TCQ_F_BUILTIN) && - txq_root->handle == handle) - return txq_root; - - list_for_each_entry(q, &txq_root->list, list) { - if (q->handle == handle) - return q; - } + q = qdisc_match_from_root(txq_root, handle); + if (q) + return q; } - return NULL; + return qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); } static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) @@ -908,7 +918,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -ENOENT; q = qdisc_leaf(p, clid); } else { /* ingress */ - q = dev->rx_queue.qdisc; + q = dev->rx_queue.qdisc_sleeping; } } else { struct netdev_queue *dev_queue; @@ -978,7 +988,7 @@ replay: return -ENOENT; q = qdisc_leaf(p, clid); } else { /*ingress */ - q = dev->rx_queue.qdisc; + q = dev->rx_queue.qdisc_sleeping; } } else { struct netdev_queue *dev_queue; @@ -1529,11 +1539,11 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) t = 0; dev_queue = netdev_get_tx_queue(dev, 0); - if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) goto done; dev_queue = &dev->rx_queue; - if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) goto done; done: -- cgit v1.2.3 From 1cfa26661a85549063e369e2b40275eeaa7b923c Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 11 Aug 2008 18:11:06 -0700 Subject: pkt_sched: Add BH protection for qdisc_stab_lock. Since qdisc_stab_lock is used in qdisc_put_stab(), which is called in BH context from __qdisc_destroy() RCU callback, softirq safe locking is needed. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_api.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bbf149dd7818..c25465e5607a 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -331,7 +331,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (!s || tsize != s->tsize || (!tab && tsize > 0)) return ERR_PTR(-EINVAL); - spin_lock(&qdisc_stab_lock); + spin_lock_bh(&qdisc_stab_lock); list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) @@ -339,11 +339,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) continue; stab->refcnt++; - spin_unlock(&qdisc_stab_lock); + spin_unlock_bh(&qdisc_stab_lock); return stab; } - spin_unlock(&qdisc_stab_lock); + spin_unlock_bh(&qdisc_stab_lock); stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); if (!stab) @@ -354,9 +354,9 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0) memcpy(stab->data, tab, tsize * sizeof(u16)); - spin_lock(&qdisc_stab_lock); + spin_lock_bh(&qdisc_stab_lock); list_add_tail(&stab->list, &qdisc_stab_list); - spin_unlock(&qdisc_stab_lock); + spin_unlock_bh(&qdisc_stab_lock); return stab; } @@ -366,14 +366,14 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (!tab) return; - spin_lock(&qdisc_stab_lock); + spin_lock_bh(&qdisc_stab_lock); if (--tab->refcnt == 0) { list_del(&tab->list); kfree(tab); } - spin_unlock(&qdisc_stab_lock); + spin_unlock_bh(&qdisc_stab_lock); } EXPORT_SYMBOL(qdisc_put_stab); -- cgit v1.2.3 From 3a76e3716b4e571f5d91a20b6afb412560599083 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 17 Aug 2008 22:02:11 -0700 Subject: pkt_sched: Grab correct lock in notify_and_destroy(). From: Jarek Poplawski When we are destroying non-root qdiscs, we need to lock the root of the qdisc tree not the the qdisc itself. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c25465e5607a..c8dc72e12107 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -638,9 +638,9 @@ static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid qdisc_notify(skb, n, clid, old, new); if (old) { - spin_lock_bh(&old->q.lock); + sch_tree_lock(old); qdisc_destroy(old); - spin_unlock_bh(&old->q.lock); + sch_tree_unlock(old); } } -- cgit v1.2.3 From 8608db031b4d2932d645709e2cfe8fbcd91a7305 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 18 Aug 2008 20:51:18 -0700 Subject: pkt_sched: Never schedule non-root qdiscs. Based upon initial discovery and patch by Jarek Poplawski. The qdisc watchdogs can be attached to any qdisc, not just the root, so make sure we schedule the correct one. CBQ has a similar bug. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- net/sched/sch_cbq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c8dc72e12107..98c00847a3d2 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -426,7 +426,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - __netif_schedule(wd->qdisc); + __netif_schedule(qdisc_root(wd->qdisc)); return HRTIMER_NORESTART; } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4e261ce62f48..47ef492c4ff4 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -654,7 +654,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) } sch->flags &= ~TCQ_F_THROTTLED; - __netif_schedule(sch); + __netif_schedule(qdisc_root(sch)); return HRTIMER_NORESTART; } -- cgit v1.2.3 From 25bfcd5a78a377ea4c54a3c21e44590e2fc478a6 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 18 Aug 2008 20:53:34 -0700 Subject: pkt_sched: Add lockdep annotation for qdisc locks Qdisc locks are initialized in the same function, qdisc_alloc(), so lockdep can't distinguish tx qdisc lock from rx and reports "possible recursive locking detected" when both these locks are taken eg. while using act_mirred with ifb. This looks like a false positive. Anyway, after this patch these locks will be reported more exactly. Reported-by: Denys Fedoryshchenko Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_api.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 98c00847a3d2..7d7070b1eebd 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -707,6 +708,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, return err; } +/* lockdep annotation is needed for ingress; egress gets it only for name */ +static struct lock_class_key qdisc_tx_lock; +static struct lock_class_key qdisc_rx_lock; + /* Allocate and initialize new qdisc. @@ -767,6 +772,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; handle = TC_H_MAKE(TC_H_INGRESS, 0); + lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock); } else { if (handle == 0) { handle = qdisc_alloc_handle(dev); @@ -774,6 +780,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (handle == 0) goto err_out3; } + lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); } sch->handle = handle; -- cgit v1.2.3 From 4d8863a29c4755a0461cd31b6865026187d6c43a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 18 Aug 2008 21:03:15 -0700 Subject: pkt_sched: Don't hold qdisc lock over qdisc_destroy(). Based upon reports by Denys Fedoryshchenko, and feedback and help from Jarek Poplawski and Herbert Xu. We always either: 1) Never made an external reference to this qdisc. or 2) Did a dev_deactivate() which purged all asynchronous references. So do not lock the qdisc when we call qdisc_destroy(), it's illegal anyways as when we drop the lock this is free'd memory. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 13 ++----------- net/sched/sch_generic.c | 6 ------ 2 files changed, 2 insertions(+), 17 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7d7070b1eebd..d91a2338877c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -638,11 +638,8 @@ static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid if (new || old) qdisc_notify(skb, n, clid, old, new); - if (old) { - sch_tree_lock(old); + if (old) qdisc_destroy(old); - sch_tree_unlock(old); - } } /* Graft qdisc "new" to class "classid" of qdisc "parent" or @@ -1092,16 +1089,10 @@ create_n_graft: graft: if (1) { - spinlock_t *root_lock; - err = qdisc_graft(dev, p, skb, n, clid, q, NULL); if (err) { - if (q) { - root_lock = qdisc_root_lock(q); - spin_lock_bh(root_lock); + if (q) qdisc_destroy(q); - spin_unlock_bh(root_lock); - } return err; } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6f96b7bc0809..c3ed4d44fc14 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -518,8 +518,6 @@ void qdisc_reset(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_reset); -/* Under qdisc_lock(qdisc) and BH! */ - void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; @@ -712,14 +710,10 @@ static void shutdown_scheduler_queue(struct net_device *dev, struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { - spinlock_t *root_lock = qdisc_lock(qdisc); - dev_queue->qdisc = qdisc_default; dev_queue->qdisc_sleeping = qdisc_default; - spin_lock_bh(root_lock); qdisc_destroy(qdisc); - spin_unlock_bh(root_lock); } } -- cgit v1.2.3 From e5befbd9525d92bb074b70192eb2c69aae65fc60 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 18 Aug 2008 22:30:01 -0700 Subject: pkt_sched: remove bogus block (cleanup) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ...Last block local var got just deleted. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/sched/sch_api.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d91a2338877c..9372ec41ce84 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1088,14 +1088,13 @@ create_n_graft: } graft: - if (1) { - err = qdisc_graft(dev, p, skb, n, clid, q, NULL); - if (err) { - if (q) - qdisc_destroy(q); - return err; - } + err = qdisc_graft(dev, p, skb, n, clid, q, NULL); + if (err) { + if (q) + qdisc_destroy(q); + return err; } + return 0; } -- cgit v1.2.3 From f3b9605d744df537dee10fd06630f35a62b343ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 18 Aug 2008 22:33:05 -0700 Subject: Revert "pkt_sched: Add BH protection for qdisc_stab_lock." This reverts commit 1cfa26661a85549063e369e2b40275eeaa7b923c. qdisc_destroy() runs fully under RTNL again and not from softint any longer, so this change is no longer needed. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 9372ec41ce84..ef0efeca6352 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -332,7 +332,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (!s || tsize != s->tsize || (!tab && tsize > 0)) return ERR_PTR(-EINVAL); - spin_lock_bh(&qdisc_stab_lock); + spin_lock(&qdisc_stab_lock); list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) @@ -340,11 +340,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) continue; stab->refcnt++; - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); return stab; } - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); if (!stab) @@ -355,9 +355,9 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0) memcpy(stab->data, tab, tsize * sizeof(u16)); - spin_lock_bh(&qdisc_stab_lock); + spin_lock(&qdisc_stab_lock); list_add_tail(&stab->list, &qdisc_stab_list); - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); return stab; } @@ -367,14 +367,14 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (!tab) return; - spin_lock_bh(&qdisc_stab_lock); + spin_lock(&qdisc_stab_lock); if (--tab->refcnt == 0) { list_del(&tab->list); kfree(tab); } - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); } EXPORT_SYMBOL(qdisc_put_stab); -- cgit v1.2.3 From 2540e0511ea17e25831be543cdf9381e6209950d Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 21 Aug 2008 05:11:14 -0700 Subject: pkt_sched: Fix qdisc_watchdog() vs. dev_deactivate() race dev_deactivate() can skip rescheduling of a qdisc by qdisc_watchdog() or other timer calling netif_schedule() after dev_queue_deactivate(). We prevent this checking aliveness before scheduling the timer. Since during deactivation the root qdisc is available only as qdisc_sleeping additional accessor qdisc_root_sleeping() is created. With feedback from Herbert Xu Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 +++++ net/sched/sch_api.c | 4 ++++ net/sched/sch_cbq.c | 4 ++++ 3 files changed, 13 insertions(+) (limited to 'net/sched/sch_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 84d25f2e6188..b1d2cfea89c5 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -193,6 +193,11 @@ static inline struct Qdisc *qdisc_root(struct Qdisc *qdisc) return qdisc->dev_queue->qdisc; } +static inline struct Qdisc *qdisc_root_sleeping(struct Qdisc *qdisc) +{ + return qdisc->dev_queue->qdisc_sleeping; +} + /* The qdisc root lock is a mechanism by which to top level * of a qdisc tree can be locked from any qdisc node in the * forest. This allows changing the configuration of some diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ef0efeca6352..45f442d7de47 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -444,6 +444,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { ktime_t time; + if (test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(wd->qdisc)->state)) + return; + wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_US2NS(expires)); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 47ef492c4ff4..8fa90d68ec6d 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -521,6 +521,10 @@ static void cbq_ovl_delay(struct cbq_class *cl) struct cbq_sched_data *q = qdisc_priv(cl->qdisc); psched_tdiff_t delay = cl->undertime - q->now; + if (test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(cl->qdisc)->state)) + return; + if (!cl->delayed) { psched_time_t sched = q->now; ktime_t expires; -- cgit v1.2.3 From f6e0b239a2657ea8cb67f0d83d0bfdbfd19a481b Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Fri, 22 Aug 2008 03:24:05 -0700 Subject: pkt_sched: Fix qdisc list locking Since some qdiscs call qdisc_tree_decrease_qlen() (so qdisc_lookup()) without rtnl_lock(), adding and deleting from a qdisc list needs additional locking. This patch adds global spinlock qdisc_list_lock and wrapper functions for modifying the list. It is considered as a temporary solution until hfsc_dequeue(), netem_dequeue() and tbf_dequeue() (or qdisc_tree_decrease_qlen()) are redone. With feedback from Herbert Xu and David S. Miller. Signed-off-by: Jarek Poplawski Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 1 + net/sched/sch_api.c | 44 +++++++++++++++++++++++++++++++++++++++----- net/sched/sch_generic.c | 5 ++--- 3 files changed, 42 insertions(+), 8 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 853fe83d9f37..b786a5b09253 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -78,6 +78,7 @@ extern struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, extern int register_qdisc(struct Qdisc_ops *qops); extern int unregister_qdisc(struct Qdisc_ops *qops); +extern void qdisc_list_del(struct Qdisc *q); extern struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 45f442d7de47..e7fb9e0d21b4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -199,19 +199,53 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } +/* + * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen() + * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue() + */ +static DEFINE_SPINLOCK(qdisc_list_lock); + +static void qdisc_list_add(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + spin_lock_bh(&qdisc_list_lock); + list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); + spin_unlock_bh(&qdisc_list_lock); + } +} + +void qdisc_list_del(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + spin_lock_bh(&qdisc_list_lock); + list_del(&q->list); + spin_unlock_bh(&qdisc_list_lock); + } +} +EXPORT_SYMBOL(qdisc_list_del); + struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { unsigned int i; + struct Qdisc *q; + + spin_lock_bh(&qdisc_list_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - struct Qdisc *q, *txq_root = txq->qdisc_sleeping; + struct Qdisc *txq_root = txq->qdisc_sleeping; q = qdisc_match_from_root(txq_root, handle); if (q) - return q; + goto unlock; } - return qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); + + q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); + +unlock: + spin_unlock_bh(&qdisc_list_lock); + + return q; } static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) @@ -810,8 +844,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out3; } } - if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS)) - list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list); + + qdisc_list_add(sch); return sch; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index c3ed4d44fc14..5f0ade7806a7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -526,10 +526,9 @@ void qdisc_destroy(struct Qdisc *qdisc) !atomic_dec_and_test(&qdisc->refcnt)) return; - if (qdisc->parent) - list_del(&qdisc->list); - #ifdef CONFIG_NET_SCHED + qdisc_list_del(qdisc); + qdisc_put_stab(qdisc->stab); #endif gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); -- cgit v1.2.3 From 666d9bbedfff7c2c37eab92e715641922dee6864 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 27 Aug 2008 02:12:52 -0700 Subject: pkt_sched: Fix dev_graft_qdisc() locking During dev_graft_qdisc() dev is deactivated, so qdisc_root_lock() returns wrong lock of noop_qdisc instead of qdisc_sleeping. Signed-off-by: Jarek Poplawski Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e7fb9e0d21b4..341d558b6e39 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -624,7 +624,7 @@ static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; spinlock_t *root_lock; - root_lock = qdisc_root_lock(oqdisc); + root_lock = qdisc_lock(oqdisc); spin_lock_bh(root_lock); /* Prune old scheduler */ -- cgit v1.2.3 From f7a54c13c7b072d9426bd5cec1cdb8306df5ef55 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 27 Aug 2008 02:22:07 -0700 Subject: pkt_sched: Use rcu_assign_pointer() to change dev_queue->qdisc These pointers are RCU protected, so proper primitives should be used. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- net/sched/sch_generic.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 341d558b6e39..ad9cda1b8c0a 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -635,7 +635,7 @@ static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, if (qdisc == NULL) qdisc = &noop_qdisc; dev_queue->qdisc_sleeping = qdisc; - dev_queue->qdisc = &noop_qdisc; + rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); spin_unlock_bh(root_lock); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5f0ade7806a7..9634091ee2f0 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -634,7 +634,7 @@ static void dev_deactivate_queue(struct net_device *dev, if (!(qdisc->flags & TCQ_F_BUILTIN)) set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); - dev_queue->qdisc = qdisc_default; + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); @@ -709,7 +709,7 @@ static void shutdown_scheduler_queue(struct net_device *dev, struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { - dev_queue->qdisc = qdisc_default; + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); dev_queue->qdisc_sleeping = qdisc_default; qdisc_destroy(qdisc); -- cgit v1.2.3 From f6f9b93f1624206c802ac9162c9302edaf59bfd9 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 27 Aug 2008 02:25:17 -0700 Subject: pkt_sched: Fix gen_estimator locks While passing a qdisc root lock to gen_new_estimator() and gen_replace_estimator() dev could be deactivated or even before grafting proper root qdisc as qdisc_sleeping (e.g. qdisc_create), so using qdisc_root_lock() is not enough. This patch adds qdisc_root_sleeping_lock() for this, plus additional checks, where necessary. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 8 ++++++++ net/sched/sch_api.c | 14 +++++++++++--- net/sched/sch_cbq.c | 4 ++-- net/sched/sch_hfsc.c | 4 ++-- net/sched/sch_htb.c | 4 ++-- 5 files changed, 25 insertions(+), 9 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index b1d2cfea89c5..ef8a7e2e12e7 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -217,6 +217,14 @@ static inline spinlock_t *qdisc_root_lock(struct Qdisc *qdisc) return qdisc_lock(root); } +static inline spinlock_t *qdisc_root_sleeping_lock(struct Qdisc *qdisc) +{ + struct Qdisc *root = qdisc_root_sleeping(qdisc); + + ASSERT_RTNL(); + return qdisc_lock(root); +} + static inline struct net_device *qdisc_dev(struct Qdisc *qdisc) { return qdisc->dev_queue->dev; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ad9cda1b8c0a..506b709510b6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -830,9 +830,16 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, sch->stab = stab; } if (tca[TCA_RATE]) { + spinlock_t *root_lock; + + if ((sch->parent != TC_H_ROOT) && + !(sch->flags & TCQ_F_INGRESS)) + root_lock = qdisc_root_sleeping_lock(sch); + else + root_lock = qdisc_lock(sch); + err = gen_new_estimator(&sch->bstats, &sch->rate_est, - qdisc_root_lock(sch), - tca[TCA_RATE]); + root_lock, tca[TCA_RATE]); if (err) { /* * Any broken qdiscs that would require @@ -884,7 +891,8 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) if (tca[TCA_RATE]) gen_replace_estimator(&sch->bstats, &sch->rate_est, - qdisc_root_lock(sch), tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); return 0; } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 8fa90d68ec6d..9b720adedead 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1839,7 +1839,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); return 0; } @@ -1930,7 +1930,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index c2b8d9cce3d2..c1e77da8cd09 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1045,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); return 0; } @@ -1104,7 +1104,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0df0df202ed0..97d4761cc31e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1372,7 +1372,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE] ? : &est.nla); cl->refcnt = 1; cl->children = 0; @@ -1427,7 +1427,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } else { if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); sch_tree_lock(sch); } -- cgit v1.2.3 From 102396ae65108b026e4e1868e30fa013f45a169e Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Fri, 29 Aug 2008 14:21:52 -0700 Subject: pkt_sched: Fix locking of qdisc_root with qdisc_root_sleeping_lock() Use qdisc_root_sleeping_lock() instead of qdisc_root_lock() where appropriate. The only difference is while dev is deactivated, when currently we can use a sleeping qdisc with the lock of noop_qdisc. This shouldn't be dangerous since after deactivation root lock could be used only by gen_estimator code, but looks wrong anyway. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- net/sched/cls_route.c | 2 +- net/sched/sch_api.c | 8 ++++---- net/sched/sch_cbq.c | 2 +- net/sched/sch_htb.c | 4 ++-- net/sched/sch_netem.c | 2 +- net/sched/sch_teql.c | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5cafdd4c8018..8eb79e92e94c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -205,7 +205,7 @@ replay: } } - root_lock = qdisc_root_lock(q); + root_lock = qdisc_root_sleeping_lock(q); if (tp == NULL) { /* Proto-tcf does not exist, create new one */ diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 481260a4f10f..e3d8455eebc2 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -75,7 +75,7 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif) static inline void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) { - spinlock_t *root_lock = qdisc_root_lock(q); + spinlock_t *root_lock = qdisc_root_sleeping_lock(q); spin_lock_bh(root_lock); memset(head->fastmap, 0, sizeof(head->fastmap)); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 506b709510b6..1122c952aa99 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1169,8 +1169,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) goto nla_put_failure; - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, qdisc_root_lock(q), &d) < 0) + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + qdisc_root_sleeping_lock(q), &d) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -1461,8 +1461,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) goto nla_put_failure; - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, qdisc_root_lock(q), &d) < 0) + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + qdisc_root_sleeping_lock(q), &d) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 9b720adedead..8b06fa900482 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1754,7 +1754,7 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) if (--cl->refcnt == 0) { #ifdef CONFIG_NET_CLS_ACT - spinlock_t *root_lock = qdisc_root_lock(sch); + spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct cbq_sched_data *q = qdisc_priv(sch); spin_lock_bh(root_lock); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 97d4761cc31e..d14f02056ae6 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1043,7 +1043,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { - spinlock_t *root_lock = qdisc_root_lock(sch); + spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_glob gopt; @@ -1075,7 +1075,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; - spinlock_t *root_lock = qdisc_root_lock(sch); + spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct nlattr *nest; struct tc_htb_opt opt; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index fb0294d0b55e..3781e55046d0 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -341,7 +341,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) for (i = 0; i < n; i++) d->table[i] = data[i]; - root_lock = qdisc_root_lock(sch); + root_lock = qdisc_root_sleeping_lock(sch); spin_lock_bh(root_lock); d = xchg(&q->delay_dist, d); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 2c35c678563b..d35ef059abb1 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -161,7 +161,7 @@ teql_destroy(struct Qdisc* sch) txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - root_lock = qdisc_root_lock(txq->qdisc); + root_lock = qdisc_root_sleeping_lock(txq->qdisc); spin_lock_bh(root_lock); qdisc_reset(txq->qdisc); spin_unlock_bh(root_lock); -- cgit v1.2.3 From 95a5afca4a8d2e1cb77e1d4bc6ff9f718dc32f7a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Oct 2008 15:24:51 -0700 Subject: net: Remove CONFIG_KMOD from net/ (towards removing CONFIG_KMOD entirely) Some code here depends on CONFIG_KMOD to not try to load protocol modules or similar, replace by CONFIG_MODULES where more than just request_module depends on CONFIG_KMOD and and also use try_then_request_module in ebtables. Signed-off-by: Johannes Berg Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- net/bluetooth/af_bluetooth.c | 8 +------- net/bridge/netfilter/ebtables.c | 15 +++------------ net/can/af_can.c | 4 ++-- net/core/dev.c | 2 -- net/core/rtnetlink.c | 4 ++-- net/dccp/ccid.c | 2 +- net/decnet/dn_dev.c | 2 -- net/ipv4/devinet.c | 2 -- net/ipv4/inet_diag.c | 2 -- net/ipv4/tcp_cong.c | 4 ++-- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/nfnetlink.c | 2 +- net/netlink/af_netlink.c | 2 +- net/phonet/af_phonet.c | 3 +-- net/sched/act_api.c | 2 +- net/sched/cls_api.c | 2 +- net/sched/ematch.c | 2 +- net/sched/sch_api.c | 2 +- net/socket.c | 2 +- net/sunrpc/auth.c | 2 -- 20 files changed, 20 insertions(+), 46 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f6348e078aa4..8f9431a12c6f 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -37,10 +37,7 @@ #include #include #include - -#if defined(CONFIG_KMOD) #include -#endif #include @@ -145,11 +142,8 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto) if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; -#if defined(CONFIG_KMOD) - if (!bt_proto[proto]) { + if (!bt_proto[proto]) request_module("bt-proto-%d", proto); - } -#endif err = -EPROTONOSUPPORT; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5bb88eb0aad4..0fa208e86405 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -305,23 +305,14 @@ find_inlist_lock_noload(struct list_head *head, const char *name, int *error, return NULL; } -#ifndef CONFIG_KMOD -#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m)) -#else static void * find_inlist_lock(struct list_head *head, const char *name, const char *prefix, int *error, struct mutex *mutex) { - void *ret; - - ret = find_inlist_lock_noload(head, name, error, mutex); - if (!ret) { - request_module("%s%s", prefix, name); - ret = find_inlist_lock_noload(head, name, error, mutex); - } - return ret; + return try_then_request_module( + find_inlist_lock_noload(head, name, error, mutex), + "%s%s", prefix, name); } -#endif static inline struct ebt_table * find_table_lock(const char *name, int *error, struct mutex *mutex) diff --git a/net/can/af_can.c b/net/can/af_can.c index 8035fbf526ae..7d4d2b3c137e 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -128,8 +128,8 @@ static int can_create(struct net *net, struct socket *sock, int protocol) if (net != &init_net) return -EAFNOSUPPORT; -#ifdef CONFIG_KMOD - /* try to load protocol module, when CONFIG_KMOD is defined */ +#ifdef CONFIG_MODULES + /* try to load protocol module kernel is modular */ if (!proto_tab[protocol]) { err = request_module("can-proto-%d", protocol); diff --git a/net/core/dev.c b/net/core/dev.c index 1408a083fe4e..868ec0ba8b77 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4956,8 +4956,6 @@ EXPORT_SYMBOL(br_fdb_get_hook); EXPORT_SYMBOL(br_fdb_put_hook); #endif -#ifdef CONFIG_KMOD EXPORT_SYMBOL(dev_load); -#endif EXPORT_PER_CPU_SYMBOL(softnet_data); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3630131fa1fa..31f29d2989fd 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1040,7 +1040,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *linkinfo[IFLA_INFO_MAX+1]; int err; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES replay: #endif err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); @@ -1129,7 +1129,7 @@ replay: return -EOPNOTSUPP; if (!ops) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (kind[0]) { __rtnl_unlock(); request_module("rtnl-link-%s", kind); diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 4809753d12ae..8fe931a3d7a1 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -154,7 +154,7 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) struct ccid *ccid = NULL; ccids_read_lock(); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (ccids[id] == NULL) { /* We only try to load if in process context */ ccids_read_unlock(); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index ba352588e344..4fd4a4f74e82 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -490,9 +490,7 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) return -EFAULT; ifr->ifr_name[IFNAMSIZ-1] = 0; -#ifdef CONFIG_KMOD dev_load(&init_net, ifr->ifr_name); -#endif switch(cmd) { case SIOCGIFADDR: diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b12dae2b0b2d..abef49376ac8 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -613,9 +613,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (colon) *colon = 0; -#ifdef CONFIG_KMOD dev_load(net, ifr.ifr_name); -#endif switch (cmd) { case SIOCGIFADDR: /* Get interface address */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 89cb047ab314..564230dabcb8 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -53,11 +53,9 @@ static DEFINE_MUTEX(inet_diag_table_mutex); static const struct inet_diag_handler *inet_diag_lock_handler(int type) { -#ifdef CONFIG_KMOD if (!inet_diag_table[type]) request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, NETLINK_INET_DIAG, type); -#endif mutex_lock(&inet_diag_table_mutex); if (!inet_diag_table[type]) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 6a250828b767..4ec5b4e97c4e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -115,7 +115,7 @@ int tcp_set_default_congestion_control(const char *name) spin_lock(&tcp_cong_list_lock); ca = tcp_ca_find(name); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (!ca && capable(CAP_SYS_MODULE)) { spin_unlock(&tcp_cong_list_lock); @@ -244,7 +244,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) if (ca == icsk->icsk_ca_ops) goto out; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES /* not found attempt to autoload module */ if (!ca && capable(CAP_SYS_MODULE)) { rcu_read_unlock(); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 08e82d64eb6f..2e4ad9671e19 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -822,7 +822,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook); if (!parse_nat_setup) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES rcu_read_unlock(); nfnl_unlock(); if (request_module("nf-nat-ipv4") < 0) { diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 4739f9f961d8..9c0ba17a1ddb 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -137,7 +137,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) replay: ss = nfnetlink_get_subsys(type); if (!ss) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES nfnl_unlock(); request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); nfnl_lock(); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2fd8afac5f71..480184a857d2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -435,7 +435,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol) return -EPROTONOSUPPORT; netlink_lock_table(); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (!nl_table[protocol].registered) { netlink_unlock_table(); request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 9e9c6fce11aa..b9d97effebe3 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -67,11 +67,10 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol) } pnp = phonet_proto_get(protocol); -#ifdef CONFIG_KMOD if (pnp == NULL && request_module("net-pf-%d-proto-%d", PF_PHONET, protocol) == 0) pnp = phonet_proto_get(protocol); -#endif + if (pnp == NULL) return -EPROTONOSUPPORT; if (sock->type != pnp->sock_type) { diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9974b3f04f05..8f457f1e0acf 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -494,7 +494,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, a_o = tc_lookup_action_n(act_name); if (a_o == NULL) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES rtnl_unlock(); request_module("act_%s", act_name); rtnl_lock(); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8eb79e92e94c..16e7ac9774e5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -227,7 +227,7 @@ replay: err = -ENOENT; tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]); if (tp_ops == NULL) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES struct nlattr *kind = tca[TCA_KIND]; char name[IFNAMSIZ]; diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 5e6f82e0e6f3..e82519e548d7 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -224,7 +224,7 @@ static int tcf_em_validate(struct tcf_proto *tp, if (em->ops == NULL) { err = -ENOENT; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES __rtnl_unlock(); request_module("ematch-kind-%u", em_hdr->kind); rtnl_lock(); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1122c952aa99..b16ad2972c6b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -764,7 +764,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (ops == NULL && kind != NULL) { char name[IFNAMSIZ]; if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { diff --git a/net/socket.c b/net/socket.c index 3e8d4e35c08f..2b7a4b5c9b72 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1142,7 +1142,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol, sock->type = type; -#if defined(CONFIG_KMOD) +#ifdef CONFIG_MODULES /* Attempt to load a protocol module if the find failed. * * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 6bfea9ed6869..436bf1b4b76c 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -83,10 +83,8 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) if (flavor >= RPC_AUTH_MAXFLAVOR) goto out; -#ifdef CONFIG_KMOD if ((ops = auth_flavors[flavor]) == NULL) request_module("rpc-auth-%u", flavor); -#endif spin_lock(&rpc_authflavor_lock); ops = auth_flavors[flavor]; if (ops == NULL || !try_module_get(ops->owner)) { -- cgit v1.2.3