diff options
author | David S. Miller <davem@davemloft.net> | 2017-11-03 21:57:35 +0900 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-11-03 21:57:35 +0900 |
commit | 6ee79b6ebf6613f1c5bf2be0c3dca4e51817f2ca (patch) | |
tree | 54915d3a3f2af715f1681da274e3a98f6198e71b | |
parent | aa5fbf07541bbec0d79f4b32415aff2233971853 (diff) | |
parent | 46209401f8f6116bd0b2c2d14a63958e83ffca0b (diff) | |
download | linux-6ee79b6ebf6613f1c5bf2be0c3dca4e51817f2ca.tar.bz2 |
Merge branch 'net-mini_Qdisc'
Jiri Pirko says:
====================
net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath
This patchset's main patch is patch number 2. It carries the
description. Patch 1 is just a dependency.
---
v3->v4:
- rebased to be applicable on top of the current net-next
v2->v3:
- Using head change callback to replace miniq pointer every time tp head
changes. This eliminates one rcu dereference and makes the claim "without
added overhead" valid.
v1->v2:
- Use dev instead of skb->dev in sch_handle_egress as pointed out by Daniel
- Fixed synchronize_rcu_bh() in mini_qdisc_disable and commented
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/netdevice.h | 9 | ||||
-rw-r--r-- | include/net/pkt_cls.h | 14 | ||||
-rw-r--r-- | include/net/sch_generic.h | 37 | ||||
-rw-r--r-- | net/core/dev.c | 21 | ||||
-rw-r--r-- | net/sched/cls_api.c | 54 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 46 | ||||
-rw-r--r-- | net/sched/sch_ingress.c | 45 |
7 files changed, 166 insertions, 60 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e02f79b2110..7de7656550c2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1559,6 +1559,8 @@ enum netdev_priv_flags { * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one + * @miniq_ingress: ingress/clsact qdisc specific data for + * ingress processing * @ingress_queue: XXX: need comments on this one * @broadcast: hw bcast address * @@ -1576,7 +1578,8 @@ enum netdev_priv_flags { * @tx_global_lock: XXX: need comments on this one * * @xps_maps: XXX: need comments on this one - * + * @miniq_egress: clsact qdisc specific data for + * egress processing * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -1795,7 +1798,7 @@ struct net_device { void __rcu *rx_handler_data; #ifdef CONFIG_NET_CLS_ACT - struct tcf_proto __rcu *ingress_cl_list; + struct mini_Qdisc __rcu *miniq_ingress; #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS @@ -1826,7 +1829,7 @@ struct net_device { struct xps_dev_maps __rcu *xps_maps; #endif #ifdef CONFIG_NET_CLS_ACT - struct tcf_proto __rcu *egress_cl_list; + struct mini_Qdisc __rcu *miniq_egress; #endif /* These may be needed for future network-power-down code. */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index d15c40c7bde7..505d4b71975f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -26,6 +26,8 @@ enum tcf_block_binder_type { struct tcf_block_ext_info { enum tcf_block_binder_type binder_type; + tcf_chain_head_change_t *chain_head_change; + void *chain_head_change_priv; }; struct tcf_block_cb; @@ -37,12 +39,10 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, void tcf_chain_put(struct tcf_chain *chain); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q); -int tcf_block_get_ext(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei); void tcf_block_put(struct tcf_block *block); -void tcf_block_put_ext(struct tcf_block *block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei); static inline struct Qdisc *tcf_block_q(struct tcf_block *block) @@ -82,8 +82,7 @@ int tcf_block_get(struct tcf_block **p_block, } static inline -int tcf_block_get_ext(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei) { return 0; @@ -94,8 +93,7 @@ static inline void tcf_block_put(struct tcf_block *block) } static inline -void tcf_block_put_ext(struct tcf_block *block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c23e938f5b19..c64e62c9450a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -260,9 +260,12 @@ struct qdisc_skb_cb { unsigned char data[QDISC_CB_PRIV_LEN]; }; +typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); + struct tcf_chain { struct tcf_proto __rcu *filter_chain; - struct tcf_proto __rcu **p_filter_chain; + tcf_chain_head_change_t *chain_head_change; + void *chain_head_change_priv; struct list_head list; struct tcf_block *block; u32 index; /* chain index */ @@ -901,4 +904,36 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } +/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc. + * The fast path only needs to access filter list and to update stats + */ +struct mini_Qdisc { + struct tcf_proto *filter_list; + struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_queue __percpu *cpu_qstats; + struct rcu_head rcu; +}; + +static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq, + const struct sk_buff *skb) +{ + bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb); +} + +static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq) +{ + this_cpu_inc(miniq->cpu_qstats->drops); +} + +struct mini_Qdisc_pair { + struct mini_Qdisc miniq1; + struct mini_Qdisc miniq2; + struct mini_Qdisc __rcu **p_miniq; +}; + +void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, + struct tcf_proto *tp_head); +void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, + struct mini_Qdisc __rcu **p_miniq); + #endif diff --git a/net/core/dev.c b/net/core/dev.c index 24ac9083bc13..1423cf4d695c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3274,22 +3274,22 @@ EXPORT_SYMBOL(dev_loopback_xmit); static struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { - struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list); + struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress); struct tcf_result cl_res; - if (!cl) + if (!miniq) return skb; /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ - qdisc_bstats_cpu_update(cl->q, skb); + mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); break; case TC_ACT_SHOT: - qdisc_qstats_cpu_drop(cl->q); + mini_qdisc_qstats_cpu_drop(miniq); *ret = NET_XMIT_DROP; kfree_skb(skb); return NULL; @@ -4189,7 +4189,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { #ifdef CONFIG_NET_CLS_ACT - struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); + struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress); struct tcf_result cl_res; /* If there's at least one ingress present somewhere (so @@ -4197,8 +4197,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, * that are not configured with an ingress qdisc will bail * out here. */ - if (!cl) + if (!miniq) return skb; + if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; @@ -4206,15 +4207,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, qdisc_skb_cb(skb)->pkt_len = skb->len; skb->tc_at_ingress = 1; - qdisc_bstats_cpu_update(cl->q, skb); + mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); break; case TC_ACT_SHOT: - qdisc_qstats_cpu_drop(cl->q); + mini_qdisc_qstats_cpu_drop(miniq); kfree_skb(skb); return NULL; case TC_ACT_STOLEN: diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8d1885abee83..206e19f4fc01 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -195,12 +195,19 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, return chain; } +static void tcf_chain_head_change(struct tcf_chain *chain, + struct tcf_proto *tp_head) +{ + if (chain->chain_head_change) + chain->chain_head_change(tp_head, + chain->chain_head_change_priv); +} + static void tcf_chain_flush(struct tcf_chain *chain) { struct tcf_proto *tp; - if (chain->p_filter_chain) - RCU_INIT_POINTER(*chain->p_filter_chain, NULL); + tcf_chain_head_change(chain, NULL); while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { RCU_INIT_POINTER(chain->filter_chain, tp->next); tcf_chain_put(chain); @@ -242,13 +249,6 @@ void tcf_chain_put(struct tcf_chain *chain) } EXPORT_SYMBOL(tcf_chain_put); -static void -tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain, - struct tcf_proto __rcu **p_filter_chain) -{ - chain->p_filter_chain = p_filter_chain; -} - static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei, enum tc_block_command command) @@ -276,8 +276,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND); } -int tcf_block_get_ext(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei) { struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); @@ -295,7 +294,9 @@ int tcf_block_get_ext(struct tcf_block **p_block, err = -ENOMEM; goto err_chain_create; } - tcf_chain_filter_chain_ptr_set(chain, p_filter_chain); + WARN_ON(!ei->chain_head_change); + chain->chain_head_change = ei->chain_head_change; + chain->chain_head_change_priv = ei->chain_head_change_priv; block->net = qdisc_net(q); block->q = q; tcf_block_offload_bind(block, q, ei); @@ -308,12 +309,23 @@ err_chain_create: } EXPORT_SYMBOL(tcf_block_get_ext); +static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv) +{ + struct tcf_proto __rcu **p_filter_chain = priv; + + rcu_assign_pointer(*p_filter_chain, tp_head); +} + int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) { - struct tcf_block_ext_info ei = {0, }; + struct tcf_block_ext_info ei = { + .chain_head_change = tcf_chain_head_change_dflt, + .chain_head_change_priv = p_filter_chain, + }; - return tcf_block_get_ext(p_block, p_filter_chain, q, &ei); + WARN_ON(!p_filter_chain); + return tcf_block_get_ext(p_block, q, &ei); } EXPORT_SYMBOL(tcf_block_get); @@ -334,8 +346,7 @@ static void tcf_block_put_final(struct work_struct *work) * actions should be all removed after flushing. However, filters are now * destroyed in tc filter workqueue with RTNL lock, they can not race here. */ -void tcf_block_put_ext(struct tcf_block *block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { struct tcf_chain *chain, *tmp; @@ -361,7 +372,7 @@ void tcf_block_put(struct tcf_block *block) if (!block) return; - tcf_block_put_ext(block, NULL, block->q, &ei); + tcf_block_put_ext(block, block->q, &ei); } EXPORT_SYMBOL(tcf_block_put); @@ -537,9 +548,8 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { - if (chain->p_filter_chain && - *chain_info->pprev == chain->filter_chain) - rcu_assign_pointer(*chain->p_filter_chain, tp); + if (*chain_info->pprev == chain->filter_chain) + tcf_chain_head_change(chain, tp); RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); tcf_chain_hold(chain); @@ -551,8 +561,8 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, { struct tcf_proto *next = rtnl_dereference(chain_info->next); - if (chain->p_filter_chain && tp == chain->filter_chain) - RCU_INIT_POINTER(*chain->p_filter_chain, next); + if (tp == chain->filter_chain) + tcf_chain_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); tcf_chain_put(chain); } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index aa74aa42b5d7..3839cbbdc32b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1024,3 +1024,49 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, } } EXPORT_SYMBOL(psched_ratecfg_precompute); + +static void mini_qdisc_rcu_func(struct rcu_head *head) +{ +} + +void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, + struct tcf_proto *tp_head) +{ + struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq); + struct mini_Qdisc *miniq; + + if (!tp_head) { + RCU_INIT_POINTER(*miniqp->p_miniq, NULL); + return; + } + + miniq = !miniq_old || miniq_old == &miniqp->miniq2 ? + &miniqp->miniq1 : &miniqp->miniq2; + + /* We need to make sure that readers won't see the miniq + * we are about to modify. So wait until previous call_rcu_bh callback + * is done. + */ + rcu_barrier_bh(); + miniq->filter_list = tp_head; + rcu_assign_pointer(*miniqp->p_miniq, miniq); + + if (miniq_old) + /* This is counterpart of the rcu barrier above. We need to + * block potential new user of miniq_old until all readers + * are not seeing it. + */ + call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func); +} +EXPORT_SYMBOL(mini_qdisc_pair_swap); + +void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, + struct mini_Qdisc __rcu **p_miniq) +{ + miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats; + miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats; + miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats; + miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats; + miniqp->p_miniq = p_miniq; +} +EXPORT_SYMBOL(mini_qdisc_pair_init); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index b599db26d34b..5ecc38f35d47 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -21,6 +21,7 @@ struct ingress_sched_data { struct tcf_block *block; struct tcf_block_ext_info block_info; + struct mini_Qdisc_pair miniqp; }; static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) @@ -54,16 +55,26 @@ static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl) return q->block; } +static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv) +{ + struct mini_Qdisc_pair *miniqp = priv; + + mini_qdisc_pair_swap(miniqp, tp_head); +} + static int ingress_init(struct Qdisc *sch, struct nlattr *opt) { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); int err; + mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); + q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + q->block_info.chain_head_change = clsact_chain_head_change; + q->block_info.chain_head_change_priv = &q->miniqp; - err = tcf_block_get_ext(&q->block, &dev->ingress_cl_list, - sch, &q->block_info); + err = tcf_block_get_ext(&q->block, sch, &q->block_info); if (err) return err; @@ -76,10 +87,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) static void ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); - struct net_device *dev = qdisc_dev(sch); - tcf_block_put_ext(q->block, &dev->ingress_cl_list, - sch, &q->block_info); + tcf_block_put_ext(q->block, sch, &q->block_info); net_dec_ingress_queue(); } @@ -122,6 +131,8 @@ struct clsact_sched_data { struct tcf_block *egress_block; struct tcf_block_ext_info ingress_block_info; struct tcf_block_ext_info egress_block_info; + struct mini_Qdisc_pair miniqp_ingress; + struct mini_Qdisc_pair miniqp_egress; }; static unsigned long clsact_find(struct Qdisc *sch, u32 classid) @@ -161,17 +172,23 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) struct net_device *dev = qdisc_dev(sch); int err; + mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress); + q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + q->ingress_block_info.chain_head_change = clsact_chain_head_change; + q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress; - err = tcf_block_get_ext(&q->ingress_block, &dev->ingress_cl_list, - sch, &q->ingress_block_info); + err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info); if (err) return err; + mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress); + q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS; + q->egress_block_info.chain_head_change = clsact_chain_head_change; + q->egress_block_info.chain_head_change_priv = &q->miniqp_egress; - err = tcf_block_get_ext(&q->egress_block, &dev->egress_cl_list, - sch, &q->egress_block_info); + err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); if (err) goto err_egress_block_get; @@ -183,20 +200,16 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) return 0; err_egress_block_get: - tcf_block_put_ext(q->ingress_block, &dev->ingress_cl_list, - sch, &q->ingress_block_info); + tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); return err; } static void clsact_destroy(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); - struct net_device *dev = qdisc_dev(sch); - tcf_block_put_ext(q->egress_block, &dev->egress_cl_list, - sch, &q->egress_block_info); - tcf_block_put_ext(q->ingress_block, &dev->ingress_cl_list, - sch, &q->ingress_block_info); + tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); + tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); net_dec_ingress_queue(); net_dec_egress_queue(); |