summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-11-03 21:57:35 +0900
committerDavid S. Miller <davem@davemloft.net>2017-11-03 21:57:35 +0900
commit6ee79b6ebf6613f1c5bf2be0c3dca4e51817f2ca (patch)
tree54915d3a3f2af715f1681da274e3a98f6198e71b
parentaa5fbf07541bbec0d79f4b32415aff2233971853 (diff)
parent46209401f8f6116bd0b2c2d14a63958e83ffca0b (diff)
downloadlinux-6ee79b6ebf6613f1c5bf2be0c3dca4e51817f2ca.tar.bz2
Merge branch 'net-mini_Qdisc'
Jiri Pirko says: ==================== net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath This patchset's main patch is patch number 2. It carries the description. Patch 1 is just a dependency. --- v3->v4: - rebased to be applicable on top of the current net-next v2->v3: - Using head change callback to replace miniq pointer every time tp head changes. This eliminates one rcu dereference and makes the claim "without added overhead" valid. v1->v2: - Use dev instead of skb->dev in sch_handle_egress as pointed out by Daniel - Fixed synchronize_rcu_bh() in mini_qdisc_disable and commented ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netdevice.h9
-rw-r--r--include/net/pkt_cls.h14
-rw-r--r--include/net/sch_generic.h37
-rw-r--r--net/core/dev.c21
-rw-r--r--net/sched/cls_api.c54
-rw-r--r--net/sched/sch_generic.c46
-rw-r--r--net/sched/sch_ingress.c45
7 files changed, 166 insertions, 60 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5e02f79b2110..7de7656550c2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1559,6 +1559,8 @@ enum netdev_priv_flags {
*
* @rx_handler: handler for received packets
* @rx_handler_data: XXX: need comments on this one
+ * @miniq_ingress: ingress/clsact qdisc specific data for
+ * ingress processing
* @ingress_queue: XXX: need comments on this one
* @broadcast: hw bcast address
*
@@ -1576,7 +1578,8 @@ enum netdev_priv_flags {
* @tx_global_lock: XXX: need comments on this one
*
* @xps_maps: XXX: need comments on this one
- *
+ * @miniq_egress: clsact qdisc specific data for
+ * egress processing
* @watchdog_timeo: Represents the timeout that is used by
* the watchdog (see dev_watchdog())
* @watchdog_timer: List of timers
@@ -1795,7 +1798,7 @@ struct net_device {
void __rcu *rx_handler_data;
#ifdef CONFIG_NET_CLS_ACT
- struct tcf_proto __rcu *ingress_cl_list;
+ struct mini_Qdisc __rcu *miniq_ingress;
#endif
struct netdev_queue __rcu *ingress_queue;
#ifdef CONFIG_NETFILTER_INGRESS
@@ -1826,7 +1829,7 @@ struct net_device {
struct xps_dev_maps __rcu *xps_maps;
#endif
#ifdef CONFIG_NET_CLS_ACT
- struct tcf_proto __rcu *egress_cl_list;
+ struct mini_Qdisc __rcu *miniq_egress;
#endif
/* These may be needed for future network-power-down code. */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index d15c40c7bde7..505d4b71975f 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -26,6 +26,8 @@ enum tcf_block_binder_type {
struct tcf_block_ext_info {
enum tcf_block_binder_type binder_type;
+ tcf_chain_head_change_t *chain_head_change;
+ void *chain_head_change_priv;
};
struct tcf_block_cb;
@@ -37,12 +39,10 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
void tcf_chain_put(struct tcf_chain *chain);
int tcf_block_get(struct tcf_block **p_block,
struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q);
-int tcf_block_get_ext(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
struct tcf_block_ext_info *ei);
void tcf_block_put(struct tcf_block *block);
-void tcf_block_put_ext(struct tcf_block *block,
- struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei);
static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
@@ -82,8 +82,7 @@ int tcf_block_get(struct tcf_block **p_block,
}
static inline
-int tcf_block_get_ext(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
struct tcf_block_ext_info *ei)
{
return 0;
@@ -94,8 +93,7 @@ static inline void tcf_block_put(struct tcf_block *block)
}
static inline
-void tcf_block_put_ext(struct tcf_block *block,
- struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei)
{
}
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c23e938f5b19..c64e62c9450a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -260,9 +260,12 @@ struct qdisc_skb_cb {
unsigned char data[QDISC_CB_PRIV_LEN];
};
+typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
+
struct tcf_chain {
struct tcf_proto __rcu *filter_chain;
- struct tcf_proto __rcu **p_filter_chain;
+ tcf_chain_head_change_t *chain_head_change;
+ void *chain_head_change_priv;
struct list_head list;
struct tcf_block *block;
u32 index; /* chain index */
@@ -901,4 +904,36 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
}
+/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc.
+ * The fast path only needs to access filter list and to update stats
+ */
+struct mini_Qdisc {
+ struct tcf_proto *filter_list;
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ struct gnet_stats_queue __percpu *cpu_qstats;
+ struct rcu_head rcu;
+};
+
+static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
+ const struct sk_buff *skb)
+{
+ bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
+}
+
+static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
+{
+ this_cpu_inc(miniq->cpu_qstats->drops);
+}
+
+struct mini_Qdisc_pair {
+ struct mini_Qdisc miniq1;
+ struct mini_Qdisc miniq2;
+ struct mini_Qdisc __rcu **p_miniq;
+};
+
+void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
+ struct tcf_proto *tp_head);
+void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
+ struct mini_Qdisc __rcu **p_miniq);
+
#endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 24ac9083bc13..1423cf4d695c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3274,22 +3274,22 @@ EXPORT_SYMBOL(dev_loopback_xmit);
static struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
- struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
- if (!cl)
+ if (!miniq)
return skb;
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
*ret = NET_XMIT_DROP;
kfree_skb(skb);
return NULL;
@@ -4189,7 +4189,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
#ifdef CONFIG_NET_CLS_ACT
- struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
struct tcf_result cl_res;
/* If there's at least one ingress present somewhere (so
@@ -4197,8 +4197,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
* that are not configured with an ingress qdisc will bail
* out here.
*/
- if (!cl)
+ if (!miniq)
return skb;
+
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
@@ -4206,15 +4207,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
qdisc_skb_cb(skb)->pkt_len = skb->len;
skb->tc_at_ingress = 1;
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
kfree_skb(skb);
return NULL;
case TC_ACT_STOLEN:
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8d1885abee83..206e19f4fc01 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -195,12 +195,19 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
return chain;
}
+static void tcf_chain_head_change(struct tcf_chain *chain,
+ struct tcf_proto *tp_head)
+{
+ if (chain->chain_head_change)
+ chain->chain_head_change(tp_head,
+ chain->chain_head_change_priv);
+}
+
static void tcf_chain_flush(struct tcf_chain *chain)
{
struct tcf_proto *tp;
- if (chain->p_filter_chain)
- RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
+ tcf_chain_head_change(chain, NULL);
while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
RCU_INIT_POINTER(chain->filter_chain, tp->next);
tcf_chain_put(chain);
@@ -242,13 +249,6 @@ void tcf_chain_put(struct tcf_chain *chain)
}
EXPORT_SYMBOL(tcf_chain_put);
-static void
-tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
- struct tcf_proto __rcu **p_filter_chain)
-{
- chain->p_filter_chain = p_filter_chain;
-}
-
static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei,
enum tc_block_command command)
@@ -276,8 +276,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
}
-int tcf_block_get_ext(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
struct tcf_block_ext_info *ei)
{
struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
@@ -295,7 +294,9 @@ int tcf_block_get_ext(struct tcf_block **p_block,
err = -ENOMEM;
goto err_chain_create;
}
- tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
+ WARN_ON(!ei->chain_head_change);
+ chain->chain_head_change = ei->chain_head_change;
+ chain->chain_head_change_priv = ei->chain_head_change_priv;
block->net = qdisc_net(q);
block->q = q;
tcf_block_offload_bind(block, q, ei);
@@ -308,12 +309,23 @@ err_chain_create:
}
EXPORT_SYMBOL(tcf_block_get_ext);
+static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
+{
+ struct tcf_proto __rcu **p_filter_chain = priv;
+
+ rcu_assign_pointer(*p_filter_chain, tp_head);
+}
+
int tcf_block_get(struct tcf_block **p_block,
struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
{
- struct tcf_block_ext_info ei = {0, };
+ struct tcf_block_ext_info ei = {
+ .chain_head_change = tcf_chain_head_change_dflt,
+ .chain_head_change_priv = p_filter_chain,
+ };
- return tcf_block_get_ext(p_block, p_filter_chain, q, &ei);
+ WARN_ON(!p_filter_chain);
+ return tcf_block_get_ext(p_block, q, &ei);
}
EXPORT_SYMBOL(tcf_block_get);
@@ -334,8 +346,7 @@ static void tcf_block_put_final(struct work_struct *work)
* actions should be all removed after flushing. However, filters are now
* destroyed in tc filter workqueue with RTNL lock, they can not race here.
*/
-void tcf_block_put_ext(struct tcf_block *block,
- struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei)
{
struct tcf_chain *chain, *tmp;
@@ -361,7 +372,7 @@ void tcf_block_put(struct tcf_block *block)
if (!block)
return;
- tcf_block_put_ext(block, NULL, block->q, &ei);
+ tcf_block_put_ext(block, block->q, &ei);
}
EXPORT_SYMBOL(tcf_block_put);
@@ -537,9 +548,8 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
struct tcf_chain_info *chain_info,
struct tcf_proto *tp)
{
- if (chain->p_filter_chain &&
- *chain_info->pprev == chain->filter_chain)
- rcu_assign_pointer(*chain->p_filter_chain, tp);
+ if (*chain_info->pprev == chain->filter_chain)
+ tcf_chain_head_change(chain, tp);
RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
rcu_assign_pointer(*chain_info->pprev, tp);
tcf_chain_hold(chain);
@@ -551,8 +561,8 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
{
struct tcf_proto *next = rtnl_dereference(chain_info->next);
- if (chain->p_filter_chain && tp == chain->filter_chain)
- RCU_INIT_POINTER(*chain->p_filter_chain, next);
+ if (tp == chain->filter_chain)
+ tcf_chain_head_change(chain, next);
RCU_INIT_POINTER(*chain_info->pprev, next);
tcf_chain_put(chain);
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index aa74aa42b5d7..3839cbbdc32b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1024,3 +1024,49 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
}
}
EXPORT_SYMBOL(psched_ratecfg_precompute);
+
+static void mini_qdisc_rcu_func(struct rcu_head *head)
+{
+}
+
+void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
+ struct tcf_proto *tp_head)
+{
+ struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
+ struct mini_Qdisc *miniq;
+
+ if (!tp_head) {
+ RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
+ return;
+ }
+
+ miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
+ &miniqp->miniq1 : &miniqp->miniq2;
+
+ /* We need to make sure that readers won't see the miniq
+ * we are about to modify. So wait until previous call_rcu_bh callback
+ * is done.
+ */
+ rcu_barrier_bh();
+ miniq->filter_list = tp_head;
+ rcu_assign_pointer(*miniqp->p_miniq, miniq);
+
+ if (miniq_old)
+ /* This is counterpart of the rcu barrier above. We need to
+ * block potential new user of miniq_old until all readers
+ * are not seeing it.
+ */
+ call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func);
+}
+EXPORT_SYMBOL(mini_qdisc_pair_swap);
+
+void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
+ struct mini_Qdisc __rcu **p_miniq)
+{
+ miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
+ miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
+ miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
+ miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
+ miniqp->p_miniq = p_miniq;
+}
+EXPORT_SYMBOL(mini_qdisc_pair_init);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index b599db26d34b..5ecc38f35d47 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -21,6 +21,7 @@
struct ingress_sched_data {
struct tcf_block *block;
struct tcf_block_ext_info block_info;
+ struct mini_Qdisc_pair miniqp;
};
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
@@ -54,16 +55,26 @@ static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
return q->block;
}
+static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv)
+{
+ struct mini_Qdisc_pair *miniqp = priv;
+
+ mini_qdisc_pair_swap(miniqp, tp_head);
+}
+
static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
int err;
+ mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
+
q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ q->block_info.chain_head_change = clsact_chain_head_change;
+ q->block_info.chain_head_change_priv = &q->miniqp;
- err = tcf_block_get_ext(&q->block, &dev->ingress_cl_list,
- sch, &q->block_info);
+ err = tcf_block_get_ext(&q->block, sch, &q->block_info);
if (err)
return err;
@@ -76,10 +87,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_sched_data *q = qdisc_priv(sch);
- struct net_device *dev = qdisc_dev(sch);
- tcf_block_put_ext(q->block, &dev->ingress_cl_list,
- sch, &q->block_info);
+ tcf_block_put_ext(q->block, sch, &q->block_info);
net_dec_ingress_queue();
}
@@ -122,6 +131,8 @@ struct clsact_sched_data {
struct tcf_block *egress_block;
struct tcf_block_ext_info ingress_block_info;
struct tcf_block_ext_info egress_block_info;
+ struct mini_Qdisc_pair miniqp_ingress;
+ struct mini_Qdisc_pair miniqp_egress;
};
static unsigned long clsact_find(struct Qdisc *sch, u32 classid)
@@ -161,17 +172,23 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
struct net_device *dev = qdisc_dev(sch);
int err;
+ mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
+
q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ q->ingress_block_info.chain_head_change = clsact_chain_head_change;
+ q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress;
- err = tcf_block_get_ext(&q->ingress_block, &dev->ingress_cl_list,
- sch, &q->ingress_block_info);
+ err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info);
if (err)
return err;
+ mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
+
q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
+ q->egress_block_info.chain_head_change = clsact_chain_head_change;
+ q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
- err = tcf_block_get_ext(&q->egress_block, &dev->egress_cl_list,
- sch, &q->egress_block_info);
+ err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
if (err)
goto err_egress_block_get;
@@ -183,20 +200,16 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
return 0;
err_egress_block_get:
- tcf_block_put_ext(q->ingress_block, &dev->ingress_cl_list,
- sch, &q->ingress_block_info);
+ tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
return err;
}
static void clsact_destroy(struct Qdisc *sch)
{
struct clsact_sched_data *q = qdisc_priv(sch);
- struct net_device *dev = qdisc_dev(sch);
- tcf_block_put_ext(q->egress_block, &dev->egress_cl_list,
- sch, &q->egress_block_info);
- tcf_block_put_ext(q->ingress_block, &dev->ingress_cl_list,
- sch, &q->ingress_block_info);
+ tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
+ tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
net_dec_ingress_queue();
net_dec_egress_queue();