summaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/act_police.c4
-rw-r--r--net/sched/cls_api.c30
-rw-r--r--net/sched/cls_basic.c80
-rw-r--r--net/sched/cls_bpf.c94
-rw-r--r--net/sched/cls_cgroup.c77
-rw-r--r--net/sched/cls_flow.c145
-rw-r--r--net/sched/cls_fw.c113
-rw-r--r--net/sched/cls_route.c226
-rw-r--r--net/sched/cls_rsvp.h160
-rw-r--r--net/sched/cls_tcindex.c258
-rw-r--r--net/sched/cls_u32.c403
-rw-r--r--net/sched/sch_api.c10
-rw-r--r--net/sched/sch_atm.c20
-rw-r--r--net/sched/sch_cbq.c11
-rw-r--r--net/sched/sch_choke.c15
-rw-r--r--net/sched/sch_drr.c9
-rw-r--r--net/sched/sch_dsmark.c9
-rw-r--r--net/sched/sch_fq.c4
-rw-r--r--net/sched/sch_fq_codel.c14
-rw-r--r--net/sched/sch_generic.c31
-rw-r--r--net/sched/sch_hfsc.c8
-rw-r--r--net/sched/sch_htb.c23
-rw-r--r--net/sched/sch_ingress.c8
-rw-r--r--net/sched/sch_mqprio.c6
-rw-r--r--net/sched/sch_multiq.c8
-rw-r--r--net/sched/sch_prio.c11
-rw-r--r--net/sched/sch_qfq.c9
-rw-r--r--net/sched/sch_sfb.c15
-rw-r--r--net/sched/sch_sfq.c16
-rw-r--r--net/sched/sch_tbf.c6
-rw-r--r--net/sched/sch_teql.c18
31 files changed, 1154 insertions, 687 deletions
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0566e4606a4a..f32bcb094915 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -231,7 +231,7 @@ override:
if (ret != ACT_P_CREATED)
return ret;
- police->tcfp_t_c = ktime_to_ns(ktime_get());
+ police->tcfp_t_c = ktime_get_ns();
police->tcf_index = parm->index ? parm->index :
tcf_hash_new_index(hinfo);
h = tcf_hash(police->tcf_index, POL_TAB_MASK);
@@ -279,7 +279,7 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
return police->tcfp_result;
}
- now = ktime_to_ns(ktime_get());
+ now = ktime_get_ns();
toks = min_t(s64, now - police->tcfp_t_c,
police->tcfp_burst);
if (police->peak_present) {
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index c28b0d327b12..e547efdaba9d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -117,7 +117,6 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
- spinlock_t *root_lock;
struct tcmsg *t;
u32 protocol;
u32 prio;
@@ -125,7 +124,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
u32 parent;
struct net_device *dev;
struct Qdisc *q;
- struct tcf_proto **back, **chain;
+ struct tcf_proto __rcu **back;
+ struct tcf_proto __rcu **chain;
struct tcf_proto *tp;
const struct tcf_proto_ops *tp_ops;
const struct Qdisc_class_ops *cops;
@@ -197,7 +197,9 @@ replay:
goto errout;
/* Check the chain for existence of proto-tcf with this priority */
- for (back = chain; (tp = *back) != NULL; back = &tp->next) {
+ for (back = chain;
+ (tp = rtnl_dereference(*back)) != NULL;
+ back = &tp->next) {
if (tp->prio >= prio) {
if (tp->prio == prio) {
if (!nprio ||
@@ -209,8 +211,6 @@ replay:
}
}
- root_lock = qdisc_root_sleeping_lock(q);
-
if (tp == NULL) {
/* Proto-tcf does not exist, create new one */
@@ -259,7 +259,8 @@ replay:
}
tp->ops = tp_ops;
tp->protocol = protocol;
- tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back));
+ tp->prio = nprio ? :
+ TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
tp->q = q;
tp->classify = tp_ops->classify;
tp->classid = parent;
@@ -280,9 +281,9 @@ replay:
if (fh == 0) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
- spin_lock_bh(root_lock);
- *back = tp->next;
- spin_unlock_bh(root_lock);
+ struct tcf_proto *next = rtnl_dereference(tp->next);
+
+ RCU_INIT_POINTER(*back, next);
tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
tcf_destroy(tp);
@@ -322,10 +323,8 @@ replay:
n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
if (err == 0) {
if (tp_created) {
- spin_lock_bh(root_lock);
- tp->next = *back;
- *back = tp;
- spin_unlock_bh(root_lock);
+ RCU_INIT_POINTER(tp->next, rtnl_dereference(*back));
+ rcu_assign_pointer(*back, tp);
}
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
} else {
@@ -420,7 +419,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
int s_t;
struct net_device *dev;
struct Qdisc *q;
- struct tcf_proto *tp, **chain;
+ struct tcf_proto *tp, __rcu **chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
unsigned long cl = 0;
const struct Qdisc_class_ops *cops;
@@ -454,7 +453,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
- for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
+ for (tp = rtnl_dereference(*chain), t = 0;
+ tp; tp = rtnl_dereference(tp->next), t++) {
if (t < s_t)
continue;
if (TC_H_MAJ(tcm->tcm_info) &&
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0ae1813e3e90..1937298d6775 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -24,6 +24,7 @@
struct basic_head {
u32 hgenerator;
struct list_head flist;
+ struct rcu_head rcu;
};
struct basic_filter {
@@ -31,17 +32,19 @@ struct basic_filter {
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
struct tcf_result res;
+ struct tcf_proto *tp;
struct list_head link;
+ struct rcu_head rcu;
};
static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
int r;
- struct basic_head *head = tp->root;
+ struct basic_head *head = rcu_dereference_bh(tp->root);
struct basic_filter *f;
- list_for_each_entry(f, &head->flist, link) {
+ list_for_each_entry_rcu(f, &head->flist, link) {
if (!tcf_em_tree_match(skb, &f->ematches, NULL))
continue;
*res = f->res;
@@ -56,7 +59,7 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
{
unsigned long l = 0UL;
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
if (head == NULL)
@@ -81,12 +84,15 @@ static int basic_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
INIT_LIST_HEAD(&head->flist);
- tp->root = head;
+ rcu_assign_pointer(tp->root, head);
return 0;
}
-static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
+static void basic_delete_filter(struct rcu_head *head)
{
+ struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+ struct tcf_proto *tp = f->tp;
+
tcf_unbind_filter(tp, &f->res);
tcf_exts_destroy(tp, &f->exts);
tcf_em_tree_destroy(tp, &f->ematches);
@@ -95,27 +101,26 @@ static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
static void basic_destroy(struct tcf_proto *tp)
{
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
list_for_each_entry_safe(f, n, &head->flist, link) {
- list_del(&f->link);
- basic_delete_filter(tp, f);
+ list_del_rcu(&f->link);
+ call_rcu(&f->rcu, basic_delete_filter);
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static int basic_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *t, *f = (struct basic_filter *) arg;
list_for_each_entry(t, &head->flist, link)
if (t == f) {
- tcf_tree_lock(tp);
- list_del(&t->link);
- tcf_tree_unlock(tp);
- basic_delete_filter(tp, t);
+ list_del_rcu(&t->link);
+ call_rcu(&t->rcu, basic_delete_filter);
return 0;
}
@@ -152,6 +157,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
tcf_exts_change(tp, &f->exts, &e);
tcf_em_tree_change(tp, &f->ematches, &t);
+ f->tp = tp;
return 0;
errout:
@@ -164,9 +170,10 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca, unsigned long *arg, bool ovr)
{
int err;
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_BASIC_MAX + 1];
- struct basic_filter *f = (struct basic_filter *) *arg;
+ struct basic_filter *fold = (struct basic_filter *) *arg;
+ struct basic_filter *fnew;
if (tca[TCA_OPTIONS] == NULL)
return -EINVAL;
@@ -176,22 +183,23 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- if (f != NULL) {
- if (handle && f->handle != handle)
+ if (fold != NULL) {
+ if (handle && fold->handle != handle)
return -EINVAL;
- return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
}
err = -ENOBUFS;
- f = kzalloc(sizeof(*f), GFP_KERNEL);
- if (f == NULL)
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (fnew == NULL)
goto errout;
- tcf_exts_init(&f->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+ tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
err = -EINVAL;
- if (handle)
- f->handle = handle;
- else {
+ if (handle) {
+ fnew->handle = handle;
+ } else if (fold) {
+ fnew->handle = fold->handle;
+ } else {
unsigned int i = 0x80000000;
do {
if (++head->hgenerator == 0x7FFFFFFF)
@@ -203,29 +211,31 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- f->handle = head->hgenerator;
+ fnew->handle = head->hgenerator;
}
- err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
+ err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr);
if (err < 0)
goto errout;
- tcf_tree_lock(tp);
- list_add(&f->link, &head->flist);
- tcf_tree_unlock(tp);
- *arg = (unsigned long) f;
+ *arg = (unsigned long)fnew;
+
+ if (fold) {
+ list_replace_rcu(&fold->link, &fnew->link);
+ call_rcu(&fold->rcu, basic_delete_filter);
+ } else {
+ list_add_rcu(&fnew->link, &head->flist);
+ }
return 0;
errout:
- if (*arg == 0UL && f)
- kfree(f);
-
+ kfree(fnew);
return err;
}
static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
list_for_each_entry(f, &head->flist, link) {
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 0e30d58149da..4e3f5bfc0b26 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -27,6 +27,7 @@ MODULE_DESCRIPTION("TC BPF based classifier");
struct cls_bpf_head {
struct list_head plist;
u32 hgen;
+ struct rcu_head rcu;
};
struct cls_bpf_prog {
@@ -37,6 +38,8 @@ struct cls_bpf_prog {
struct list_head link;
u32 handle;
u16 bpf_len;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -49,11 +52,11 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
struct cls_bpf_prog *prog;
int ret;
- list_for_each_entry(prog, &head->plist, link) {
+ list_for_each_entry_rcu(prog, &head->plist, link) {
int filter_res = BPF_PROG_RUN(prog->filter, skb);
if (filter_res == 0)
@@ -81,8 +84,8 @@ static int cls_bpf_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
- INIT_LIST_HEAD(&head->plist);
- tp->root = head;
+ INIT_LIST_HEAD_RCU(&head->plist);
+ rcu_assign_pointer(tp->root, head);
return 0;
}
@@ -98,18 +101,22 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
kfree(prog);
}
+static void __cls_bpf_delete_prog(struct rcu_head *rcu)
+{
+ struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
+
+ cls_bpf_delete_prog(prog->tp, prog);
+}
+
static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg;
list_for_each_entry(prog, &head->plist, link) {
if (prog == todel) {
- tcf_tree_lock(tp);
- list_del(&prog->link);
- tcf_tree_unlock(tp);
-
- cls_bpf_delete_prog(tp, prog);
+ list_del_rcu(&prog->link);
+ call_rcu(&prog->rcu, __cls_bpf_delete_prog);
return 0;
}
}
@@ -119,27 +126,28 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
static void cls_bpf_destroy(struct tcf_proto *tp)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
list_for_each_entry_safe(prog, tmp, &head->plist, link) {
- list_del(&prog->link);
- cls_bpf_delete_prog(tp, prog);
+ list_del_rcu(&prog->link);
+ call_rcu(&prog->rcu, __cls_bpf_delete_prog);
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
unsigned long ret = 0UL;
if (head == NULL)
return 0UL;
- list_for_each_entry(prog, &head->plist, link) {
+ list_for_each_entry_rcu(prog, &head->plist, link) {
if (prog->handle == handle) {
ret = (unsigned long) prog;
break;
@@ -158,10 +166,10 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
- struct sock_filter *bpf_ops, *bpf_old;
+ struct sock_filter *bpf_ops;
struct tcf_exts exts;
struct sock_fprog_kern tmp;
- struct bpf_prog *fp, *fp_old;
+ struct bpf_prog *fp;
u16 bpf_size, bpf_len;
u32 classid;
int ret;
@@ -197,26 +205,15 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
if (ret)
goto errout_free;
- tcf_tree_lock(tp);
- fp_old = prog->filter;
- bpf_old = prog->bpf_ops;
-
prog->bpf_len = bpf_len;
prog->bpf_ops = bpf_ops;
prog->filter = fp;
prog->res.classid = classid;
- tcf_tree_unlock(tp);
tcf_bind_filter(tp, &prog->res, base);
tcf_exts_change(tp, &prog->exts, &exts);
- if (fp_old)
- bpf_prog_destroy(fp_old);
- if (bpf_old)
- kfree(bpf_old);
-
return 0;
-
errout_free:
kfree(bpf_ops);
errout:
@@ -244,9 +241,10 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
u32 handle, struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct cls_bpf_head *head = tp->root;
- struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
+ struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
struct nlattr *tb[TCA_BPF_MAX + 1];
+ struct cls_bpf_prog *prog;
int ret;
if (tca[TCA_OPTIONS] == NULL)
@@ -256,18 +254,19 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (ret < 0)
return ret;
- if (prog != NULL) {
- if (handle && prog->handle != handle)
- return -EINVAL;
- return cls_bpf_modify_existing(net, tp, prog, base, tb,
- tca[TCA_RATE], ovr);
- }
-
prog = kzalloc(sizeof(*prog), GFP_KERNEL);
- if (prog == NULL)
+ if (!prog)
return -ENOBUFS;
tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+
+ if (oldprog) {
+ if (handle && oldprog->handle != handle) {
+ ret = -EINVAL;
+ goto errout;
+ }
+ }
+
if (handle == 0)
prog->handle = cls_bpf_grab_new_handle(tp, head);
else
@@ -281,16 +280,17 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (ret < 0)
goto errout;
- tcf_tree_lock(tp);
- list_add(&prog->link, &head->plist);
- tcf_tree_unlock(tp);
+ if (oldprog) {
+ list_replace_rcu(&prog->link, &oldprog->link);
+ call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
+ } else {
+ list_add_rcu(&prog->link, &head->plist);
+ }
*arg = (unsigned long) prog;
-
return 0;
errout:
- if (*arg == 0UL && prog)
- kfree(prog);
+ kfree(prog);
return ret;
}
@@ -339,10 +339,10 @@ nla_put_failure:
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
- list_for_each_entry(prog, &head->plist, link) {
+ list_for_each_entry_rcu(prog, &head->plist, link) {
if (arg->count < arg->skip)
goto skip;
if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index cacf01bd04f0..15c34d4ccd9e 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -22,17 +22,17 @@ struct cls_cgroup_head {
u32 handle;
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rcu_dereference_bh(tp->root);
u32 classid;
- rcu_read_lock();
classid = task_cls_state(current)->classid;
- rcu_read_unlock();
/*
* Due to the nature of the classifier it is required to ignore all
@@ -80,13 +80,25 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
[TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
};
+static void cls_cgroup_destroy_rcu(struct rcu_head *root)
+{
+ struct cls_cgroup_head *head = container_of(root,
+ struct cls_cgroup_head,
+ rcu);
+
+ tcf_exts_destroy(head->tp, &head->exts);
+ tcf_em_tree_destroy(head->tp, &head->ematches);
+ kfree(head);
+}
+
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
unsigned long *arg, bool ovr)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
+ struct cls_cgroup_head *new;
struct tcf_ematch_tree t;
struct tcf_exts e;
int err;
@@ -94,53 +106,60 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (!tca[TCA_OPTIONS])
return -EINVAL;
- if (head == NULL) {
- if (!handle)
- return -EINVAL;
-
- head = kzalloc(sizeof(*head), GFP_KERNEL);
- if (head == NULL)
- return -ENOBUFS;
+ if (!head && !handle)
+ return -EINVAL;
- tcf_exts_init(&head->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
- head->handle = handle;
+ if (head && handle != head->handle)
+ return -ENOENT;
- tcf_tree_lock(tp);
- tp->root = head;
- tcf_tree_unlock(tp);
- }
+ new = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (!new)
+ return -ENOBUFS;
- if (handle != head->handle)
- return -ENOENT;
+ tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+ if (head)
+ new->handle = head->handle;
+ else
+ new->handle = handle;
+ new->tp = tp;
err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
cgroup_policy);
if (err < 0)
- return err;
+ goto errout;
tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
if (err < 0)
- return err;
+ goto errout;
err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
- if (err < 0)
- return err;
+ if (err < 0) {
+ tcf_exts_destroy(tp, &e);
+ goto errout;
+ }
- tcf_exts_change(tp, &head->exts, &e);
- tcf_em_tree_change(tp, &head->ematches, &t);
+ tcf_exts_change(tp, &new->exts, &e);
+ tcf_em_tree_change(tp, &new->ematches, &t);
+ rcu_assign_pointer(tp->root, new);
+ if (head)
+ call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
return 0;
+errout:
+ kfree(new);
+ return err;
}
static void cls_cgroup_destroy(struct tcf_proto *tp)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
if (head) {
tcf_exts_destroy(tp, &head->exts);
tcf_em_tree_destroy(tp, &head->ematches);
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
}
@@ -151,7 +170,7 @@ static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
if (arg->count < arg->skip)
goto skip;
@@ -167,7 +186,7 @@ skip:
static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 35be16f7c192..95736fa479f3 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -34,12 +34,14 @@
struct flow_head {
struct list_head filters;
+ struct rcu_head rcu;
};
struct flow_filter {
struct list_head list;
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
+ struct tcf_proto *tp;
struct timer_list perturb_timer;
u32 perturb_period;
u32 handle;
@@ -54,6 +56,7 @@ struct flow_filter {
u32 divisor;
u32 baseclass;
u32 hashrnd;
+ struct rcu_head rcu;
};
static inline u32 addr_fold(void *addr)
@@ -276,14 +279,14 @@ static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rcu_dereference_bh(tp->root);
struct flow_filter *f;
u32 keymask;
u32 classid;
unsigned int n, key;
int r;
- list_for_each_entry(f, &head->filters, list) {
+ list_for_each_entry_rcu(f, &head->filters, list) {
u32 keys[FLOW_KEY_MAX + 1];
struct flow_keys flow_keys;
@@ -346,13 +349,23 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
[TCA_FLOW_PERTURB] = { .type = NLA_U32 },
};
+static void flow_destroy_filter(struct rcu_head *head)
+{
+ struct flow_filter *f = container_of(head, struct flow_filter, rcu);
+
+ del_timer_sync(&f->perturb_timer);
+ tcf_exts_destroy(f->tp, &f->exts);
+ tcf_em_tree_destroy(f->tp, &f->ematches);
+ kfree(f);
+}
+
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct flow_head *head = tp->root;
- struct flow_filter *f;
+ struct flow_head *head = rtnl_dereference(tp->root);
+ struct flow_filter *fold, *fnew;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FLOW_MAX + 1];
struct tcf_exts e;
@@ -401,20 +414,42 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto err1;
- f = (struct flow_filter *)*arg;
- if (f != NULL) {
+ err = -ENOBUFS;
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (!fnew)
+ goto err2;
+
+ fold = (struct flow_filter *)*arg;
+ if (fold) {
err = -EINVAL;
- if (f->handle != handle && handle)
+ if (fold->handle != handle && handle)
goto err2;
- mode = f->mode;
+ /* Copy fold into fnew */
+ fnew->handle = fold->handle;
+ fnew->keymask = fold->keymask;
+ fnew->tp = fold->tp;
+
+ fnew->handle = fold->handle;
+ fnew->nkeys = fold->nkeys;
+ fnew->keymask = fold->keymask;
+ fnew->mode = fold->mode;
+ fnew->mask = fold->mask;
+ fnew->xor = fold->xor;
+ fnew->rshift = fold->rshift;
+ fnew->addend = fold->addend;
+ fnew->divisor = fold->divisor;
+ fnew->baseclass = fold->baseclass;
+ fnew->hashrnd = fold->hashrnd;
+
+ mode = fold->mode;
if (tb[TCA_FLOW_MODE])
mode = nla_get_u32(tb[TCA_FLOW_MODE]);
if (mode != FLOW_MODE_HASH && nkeys > 1)
goto err2;
if (mode == FLOW_MODE_HASH)
- perturb_period = f->perturb_period;
+ perturb_period = fold->perturb_period;
if (tb[TCA_FLOW_PERTURB]) {
if (mode != FLOW_MODE_HASH)
goto err2;
@@ -444,83 +479,70 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (TC_H_MIN(baseclass) == 0)
baseclass = TC_H_MAKE(baseclass, 1);
- err = -ENOBUFS;
- f = kzalloc(sizeof(*f), GFP_KERNEL);
- if (f == NULL)
- goto err2;
-
- f->handle = handle;
- f->mask = ~0U;
- tcf_exts_init(&f->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
-
- get_random_bytes(&f->hashrnd, 4);
- f->perturb_timer.function = flow_perturbation;
- f->perturb_timer.data = (unsigned long)f;
- init_timer_deferrable(&f->perturb_timer);
+ fnew->handle = handle;
+ fnew->mask = ~0U;
+ fnew->tp = tp;
+ get_random_bytes(&fnew->hashrnd, 4);
+ tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
}
- tcf_exts_change(tp, &f->exts, &e);
- tcf_em_tree_change(tp, &f->ematches, &t);
+ fnew->perturb_timer.function = flow_perturbation;
+ fnew->perturb_timer.data = (unsigned long)fnew;
+ init_timer_deferrable(&fnew->perturb_timer);
- tcf_tree_lock(tp);
+ tcf_exts_change(tp, &fnew->exts, &e);
+ tcf_em_tree_change(tp, &fnew->ematches, &t);
if (tb[TCA_FLOW_KEYS]) {
- f->keymask = keymask;
- f->nkeys = nkeys;
+ fnew->keymask = keymask;
+ fnew->nkeys = nkeys;
}
- f->mode = mode;
+ fnew->mode = mode;
if (tb[TCA_FLOW_MASK])
- f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
+ fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
if (tb[TCA_FLOW_XOR])
- f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
+ fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
if (tb[TCA_FLOW_RSHIFT])
- f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
+ fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
if (tb[TCA_FLOW_ADDEND])
- f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
+ fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
if (tb[TCA_FLOW_DIVISOR])
- f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
+ fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
if (baseclass)
- f->baseclass = baseclass;
+ fnew->baseclass = baseclass;
- f->perturb_period = perturb_period;
- del_timer(&f->perturb_timer);
+ fnew->perturb_period = perturb_period;
if (perturb_period)
- mod_timer(&f->perturb_timer, jiffies + perturb_period);
+ mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
if (*arg == 0)
- list_add_tail(&f->list, &head->filters);
+ list_add_tail_rcu(&fnew->list, &head->filters);
+ else
+ list_replace_rcu(&fnew->list, &fold->list);
- tcf_tree_unlock(tp);
+ *arg = (unsigned long)fnew;
- *arg = (unsigned long)f;
+ if (fold)
+ call_rcu(&fold->rcu, flow_destroy_filter);
return 0;
err2:
tcf_em_tree_destroy(tp, &t);
+ kfree(fnew);
err1:
tcf_exts_destroy(tp, &e);
return err;
}
-static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
-{
- del_timer_sync(&f->perturb_timer);
- tcf_exts_destroy(tp, &f->exts);
- tcf_em_tree_destroy(tp, &f->ematches);
- kfree(f);
-}
-
static int flow_delete(struct tcf_proto *tp, unsigned long arg)
{
struct flow_filter *f = (struct flow_filter *)arg;
- tcf_tree_lock(tp);
- list_del(&f->list);
- tcf_tree_unlock(tp);
- flow_destroy_filter(tp, f);
+ list_del_rcu(&f->list);
+ call_rcu(&f->rcu, flow_destroy_filter);
return 0;
}
@@ -532,28 +554,29 @@ static int flow_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
INIT_LIST_HEAD(&head->filters);
- tp->root = head;
+ rcu_assign_pointer(tp->root, head);
return 0;
}
static void flow_destroy(struct tcf_proto *tp)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
list_for_each_entry_safe(f, next, &head->filters, list) {
- list_del(&f->list);
- flow_destroy_filter(tp, f);
+ list_del_rcu(&f->list);
+ call_rcu(&f->rcu, flow_destroy_filter);
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
- list_for_each_entry(f, &head->filters, list)
+ list_for_each_entry_rcu(f, &head->filters, list)
if (f->handle == handle)
return (unsigned long)f;
return 0;
@@ -626,10 +649,10 @@ nla_put_failure:
static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
- list_for_each_entry(f, &head->filters, list) {
+ list_for_each_entry_rcu(f, &head->filters, list) {
if (arg->count < arg->skip)
goto skip;
if (arg->fn(tp, (unsigned long)f, arg) < 0) {
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 861b03ccfed0..2650285620ee 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -33,17 +33,20 @@
struct fw_head {
u32 mask;
- struct fw_filter *ht[HTSIZE];
+ struct fw_filter __rcu *ht[HTSIZE];
+ struct rcu_head rcu;
};
struct fw_filter {
- struct fw_filter *next;
+ struct fw_filter __rcu *next;
u32 id;
struct tcf_result res;
#ifdef CONFIG_NET_CLS_IND
int ifindex;
#endif /* CONFIG_NET_CLS_IND */
struct tcf_exts exts;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
static u32 fw_hash(u32 handle)
@@ -56,14 +59,16 @@ static u32 fw_hash(u32 handle)
static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rcu_dereference_bh(tp->root);
struct fw_filter *f;
int r;
u32 id = skb->mark;
if (head != NULL) {
id &= head->mask;
- for (f = head->ht[fw_hash(id)]; f; f = f->next) {
+
+ for (f = rcu_dereference_bh(head->ht[fw_hash(id)]); f;
+ f = rcu_dereference_bh(f->next)) {
if (f->id == id) {
*res = f->res;
#ifdef CONFIG_NET_CLS_IND
@@ -92,13 +97,14 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
if (head == NULL)
return 0;
- for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
+ f = rtnl_dereference(head->ht[fw_hash(handle)]);
+ for (; f; f = rtnl_dereference(f->next)) {
if (f->id == handle)
return (unsigned long)f;
}
@@ -114,8 +120,11 @@ static int fw_init(struct tcf_proto *tp)
return 0;
}
-static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
+static void fw_delete_filter(struct rcu_head *head)
{
+ struct fw_filter *f = container_of(head, struct fw_filter, rcu);
+ struct tcf_proto *tp = f->tp;
+
tcf_unbind_filter(tp, &f->res);
tcf_exts_destroy(tp, &f->exts);
kfree(f);
@@ -123,7 +132,7 @@ static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
static void fw_destroy(struct tcf_proto *tp)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
int h;
@@ -131,29 +140,33 @@ static void fw_destroy(struct tcf_proto *tp)
return;
for (h = 0; h < HTSIZE; h++) {
- while ((f = head->ht[h]) != NULL) {
- head->ht[h] = f->next;
- fw_delete_filter(tp, f);
+ while ((f = rtnl_dereference(head->ht[h])) != NULL) {
+ RCU_INIT_POINTER(head->ht[h],
+ rtnl_dereference(f->next));
+ call_rcu(&f->rcu, fw_delete_filter);
}
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static int fw_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *)arg;
- struct fw_filter **fp;
+ struct fw_filter __rcu **fp;
+ struct fw_filter *pfp;
if (head == NULL || f == NULL)
goto out;
- for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
- tcf_tree_lock(tp);
- *fp = f->next;
- tcf_tree_unlock(tp);
- fw_delete_filter(tp, f);
+ fp = &head->ht[fw_hash(f->id)];
+
+ for (pfp = rtnl_dereference(*fp); pfp;
+ fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
+ if (pfp == f) {
+ RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
+ call_rcu(&f->rcu, fw_delete_filter);
return 0;
}
}
@@ -171,7 +184,7 @@ static int
fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct tcf_exts e;
u32 mask;
int err;
@@ -220,7 +233,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *) *arg;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FW_MAX + 1];
@@ -233,10 +246,44 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- if (f != NULL) {
+ if (f) {
+ struct fw_filter *pfp, *fnew;
+ struct fw_filter __rcu **fp;
+
if (f->id != handle && handle)
return -EINVAL;
- return fw_change_attrs(net, tp, f, tb, tca, base, ovr);
+
+ fnew = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
+ if (!fnew)
+ return -ENOBUFS;
+
+ fnew->id = f->id;
+ fnew->res = f->res;
+#ifdef CONFIG_NET_CLS_IND
+ fnew->ifindex = f->ifindex;
+#endif /* CONFIG_NET_CLS_IND */
+ fnew->tp = f->tp;
+
+ tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+
+ err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr);
+ if (err < 0) {
+ kfree(fnew);
+ return err;
+ }
+
+ fp = &head->ht[fw_hash(fnew->id)];
+ for (pfp = rtnl_dereference(*fp); pfp;
+ fp = &pfp->next, pfp = rtnl_dereference(*fp))
+ if (pfp == f)
+ break;
+
+ RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next));
+ rcu_assign_pointer(*fp, fnew);
+ call_rcu(&f->rcu, fw_delete_filter);
+
+ *arg = (unsigned long)fnew;
+ return err;
}
if (!handle)
@@ -252,9 +299,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
head->mask = mask;
- tcf_tree_lock(tp);
- tp->root = head;
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(tp->root, head);
}
f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
@@ -263,15 +308,14 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
f->id = handle;
+ f->tp = tp;
err = fw_change_attrs(net, tp, f, tb, tca, base, ovr);
if (err < 0)
goto errout;
- f->next = head->ht[fw_hash(handle)];
- tcf_tree_lock(tp);
- head->ht[fw_hash(handle)] = f;
- tcf_tree_unlock(tp);
+ RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]);
+ rcu_assign_pointer(head->ht[fw_hash(handle)], f);
*arg = (unsigned long)f;
return 0;
@@ -283,7 +327,7 @@ errout:
static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
int h;
if (head == NULL)
@@ -295,7 +339,8 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
for (h = 0; h < HTSIZE; h++) {
struct fw_filter *f;
- for (f = head->ht[h]; f; f = f->next) {
+ for (f = rtnl_dereference(head->ht[h]); f;
+ f = rtnl_dereference(f->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
@@ -312,7 +357,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *)fh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index dd9fc2523c76..ba96deacf27c 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -29,25 +29,26 @@
* are mutually exclusive.
* 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
*/
-
struct route4_fastmap {
- struct route4_filter *filter;
- u32 id;
- int iif;
+ struct route4_filter *filter;
+ u32 id;
+ int iif;
};
struct route4_head {
- struct route4_fastmap fastmap[16];
- struct route4_bucket *table[256 + 1];
+ struct route4_fastmap fastmap[16];
+ struct route4_bucket __rcu *table[256 + 1];
+ struct rcu_head rcu;
};
struct route4_bucket {
/* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
- struct route4_filter *ht[16 + 16 + 1];
+ struct route4_filter __rcu *ht[16 + 16 + 1];
+ struct rcu_head rcu;
};
struct route4_filter {
- struct route4_filter *next;
+ struct route4_filter __rcu *next;
u32 id;
int iif;
@@ -55,6 +56,8 @@ struct route4_filter {
struct tcf_exts exts;
u32 handle;
struct route4_bucket *bkt;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
@@ -64,14 +67,13 @@ static inline int route4_fastmap_hash(u32 id, int iif)
return id & 0xF;
}
+static DEFINE_SPINLOCK(fastmap_lock);
static void
-route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
+route4_reset_fastmap(struct route4_head *head)
{
- spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
-
- spin_lock_bh(root_lock);
+ spin_lock_bh(&fastmap_lock);
memset(head->fastmap, 0, sizeof(head->fastmap));
- spin_unlock_bh(root_lock);
+ spin_unlock_bh(&fastmap_lock);
}
static void
@@ -80,9 +82,12 @@ route4_set_fastmap(struct route4_head *head, u32 id, int iif,
{
int h = route4_fastmap_hash(id, iif);
+ /* fastmap updates must look atomic to aling id, iff, filter */
+ spin_lock_bh(&fastmap_lock);
head->fastmap[h].id = id;
head->fastmap[h].iif = iif;
head->fastmap[h].filter = f;
+ spin_unlock_bh(&fastmap_lock);
}
static inline int route4_hash_to(u32 id)
@@ -123,7 +128,7 @@ static inline int route4_hash_wild(void)
static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rcu_dereference_bh(tp->root);
struct dst_entry *dst;
struct route4_bucket *b;
struct route4_filter *f;
@@ -141,32 +146,43 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
iif = inet_iif(skb);
h = route4_fastmap_hash(id, iif);
+
+ spin_lock(&fastmap_lock);
if (id == head->fastmap[h].id &&
iif == head->fastmap[h].iif &&
(f = head->fastmap[h].filter) != NULL) {
- if (f == ROUTE4_FAILURE)
+ if (f == ROUTE4_FAILURE) {
+ spin_unlock(&fastmap_lock);
goto failure;
+ }
*res = f->res;
+ spin_unlock(&fastmap_lock);
return 0;
}
+ spin_unlock(&fastmap_lock);
h = route4_hash_to(id);
restart:
- b = head->table[h];
+ b = rcu_dereference_bh(head->table[h]);
if (b) {
- for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
+ for (f = rcu_dereference_bh(b->ht[route4_hash_from(id)]);
+ f;
+ f = rcu_dereference_bh(f->next))
if (f->id == id)
ROUTE4_APPLY_RESULT();
- for (f = b->ht[route4_hash_iif(iif)]; f; f = f->next)
+ for (f = rcu_dereference_bh(b->ht[route4_hash_iif(iif)]);
+ f;
+ f = rcu_dereference_bh(f->next))
if (f->iif == iif)
ROUTE4_APPLY_RESULT();
- for (f = b->ht[route4_hash_wild()]; f; f = f->next)
+ for (f = rcu_dereference_bh(b->ht[route4_hash_wild()]);
+ f;
+ f = rcu_dereference_bh(f->next))
ROUTE4_APPLY_RESULT();
-
}
if (h < 256) {
h = 256;
@@ -213,7 +229,7 @@ static inline u32 from_hash(u32 id)
static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rtnl_dereference(tp->root);
struct route4_bucket *b;
struct route4_filter *f;
unsigned int h1, h2;
@@ -229,9 +245,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
if (h2 > 32)
return 0;
- b = head->table[h1];
+ b = rtnl_dereference(head->table[h1]);
if (b) {
- for (f = b->ht[h2]; f; f = f->next)
+ for (f = rtnl_dereference(b->ht[h2]);
+ f;
+ f = rtnl_dereference(f->next))
if (f->handle == handle)
return (unsigned long)f;
}
@@ -248,8 +266,11 @@ static int route4_init(struct tcf_proto *tp)
}
static void
-route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
+route4_delete_filter(struct rcu_head *head)
{
+ struct route4_filter *f = container_of(head, struct route4_filter, rcu);
+ struct tcf_proto *tp = f->tp;
+
tcf_unbind_filter(tp, &f->res);
tcf_exts_destroy(tp, &f->exts);
kfree(f);
@@ -257,7 +278,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
static void route4_destroy(struct tcf_proto *tp)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
if (head == NULL)
@@ -266,28 +287,35 @@ static void route4_destroy(struct tcf_proto *tp)
for (h1 = 0; h1 <= 256; h1++) {
struct route4_bucket *b;
- b = head->table[h1];
+ b = rtnl_dereference(head->table[h1]);
if (b) {
for (h2 = 0; h2 <= 32; h2++) {
struct route4_filter *f;
- while ((f = b->ht[h2]) != NULL) {
- b->ht[h2] = f->next;
- route4_delete_filter(tp, f);
+ while ((f = rtnl_dereference(b->ht[h2])) != NULL) {
+ struct route4_filter *next;
+
+ next = rtnl_dereference(f->next);
+ RCU_INIT_POINTER(b->ht[h2], next);
+ call_rcu(&f->rcu, route4_delete_filter);
}
}
- kfree(b);
+ RCU_INIT_POINTER(head->table[h1], NULL);
+ kfree_rcu(b, rcu);
}
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static int route4_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct route4_head *head = tp->root;
- struct route4_filter **fp, *f = (struct route4_filter *)arg;
- unsigned int h = 0;
+ struct route4_head *head = rtnl_dereference(tp->root);
+ struct route4_filter *f = (struct route4_filter *)arg;
+ struct route4_filter __rcu **fp;
+ struct route4_filter *nf;
struct route4_bucket *b;
+ unsigned int h = 0;
int i;
if (!head || !f)
@@ -296,27 +324,35 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
h = f->handle;
b = f->bkt;
- for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
- tcf_tree_lock(tp);
- *fp = f->next;
- tcf_tree_unlock(tp);
+ fp = &b->ht[from_hash(h >> 16)];
+ for (nf = rtnl_dereference(*fp); nf;
+ fp = &nf->next, nf = rtnl_dereference(*fp)) {
+ if (nf == f) {
+ /* unlink it */
+ RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
- route4_reset_fastmap(tp->q, head, f->id);
- route4_delete_filter(tp, f);
+ /* Remove any fastmap lookups that might ref filter
+ * notice we unlink'd the filter so we can't get it
+ * back in the fastmap.
+ */
+ route4_reset_fastmap(head);
- /* Strip tree */
+ /* Delete it */
+ call_rcu(&f->rcu, route4_delete_filter);
- for (i = 0; i <= 32; i++)
- if (b->ht[i])
+ /* Strip RTNL protected tree */
+ for (i = 0; i <= 32; i++) {
+ struct route4_filter *rt;
+
+ rt = rtnl_dereference(b->ht[i]);
+ if (rt)
return 0;
+ }
/* OK, session has no flows */
- tcf_tree_lock(tp);
- head->table[to_hash(h)] = NULL;
- tcf_tree_unlock(tp);
+ RCU_INIT_POINTER(head->table[to_hash(h)], NULL);
+ kfree_rcu(b, rcu);
- kfree(b);
return 0;
}
}
@@ -380,26 +416,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
}
h1 = to_hash(nhandle);
- b = head->table[h1];
+ b = rtnl_dereference(head->table[h1]);
if (!b) {
err = -ENOBUFS;
b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
if (b == NULL)
goto errout;
- tcf_tree_lock(tp);
- head->table[h1] = b;
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(head->table[h1], b);
} else {
unsigned int h2 = from_hash(nhandle >> 16);
err = -EEXIST;
- for (fp = b->ht[h2]; fp; fp = fp->next)
+ for (fp = rtnl_dereference(b->ht[h2]);
+ fp;
+ fp = rtnl_dereference(fp->next))
if (fp->handle == f->handle)
goto errout;
}
- tcf_tree_lock(tp);
if (tb[TCA_ROUTE4_TO])
f->id = to;
@@ -410,7 +445,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
f->handle = nhandle;
f->bkt = b;
- tcf_tree_unlock(tp);
+ f->tp = tp;
if (tb[TCA_ROUTE4_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_ROUTE4_CLASSID]);
@@ -431,14 +466,15 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct route4_head *head = tp->root;
- struct route4_filter *f, *f1, **fp;
+ struct route4_head *head = rtnl_dereference(tp->root);
+ struct route4_filter __rcu **fp;
+ struct route4_filter *fold, *f1, *pfp, *f = NULL;
struct route4_bucket *b;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_ROUTE4_MAX + 1];
unsigned int h, th;
- u32 old_handle = 0;
int err;
+ bool new = true;
if (opt == NULL)
return handle ? -EINVAL : 0;
@@ -447,70 +483,70 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- f = (struct route4_filter *)*arg;
- if (f) {
- if (f->handle != handle && handle)
+ fold = (struct route4_filter *)*arg;
+ if (fold && handle && fold->handle != handle)
return -EINVAL;
- if (f->bkt)
- old_handle = f->handle;
-
- err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], 0, ovr);
- if (err < 0)
- return err;
-
- goto reinsert;
- }
-
err = -ENOBUFS;
if (head == NULL) {
head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
if (head == NULL)
goto errout;
-
- tcf_tree_lock(tp);
- tp->root = head;
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(tp->root, head);
}
f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL);
- if (f == NULL)
+ if (!f)
goto errout;
tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+ if (fold) {
+ f->id = fold->id;
+ f->iif = fold->iif;
+ f->res = fold->res;
+ f->handle = fold->handle;
+
+ f->tp = fold->tp;
+ f->bkt = fold->bkt;
+ new = false;
+ }
+
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], 1, ovr);
+ tca[TCA_RATE], new, ovr);
if (err < 0)
goto errout;
-reinsert:
h = from_hash(f->handle >> 16);
- for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
+ fp = &f->bkt->ht[h];
+ for (pfp = rtnl_dereference(*fp);
+ (f1 = rtnl_dereference(*fp)) != NULL;
+ fp = &f1->next)
if (f->handle < f1->handle)
break;
- f->next = f1;
- tcf_tree_lock(tp);
- *fp = f;
+ rcu_assign_pointer(f->next, f1);
+ rcu_assign_pointer(*fp, f);
- if (old_handle && f->handle != old_handle) {
- th = to_hash(old_handle);
- h = from_hash(old_handle >> 16);
- b = head->table[th];
+ if (fold && fold->handle && f->handle != fold->handle) {
+ th = to_hash(fold->handle);
+ h = from_hash(fold->handle >> 16);
+ b = rtnl_dereference(head->table[th]);
if (b) {
- for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
+ fp = &b->ht[h];
+ for (pfp = rtnl_dereference(*fp); pfp;
+ fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
+ if (pfp == f) {
*fp = f->next;
break;
}
}
}
}
- tcf_tree_unlock(tp);
- route4_reset_fastmap(tp->q, head, f->id);
+ route4_reset_fastmap(head);
*arg = (unsigned long)f;
+ if (fold)
+ call_rcu(&fold->rcu, route4_delete_filter);
return 0;
errout:
@@ -520,7 +556,7 @@ errout:
static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
if (head == NULL)
@@ -530,13 +566,15 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
return;
for (h = 0; h <= 256; h++) {
- struct route4_bucket *b = head->table[h];
+ struct route4_bucket *b = rtnl_dereference(head->table[h]);
if (b) {
for (h1 = 0; h1 <= 32; h1++) {
struct route4_filter *f;
- for (f = b->ht[h1]; f; f = f->next) {
+ for (f = rtnl_dereference(b->ht[h1]);
+ f;
+ f = rtnl_dereference(f->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 1020e233a5d6..b044c208b133 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -70,31 +70,34 @@ struct rsvp_head {
u32 tmap[256/32];
u32 hgenerator;
u8 tgenerator;
- struct rsvp_session *ht[256];
+ struct rsvp_session __rcu *ht[256];
+ struct rcu_head rcu;
};
struct rsvp_session {
- struct rsvp_session *next;
- __be32 dst[RSVP_DST_LEN];
- struct tc_rsvp_gpi dpi;
- u8 protocol;
- u8 tunnelid;
+ struct rsvp_session __rcu *next;
+ __be32 dst[RSVP_DST_LEN];
+ struct tc_rsvp_gpi dpi;
+ u8 protocol;
+ u8 tunnelid;
/* 16 (src,sport) hash slots, and one wildcard source slot */
- struct rsvp_filter *ht[16 + 1];
+ struct rsvp_filter __rcu *ht[16 + 1];
+ struct rcu_head rcu;
};
struct rsvp_filter {
- struct rsvp_filter *next;
- __be32 src[RSVP_DST_LEN];
- struct tc_rsvp_gpi spi;
- u8 tunnelhdr;
+ struct rsvp_filter __rcu *next;
+ __be32 src[RSVP_DST_LEN];
+ struct tc_rsvp_gpi spi;
+ u8 tunnelhdr;
- struct tcf_result res;
- struct tcf_exts exts;
+ struct tcf_result res;
+ struct tcf_exts exts;
- u32 handle;
- struct rsvp_session *sess;
+ u32 handle;
+ struct rsvp_session *sess;
+ struct rcu_head rcu;
};
static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
@@ -128,7 +131,7 @@ static inline unsigned int hash_src(__be32 *src)
static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
+ struct rsvp_head *head = rcu_dereference_bh(tp->root);
struct rsvp_session *s;
struct rsvp_filter *f;
unsigned int h1, h2;
@@ -169,7 +172,8 @@ restart:
h1 = hash_dst(dst, protocol, tunnelid);
h2 = hash_src(src);
- for (s = sht[h1]; s; s = s->next) {
+ for (s = rcu_dereference_bh(head->ht[h1]); s;
+ s = rcu_dereference_bh(s->next)) {
if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
protocol == s->protocol &&
!(s->dpi.mask &
@@ -181,7 +185,8 @@ restart:
#endif
tunnelid == s->tunnelid) {
- for (f = s->ht[h2]; f; f = f->next) {
+ for (f = rcu_dereference_bh(s->ht[h2]); f;
+ f = rcu_dereference_bh(f->next)) {
if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
!(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
#if RSVP_DST_LEN == 4
@@ -205,7 +210,8 @@ matched:
}
/* And wildcard bucket... */
- for (f = s->ht[16]; f; f = f->next) {
+ for (f = rcu_dereference_bh(s->ht[16]); f;
+ f = rcu_dereference_bh(f->next)) {
*res = f->res;
RSVP_APPLY_RESULT();
goto matched;
@@ -218,7 +224,7 @@ matched:
static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
{
- struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
+ struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_session *s;
struct rsvp_filter *f;
unsigned int h1 = handle & 0xFF;
@@ -227,8 +233,10 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
if (h2 > 16)
return 0;
- for (s = sht[h1]; s; s = s->next) {
- for (f = s->ht[h2]; f; f = f->next) {
+ for (s = rtnl_dereference(head->ht[h1]); s;
+ s = rtnl_dereference(s->next)) {
+ for (f = rtnl_dereference(s->ht[h2]); f;
+ f = rtnl_dereference(f->next)) {
if (f->handle == handle)
return (unsigned long)f;
}
@@ -246,7 +254,7 @@ static int rsvp_init(struct tcf_proto *tp)
data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
if (data) {
- tp->root = data;
+ rcu_assign_pointer(tp->root, data);
return 0;
}
return -ENOBUFS;
@@ -257,53 +265,54 @@ rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
{
tcf_unbind_filter(tp, &f->res);
tcf_exts_destroy(tp, &f->exts);
- kfree(f);
+ kfree_rcu(f, rcu);
}
static void rsvp_destroy(struct tcf_proto *tp)
{
- struct rsvp_head *data = xchg(&tp->root, NULL);
- struct rsvp_session **sht;
+ struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
if (data == NULL)
return;
- sht = data->ht;
+ RCU_INIT_POINTER(tp->root, NULL);
for (h1 = 0; h1 < 256; h1++) {
struct rsvp_session *s;
- while ((s = sht[h1]) != NULL) {
- sht[h1] = s->next;
+ while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
+ RCU_INIT_POINTER(data->ht[h1], s->next);
for (h2 = 0; h2 <= 16; h2++) {
struct rsvp_filter *f;
- while ((f = s->ht[h2]) != NULL) {
- s->ht[h2] = f->next;
+ while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
+ rcu_assign_pointer(s->ht[h2], f->next);
rsvp_delete_filter(tp, f);
}
}
- kfree(s);
+ kfree_rcu(s, rcu);
}
}
- kfree(data);
+ kfree_rcu(data, rcu);
}
static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
+ struct rsvp_head *head = rtnl_dereference(tp->root);
+ struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
+ struct rsvp_filter __rcu **fp;
unsigned int h = f->handle;
- struct rsvp_session **sp;
- struct rsvp_session *s = f->sess;
+ struct rsvp_session __rcu **sp;
+ struct rsvp_session *nsp, *s = f->sess;
int i;
- for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
- tcf_tree_lock(tp);
- *fp = f->next;
- tcf_tree_unlock(tp);
+ fp = &s->ht[(h >> 8) & 0xFF];
+ for (nfp = rtnl_dereference(*fp); nfp;
+ fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
+ if (nfp == f) {
+ RCU_INIT_POINTER(*fp, f->next);
rsvp_delete_filter(tp, f);
/* Strip tree */
@@ -313,14 +322,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
return 0;
/* OK, session has no flows */
- for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
- *sp; sp = &(*sp)->next) {
- if (*sp == s) {
- tcf_tree_lock(tp);
- *sp = s->next;
- tcf_tree_unlock(tp);
-
- kfree(s);
+ sp = &head->ht[h & 0xFF];
+ for (nsp = rtnl_dereference(*sp); nsp;
+ sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
+ if (nsp == s) {
+ RCU_INIT_POINTER(*sp, s->next);
+ kfree_rcu(s, rcu);
return 0;
}
}
@@ -333,7 +340,7 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
{
- struct rsvp_head *data = tp->root;
+ struct rsvp_head *data = rtnl_dereference(tp->root);
int i = 0xFFFF;
while (i-- > 0) {
@@ -361,7 +368,7 @@ static int tunnel_bts(struct rsvp_head *data)
static void tunnel_recycle(struct rsvp_head *data)
{
- struct rsvp_session **sht = data->ht;
+ struct rsvp_session __rcu **sht = data->ht;
u32 tmap[256/32];
int h1, h2;
@@ -369,11 +376,13 @@ static void tunnel_recycle(struct rsvp_head *data)
for (h1 = 0; h1 < 256; h1++) {
struct rsvp_session *s;
- for (s = sht[h1]; s; s = s->next) {
+ for (s = rtnl_dereference(sht[h1]); s;
+ s = rtnl_dereference(s->next)) {
for (h2 = 0; h2 <= 16; h2++) {
struct rsvp_filter *f;
- for (f = s->ht[h2]; f; f = f->next) {
+ for (f = rtnl_dereference(s->ht[h2]); f;
+ f = rtnl_dereference(f->next)) {
if (f->tunnelhdr == 0)
continue;
data->tgenerator = f->res.classid;
@@ -417,9 +426,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct rsvp_head *data = tp->root;
- struct rsvp_filter *f, **fp;
- struct rsvp_session *s, **sp;
+ struct rsvp_head *data = rtnl_dereference(tp->root);
+ struct rsvp_filter *f, *nfp;
+ struct rsvp_filter __rcu **fp;
+ struct rsvp_session *nsp, *s;
+ struct rsvp_session __rcu **sp;
struct tc_rsvp_pinfo *pinfo = NULL;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_RSVP_MAX + 1];
@@ -499,7 +510,9 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
+ for (sp = &data->ht[h1];
+ (s = rtnl_dereference(*sp)) != NULL;
+ sp = &s->next) {
if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
pinfo && pinfo->protocol == s->protocol &&
memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -521,12 +534,16 @@ insert:
tcf_exts_change(tp, &f->exts, &e);
- for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
- if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
+ fp = &s->ht[h2];
+ for (nfp = rtnl_dereference(*fp); nfp;
+ fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
+ __u32 mask = nfp->spi.mask & f->spi.mask;
+
+ if (mask != f->spi.mask)
break;
- f->next = *fp;
- wmb();
- *fp = f;
+ }
+ RCU_INIT_POINTER(f->next, nfp);
+ rcu_assign_pointer(*fp, f);
*arg = (unsigned long)f;
return 0;
@@ -546,13 +563,14 @@ insert:
s->protocol = pinfo->protocol;
s->tunnelid = pinfo->tunnelid;
}
- for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
- if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
+ sp = &data->ht[h1];
+ for (nsp = rtnl_dereference(*sp); nsp;
+ sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
+ if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
break;
}
- s->next = *sp;
- wmb();
- *sp = s;
+ RCU_INIT_POINTER(s->next, nsp);
+ rcu_assign_pointer(*sp, s);
goto insert;
@@ -565,7 +583,7 @@ errout2:
static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct rsvp_head *head = tp->root;
+ struct rsvp_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
if (arg->stop)
@@ -574,11 +592,13 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
for (h = 0; h < 256; h++) {
struct rsvp_session *s;
- for (s = head->ht[h]; s; s = s->next) {
+ for (s = rtnl_dereference(head->ht[h]); s;
+ s = rtnl_dereference(s->next)) {
for (h1 = 0; h1 <= 16; h1++) {
struct rsvp_filter *f;
- for (f = s->ht[h1]; f; f = f->next) {
+ for (f = rtnl_dereference(s->ht[h1]); f;
+ f = rtnl_dereference(f->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 3e9f76413b3b..5054fae33a48 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -32,19 +32,21 @@ struct tcindex_filter_result {
struct tcindex_filter {
u16 key;
struct tcindex_filter_result result;
- struct tcindex_filter *next;
+ struct tcindex_filter __rcu *next;
+ struct rcu_head rcu;
};
struct tcindex_data {
struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
- struct tcindex_filter **h; /* imperfect hash; only used if !perfect;
- NULL if unused */
+ struct tcindex_filter __rcu **h; /* imperfect hash; */
+ struct tcf_proto *tp;
u16 mask; /* AND key with mask */
- int shift; /* shift ANDed key to the right */
- int hash; /* hash table size; 0 if undefined */
- int alloc_hash; /* allocated size */
- int fall_through; /* 0: only classify if explicit match */
+ u32 shift; /* shift ANDed key to the right */
+ u32 hash; /* hash table size; 0 if undefined */
+ u32 alloc_hash; /* allocated size */
+ u32 fall_through; /* 0: only classify if explicit match */
+ struct rcu_head rcu;
};
static inline int
@@ -56,13 +58,18 @@ tcindex_filter_is_set(struct tcindex_filter_result *r)
static struct tcindex_filter_result *
tcindex_lookup(struct tcindex_data *p, u16 key)
{
- struct tcindex_filter *f;
+ if (p->perfect) {
+ struct tcindex_filter_result *f = p->perfect + key;
+
+ return tcindex_filter_is_set(f) ? f : NULL;
+ } else if (p->h) {
+ struct tcindex_filter __rcu **fp;
+ struct tcindex_filter *f;
- if (p->perfect)
- return tcindex_filter_is_set(p->perfect + key) ?
- p->perfect + key : NULL;
- else if (p->h) {
- for (f = p->h[key % p->hash]; f; f = f->next)
+ fp = &p->h[key % p->hash];
+ for (f = rcu_dereference_bh_rtnl(*fp);
+ f;
+ fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
if (f->key == key)
return &f->result;
}
@@ -74,7 +81,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key)
static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rcu_dereference_bh(tp->root);
struct tcindex_filter_result *f;
int key = (skb->tc_index & p->mask) >> p->shift;
@@ -99,7 +106,7 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r;
pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
@@ -129,49 +136,59 @@ static int tcindex_init(struct tcf_proto *tp)
p->hash = DEFAULT_HASH_SIZE;
p->fall_through = 1;
- tp->root = p;
+ rcu_assign_pointer(tp->root, p);
return 0;
}
-
static int
-__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock)
+tcindex_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
+ struct tcindex_filter __rcu **walk;
struct tcindex_filter *f = NULL;
- pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f);
+ pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p);
if (p->perfect) {
if (!r->res.class)
return -ENOENT;
} else {
int i;
- struct tcindex_filter **walk = NULL;
- for (i = 0; i < p->hash; i++)
- for (walk = p->h+i; *walk; walk = &(*walk)->next)
- if (&(*walk)->result == r)
+ for (i = 0; i < p->hash; i++) {
+ walk = p->h + i;
+ for (f = rtnl_dereference(*walk); f;
+ walk = &f->next, f = rtnl_dereference(*walk)) {
+ if (&f->result == r)
goto found;
+ }
+ }
return -ENOENT;
found:
- f = *walk;
- if (lock)
- tcf_tree_lock(tp);
- *walk = f->next;
- if (lock)
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(*walk, rtnl_dereference(f->next));
}
tcf_unbind_filter(tp, &r->res);
tcf_exts_destroy(tp, &r->exts);
- kfree(f);
+ if (f)
+ kfree_rcu(f, rcu);
return 0;
}
-static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+static int tcindex_destroy_element(struct tcf_proto *tp,
+ unsigned long arg,
+ struct tcf_walker *walker)
+{
+ return tcindex_delete(tp, arg);
+}
+
+static void __tcindex_destroy(struct rcu_head *head)
{
- return __tcindex_delete(tp, arg, 1);
+ struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+
+ kfree(p->perfect);
+ kfree(p->h);
+ kfree(p);
}
static inline int
@@ -194,6 +211,14 @@ static void tcindex_filter_result_init(struct tcindex_filter_result *r)
tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
}
+static void __tcindex_partial_destroy(struct rcu_head *head)
+{
+ struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+
+ kfree(p->perfect);
+ kfree(p);
+}
+
static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
@@ -203,7 +228,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
int err, balloc = 0;
struct tcindex_filter_result new_filter_result, *old_r = r;
struct tcindex_filter_result cr;
- struct tcindex_data cp;
+ struct tcindex_data *cp, *oldp;
struct tcindex_filter *f = NULL; /* make gcc behave */
struct tcf_exts e;
@@ -212,84 +237,118 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (err < 0)
return err;
- memcpy(&cp, p, sizeof(cp));
- tcindex_filter_result_init(&new_filter_result);
+ /* tcindex_data attributes must look atomic to classifier/lookup so
+ * allocate new tcindex data and RCU assign it onto root. Keeping
+ * perfect hash and hash pointers from old data.
+ */
+ cp = kzalloc(sizeof(*cp), GFP_KERNEL);
+ if (!cp) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ cp->mask = p->mask;
+ cp->shift = p->shift;
+ cp->hash = p->hash;
+ cp->alloc_hash = p->alloc_hash;
+ cp->fall_through = p->fall_through;
+ cp->tp = tp;
+
+ if (p->perfect) {
+ cp->perfect = kmemdup(p->perfect,
+ sizeof(*r) * cp->hash, GFP_KERNEL);
+ if (!cp->perfect)
+ goto errout;
+ balloc = 1;
+ }
+ cp->h = p->h;
+
+ tcindex_filter_result_init(&new_filter_result);
tcindex_filter_result_init(&cr);
if (old_r)
cr.res = r->res;
if (tb[TCA_TCINDEX_HASH])
- cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
+ cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
if (tb[TCA_TCINDEX_MASK])
- cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+ cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
if (tb[TCA_TCINDEX_SHIFT])
- cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+ cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
err = -EBUSY;
+
/* Hash already allocated, make sure that we still meet the
* requirements for the allocated hash.
*/
- if (cp.perfect) {
- if (!valid_perfect_hash(&cp) ||
- cp.hash > cp.alloc_hash)
- goto errout;
- } else if (cp.h && cp.hash != cp.alloc_hash)
- goto errout;
+ if (cp->perfect) {
+ if (!valid_perfect_hash(cp) ||
+ cp->hash > cp->alloc_hash)
+ goto errout_alloc;
+ } else if (cp->h && cp->hash != cp->alloc_hash) {
+ goto errout_alloc;
+ }
err = -EINVAL;
if (tb[TCA_TCINDEX_FALL_THROUGH])
- cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
+ cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
- if (!cp.hash) {
+ if (!cp->hash) {
/* Hash not specified, use perfect hash if the upper limit
* of the hashing index is below the threshold.
*/
- if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
- cp.hash = (cp.mask >> cp.shift) + 1;
+ if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
+ cp->hash = (cp->mask >> cp->shift) + 1;
else
- cp.hash = DEFAULT_HASH_SIZE;
+ cp->hash = DEFAULT_HASH_SIZE;
}
- if (!cp.perfect && !cp.h)
- cp.alloc_hash = cp.hash;
+ if (!cp->perfect && cp->h)
+ cp->alloc_hash = cp->hash;
/* Note: this could be as restrictive as if (handle & ~(mask >> shift))
* but then, we'd fail handles that may become valid after some future
* mask change. While this is extremely unlikely to ever matter,
* the check below is safer (and also more backwards-compatible).
*/
- if (cp.perfect || valid_perfect_hash(&cp))
- if (handle >= cp.alloc_hash)
- goto errout;
+ if (cp->perfect || valid_perfect_hash(cp))
+ if (handle >= cp->alloc_hash)
+ goto errout_alloc;
err = -ENOMEM;
- if (!cp.perfect && !cp.h) {
- if (valid_perfect_hash(&cp)) {
+ if (!cp->perfect && !cp->h) {
+ if (valid_perfect_hash(cp)) {
int i;
- cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL);
- if (!cp.perfect)
- goto errout;
- for (i = 0; i < cp.hash; i++)
- tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT,
+ cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL);
+ if (!cp->perfect)
+ goto errout_alloc;
+ for (i = 0; i < cp->hash; i++)
+ tcf_exts_init(&cp->perfect[i].exts,
+ TCA_TCINDEX_ACT,
TCA_TCINDEX_POLICE);
balloc = 1;
} else {
- cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL);
- if (!cp.h)
- goto errout;
+ struct tcindex_filter __rcu **hash;
+
+ hash = kcalloc(cp->hash,
+ sizeof(struct tcindex_filter *),
+ GFP_KERNEL);
+
+ if (!hash)
+ goto errout_alloc;
+
+ cp->h = hash;
balloc = 2;
}
}
- if (cp.perfect)
- r = cp.perfect + handle;
+ if (cp->perfect)
+ r = cp->perfect + handle;
else
- r = tcindex_lookup(&cp, handle) ? : &new_filter_result;
+ r = tcindex_lookup(cp, handle) ? : &new_filter_result;
if (r == &new_filter_result) {
f = kzalloc(sizeof(*f), GFP_KERNEL);
@@ -307,33 +366,41 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
else
tcf_exts_change(tp, &cr.exts, &e);
- tcf_tree_lock(tp);
if (old_r && old_r != r)
tcindex_filter_result_init(old_r);
- memcpy(p, &cp, sizeof(cp));
+ oldp = p;
r->res = cr.res;
+ rcu_assign_pointer(tp->root, cp);
if (r == &new_filter_result) {
- struct tcindex_filter **fp;
+ struct tcindex_filter *nfp;
+ struct tcindex_filter __rcu **fp;
f->key = handle;
f->result = new_filter_result;
f->next = NULL;
- for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next)
- /* nothing */;
- *fp = f;
+
+ fp = cp->h + (handle % cp->hash);
+ for (nfp = rtnl_dereference(*fp);
+ nfp;
+ fp = &nfp->next, nfp = rtnl_dereference(*fp))
+ ; /* nothing */
+
+ rcu_assign_pointer(*fp, f);
}
- tcf_tree_unlock(tp);
+ if (oldp)
+ call_rcu(&oldp->rcu, __tcindex_partial_destroy);
return 0;
errout_alloc:
if (balloc == 1)
- kfree(cp.perfect);
+ kfree(cp->perfect);
else if (balloc == 2)
- kfree(cp.h);
+ kfree(cp->h);
errout:
+ kfree(cp);
tcf_exts_destroy(tp, &e);
return err;
}
@@ -345,7 +412,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
int err;
@@ -364,10 +431,9 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
tca[TCA_RATE], ovr);
}
-
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter *f, *next;
int i;
@@ -390,8 +456,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
if (!p->h)
return;
for (i = 0; i < p->hash; i++) {
- for (f = p->h[i]; f; f = next) {
- next = f->next;
+ for (f = rtnl_dereference(p->h[i]); f; f = next) {
+ next = rtnl_dereference(f->next);
if (walker->count >= walker->skip) {
if (walker->fn(tp, (unsigned long) &f->result,
walker) < 0) {
@@ -404,17 +470,9 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-
-static int tcindex_destroy_element(struct tcf_proto *tp,
- unsigned long arg, struct tcf_walker *walker)
-{
- return __tcindex_delete(tp, arg, 0);
-}
-
-
static void tcindex_destroy(struct tcf_proto *tp)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
@@ -422,17 +480,16 @@ static void tcindex_destroy(struct tcf_proto *tp)
walker.skip = 0;
walker.fn = tcindex_destroy_element;
tcindex_walk(tp, &walker);
- kfree(p->perfect);
- kfree(p->h);
- kfree(p);
- tp->root = NULL;
+
+ RCU_INIT_POINTER(tp->root, NULL);
+ call_rcu(&p->rcu, __tcindex_destroy);
}
static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
@@ -455,15 +512,18 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
nla_nest_end(skb, nest);
} else {
if (p->perfect) {
- t->tcm_handle = r-p->perfect;
+ t->tcm_handle = r - p->perfect;
} else {
struct tcindex_filter *f;
+ struct tcindex_filter __rcu **fp;
int i;
t->tcm_handle = 0;
for (i = 0; !t->tcm_handle && i < p->hash; i++) {
- for (f = p->h[i]; !t->tcm_handle && f;
- f = f->next) {
+ fp = &p->h[i];
+ for (f = rtnl_dereference(*fp);
+ !t->tcm_handle && f;
+ fp = &f->next, f = rtnl_dereference(*fp)) {
if (&f->result == r)
t->tcm_handle = f->key;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 70c0be8d0121..ef97a646ee94 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -36,6 +36,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
+#include <linux/percpu.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/bitmap.h>
@@ -44,40 +45,49 @@
#include <net/pkt_cls.h>
struct tc_u_knode {
- struct tc_u_knode *next;
+ struct tc_u_knode __rcu *next;
u32 handle;
- struct tc_u_hnode *ht_up;
+ struct tc_u_hnode __rcu *ht_up;
struct tcf_exts exts;
#ifdef CONFIG_NET_CLS_IND
int ifindex;
#endif
u8 fshift;
struct tcf_result res;
- struct tc_u_hnode *ht_down;
+ struct tc_u_hnode __rcu *ht_down;
#ifdef CONFIG_CLS_U32_PERF
- struct tc_u32_pcnt *pf;
+ struct tc_u32_pcnt __percpu *pf;
#endif
#ifdef CONFIG_CLS_U32_MARK
- struct tc_u32_mark mark;
+ u32 val;
+ u32 mask;
+ u32 __percpu *pcpu_success;
#endif
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
+ /* The 'sel' field MUST be the last field in structure to allow for
+ * tc_u32_keys allocated at end of structure.
+ */
struct tc_u32_sel sel;
};
struct tc_u_hnode {
- struct tc_u_hnode *next;
+ struct tc_u_hnode __rcu *next;
u32 handle;
u32 prio;
struct tc_u_common *tp_c;
int refcnt;
unsigned int divisor;
- struct tc_u_knode *ht[1];
+ struct tc_u_knode __rcu *ht[1];
+ struct rcu_head rcu;
};
struct tc_u_common {
- struct tc_u_hnode *hlist;
+ struct tc_u_hnode __rcu *hlist;
struct Qdisc *q;
int refcnt;
u32 hgenerator;
+ struct rcu_head rcu;
};
static inline unsigned int u32_hash_fold(__be32 key,
@@ -96,7 +106,7 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct
unsigned int off;
} stack[TC_U32_MAXDEPTH];
- struct tc_u_hnode *ht = tp->root;
+ struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
unsigned int off = skb_network_offset(skb);
struct tc_u_knode *n;
int sdepth = 0;
@@ -108,23 +118,23 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct
int i, r;
next_ht:
- n = ht->ht[sel];
+ n = rcu_dereference_bh(ht->ht[sel]);
next_knode:
if (n) {
struct tc_u32_key *key = n->sel.keys;
#ifdef CONFIG_CLS_U32_PERF
- n->pf->rcnt += 1;
+ __this_cpu_inc(n->pf->rcnt);
j = 0;
#endif
#ifdef CONFIG_CLS_U32_MARK
- if ((skb->mark & n->mark.mask) != n->mark.val) {
- n = n->next;
+ if ((skb->mark & n->mask) != n->val) {
+ n = rcu_dereference_bh(n->next);
goto next_knode;
} else {
- n->mark.success++;
+ __this_cpu_inc(*n->pcpu_success);
}
#endif
@@ -139,37 +149,39 @@ next_knode:
if (!data)
goto out;
if ((*data ^ key->val) & key->mask) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
#ifdef CONFIG_CLS_U32_PERF
- n->pf->kcnts[j] += 1;
+ __this_cpu_inc(n->pf->kcnts[j]);
j++;
#endif
}
- if (n->ht_down == NULL) {
+
+ ht = rcu_dereference_bh(n->ht_down);
+ if (!ht) {
check_terminal:
if (n->sel.flags & TC_U32_TERMINAL) {
*res = n->res;
#ifdef CONFIG_NET_CLS_IND
if (!tcf_match_indev(skb, n->ifindex)) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
#endif
#ifdef CONFIG_CLS_U32_PERF
- n->pf->rhit += 1;
+ __this_cpu_inc(n->pf->rhit);
#endif
r = tcf_exts_exec(skb, &n->exts, res);
if (r < 0) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
return r;
}
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
@@ -180,7 +192,7 @@ check_terminal:
stack[sdepth].off = off;
sdepth++;
- ht = n->ht_down;
+ ht = rcu_dereference_bh(n->ht_down);
sel = 0;
if (ht->divisor) {
__be32 *data, hdata;
@@ -222,7 +234,7 @@ check_terminal:
/* POP */
if (sdepth--) {
n = stack[sdepth].knode;
- ht = n->ht_up;
+ ht = rcu_dereference_bh(n->ht_up);
off = stack[sdepth].off;
goto check_terminal;
}
@@ -239,7 +251,9 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
struct tc_u_hnode *ht;
- for (ht = tp_c->hlist; ht; ht = ht->next)
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next))
if (ht->handle == handle)
break;
@@ -256,7 +270,9 @@ u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
if (sel > ht->divisor)
goto out;
- for (n = ht->ht[sel]; n; n = n->next)
+ for (n = rtnl_dereference(ht->ht[sel]);
+ n;
+ n = rtnl_dereference(n->next))
if (n->handle == handle)
break;
out:
@@ -270,7 +286,7 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
struct tc_u_common *tp_c = tp->data;
if (TC_U32_HTID(handle) == TC_U32_ROOT)
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
else
ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
@@ -291,6 +307,9 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
{
int i = 0x800;
+ /* hgenerator only used inside rtnl lock it is safe to increment
+ * without read _copy_ update semantics
+ */
do {
if (++tp_c->hgenerator == 0x7FF)
tp_c->hgenerator = 1;
@@ -326,41 +345,78 @@ static int u32_init(struct tcf_proto *tp)
}
tp_c->refcnt++;
- root_ht->next = tp_c->hlist;
- tp_c->hlist = root_ht;
+ RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
+ rcu_assign_pointer(tp_c->hlist, root_ht);
root_ht->tp_c = tp_c;
- tp->root = root_ht;
+ rcu_assign_pointer(tp->root, root_ht);
tp->data = tp_c;
return 0;
}
-static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
+static int u32_destroy_key(struct tcf_proto *tp,
+ struct tc_u_knode *n,
+ bool free_pf)
{
tcf_unbind_filter(tp, &n->res);
tcf_exts_destroy(tp, &n->exts);
if (n->ht_down)
n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
- kfree(n->pf);
+ if (free_pf)
+ free_percpu(n->pf);
+#endif
+#ifdef CONFIG_CLS_U32_MARK
+ if (free_pf)
+ free_percpu(n->pcpu_success);
#endif
kfree(n);
return 0;
}
+/* u32_delete_key_rcu should be called when free'ing a copied
+ * version of a tc_u_knode obtained from u32_init_knode(). When
+ * copies are obtained from u32_init_knode() the statistics are
+ * shared between the old and new copies to allow readers to
+ * continue to update the statistics during the copy. To support
+ * this the u32_delete_key_rcu variant does not free the percpu
+ * statistics.
+ */
+static void u32_delete_key_rcu(struct rcu_head *rcu)
+{
+ struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
+
+ u32_destroy_key(key->tp, key, false);
+}
+
+/* u32_delete_key_freepf_rcu is the rcu callback variant
+ * that free's the entire structure including the statistics
+ * percpu variables. Only use this if the key is not a copy
+ * returned by u32_init_knode(). See u32_delete_key_rcu()
+ * for the variant that should be used with keys return from
+ * u32_init_knode()
+ */
+static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
+{
+ struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
+
+ u32_destroy_key(key->tp, key, true);
+}
+
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
{
- struct tc_u_knode **kp;
- struct tc_u_hnode *ht = key->ht_up;
+ struct tc_u_knode __rcu **kp;
+ struct tc_u_knode *pkp;
+ struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
if (ht) {
- for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
- if (*kp == key) {
- tcf_tree_lock(tp);
- *kp = key->next;
- tcf_tree_unlock(tp);
+ kp = &ht->ht[TC_U32_HASH(key->handle)];
+ for (pkp = rtnl_dereference(*kp); pkp;
+ kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
+ if (pkp == key) {
+ RCU_INIT_POINTER(*kp, key->next);
- u32_destroy_key(tp, key);
+ call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
return 0;
}
}
@@ -369,16 +425,16 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
return 0;
}
-static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+static void u32_clear_hnode(struct tc_u_hnode *ht)
{
struct tc_u_knode *n;
unsigned int h;
for (h = 0; h <= ht->divisor; h++) {
- while ((n = ht->ht[h]) != NULL) {
- ht->ht[h] = n->next;
-
- u32_destroy_key(tp, n);
+ while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
+ RCU_INIT_POINTER(ht->ht[h],
+ rtnl_dereference(n->next));
+ call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
}
}
}
@@ -386,28 +442,31 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
struct tc_u_common *tp_c = tp->data;
- struct tc_u_hnode **hn;
+ struct tc_u_hnode __rcu **hn;
+ struct tc_u_hnode *phn;
WARN_ON(ht->refcnt);
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(ht);
- for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) {
- if (*hn == ht) {
- *hn = ht->next;
- kfree(ht);
+ hn = &tp_c->hlist;
+ for (phn = rtnl_dereference(*hn);
+ phn;
+ hn = &phn->next, phn = rtnl_dereference(*hn)) {
+ if (phn == ht) {
+ RCU_INIT_POINTER(*hn, ht->next);
+ kfree_rcu(ht, rcu);
return 0;
}
}
- WARN_ON(1);
return -ENOENT;
}
static void u32_destroy(struct tcf_proto *tp)
{
struct tc_u_common *tp_c = tp->data;
- struct tc_u_hnode *root_ht = tp->root;
+ struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
WARN_ON(root_ht == NULL);
@@ -419,17 +478,16 @@ static void u32_destroy(struct tcf_proto *tp)
tp->q->u32_node = NULL;
- for (ht = tp_c->hlist; ht; ht = ht->next) {
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next)) {
ht->refcnt--;
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(ht);
}
- while ((ht = tp_c->hlist) != NULL) {
- tp_c->hlist = ht->next;
-
- WARN_ON(ht->refcnt != 0);
-
- kfree(ht);
+ while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
+ RCU_INIT_POINTER(tp_c->hlist, ht->next);
+ kfree_rcu(ht, rcu);
}
kfree(tp_c);
@@ -441,6 +499,7 @@ static void u32_destroy(struct tcf_proto *tp)
static int u32_delete(struct tcf_proto *tp, unsigned long arg)
{
struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
+ struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
if (ht == NULL)
return 0;
@@ -448,7 +507,7 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
if (TC_U32_KEY(ht->handle))
return u32_delete_key(tp, (struct tc_u_knode *)ht);
- if (tp->root == ht)
+ if (root_ht == ht)
return -EINVAL;
if (ht->refcnt == 1) {
@@ -471,7 +530,9 @@ static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
if (!bitmap)
return handle | 0xFFF;
- for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+ for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
+ n;
+ n = rtnl_dereference(n->next))
set_bit(TC_U32_NODE(n->handle), bitmap);
i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
@@ -521,10 +582,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
ht_down->refcnt++;
}
- tcf_tree_lock(tp);
- ht_old = n->ht_down;
- n->ht_down = ht_down;
- tcf_tree_unlock(tp);
+ ht_old = rtnl_dereference(n->ht_down);
+ rcu_assign_pointer(n->ht_down, ht_down);
if (ht_old)
ht_old->refcnt--;
@@ -551,6 +610,82 @@ errout:
return err;
}
+static void u32_replace_knode(struct tcf_proto *tp,
+ struct tc_u_common *tp_c,
+ struct tc_u_knode *n)
+{
+ struct tc_u_knode __rcu **ins;
+ struct tc_u_knode *pins;
+ struct tc_u_hnode *ht;
+
+ if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
+ ht = rtnl_dereference(tp->root);
+ else
+ ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
+
+ ins = &ht->ht[TC_U32_HASH(n->handle)];
+
+ /* The node must always exist for it to be replaced if this is not the
+ * case then something went very wrong elsewhere.
+ */
+ for (pins = rtnl_dereference(*ins); ;
+ ins = &pins->next, pins = rtnl_dereference(*ins))
+ if (pins->handle == n->handle)
+ break;
+
+ RCU_INIT_POINTER(n->next, pins->next);
+ rcu_assign_pointer(*ins, n);
+}
+
+static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
+ struct tc_u_knode *n)
+{
+ struct tc_u_knode *new;
+ struct tc_u32_sel *s = &n->sel;
+
+ new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
+ GFP_KERNEL);
+
+ if (!new)
+ return NULL;
+
+ RCU_INIT_POINTER(new->next, n->next);
+ new->handle = n->handle;
+ RCU_INIT_POINTER(new->ht_up, n->ht_up);
+
+#ifdef CONFIG_NET_CLS_IND
+ new->ifindex = n->ifindex;
+#endif
+ new->fshift = n->fshift;
+ new->res = n->res;
+ RCU_INIT_POINTER(new->ht_down, n->ht_down);
+
+ /* bump reference count as long as we hold pointer to structure */
+ if (new->ht_down)
+ new->ht_down->refcnt++;
+
+#ifdef CONFIG_CLS_U32_PERF
+ /* Statistics may be incremented by readers during update
+ * so we must keep them in tact. When the node is later destroyed
+ * a special destroy call must be made to not free the pf memory.
+ */
+ new->pf = n->pf;
+#endif
+
+#ifdef CONFIG_CLS_U32_MARK
+ new->val = n->val;
+ new->mask = n->mask;
+ /* Similarly success statistics must be moved as pointers */
+ new->pcpu_success = n->pcpu_success;
+#endif
+ new->tp = tp;
+ memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
+
+ tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE);
+
+ return new;
+}
+
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca,
@@ -564,6 +699,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
struct nlattr *tb[TCA_U32_MAX + 1];
u32 htid;
int err;
+#ifdef CONFIG_CLS_U32_PERF
+ size_t size;
+#endif
if (opt == NULL)
return handle ? -EINVAL : 0;
@@ -574,11 +712,27 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n = (struct tc_u_knode *)*arg;
if (n) {
+ struct tc_u_knode *new;
+
if (TC_U32_KEY(n->handle) == 0)
return -EINVAL;
- return u32_set_parms(net, tp, base, n->ht_up, n, tb,
- tca[TCA_RATE], ovr);
+ new = u32_init_knode(tp, n);
+ if (!new)
+ return -ENOMEM;
+
+ err = u32_set_parms(net, tp, base,
+ rtnl_dereference(n->ht_up), new, tb,
+ tca[TCA_RATE], ovr);
+
+ if (err) {
+ u32_destroy_key(tp, new, false);
+ return err;
+ }
+
+ u32_replace_knode(tp, tp_c, new);
+ call_rcu(&n->rcu, u32_delete_key_rcu);
+ return 0;
}
if (tb[TCA_U32_DIVISOR]) {
@@ -601,8 +755,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht->divisor = divisor;
ht->handle = handle;
ht->prio = tp->prio;
- ht->next = tp_c->hlist;
- tp_c->hlist = ht;
+ RCU_INIT_POINTER(ht->next, tp_c->hlist);
+ rcu_assign_pointer(tp_c->hlist, ht);
*arg = (unsigned long)ht;
return 0;
}
@@ -610,7 +764,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (tb[TCA_U32_HASH]) {
htid = nla_get_u32(tb[TCA_U32_HASH]);
if (TC_U32_HTID(htid) == TC_U32_ROOT) {
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
htid = ht->handle;
} else {
ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
@@ -618,7 +772,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
} else {
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
htid = ht->handle;
}
@@ -642,46 +796,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
#ifdef CONFIG_CLS_U32_PERF
- n->pf = kzalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
- if (n->pf == NULL) {
+ size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
+ n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
+ if (!n->pf) {
kfree(n);
return -ENOBUFS;
}
#endif
memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
- n->ht_up = ht;
+ RCU_INIT_POINTER(n->ht_up, ht);
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
+ n->tp = tp;
#ifdef CONFIG_CLS_U32_MARK
+ n->pcpu_success = alloc_percpu(u32);
+ if (!n->pcpu_success) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
if (tb[TCA_U32_MARK]) {
struct tc_u32_mark *mark;
mark = nla_data(tb[TCA_U32_MARK]);
- memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
- n->mark.success = 0;
+ n->val = mark->val;
+ n->mask = mark->mask;
}
#endif
err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
if (err == 0) {
- struct tc_u_knode **ins;
- for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
- if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
+ struct tc_u_knode __rcu **ins;
+ struct tc_u_knode *pins;
+
+ ins = &ht->ht[TC_U32_HASH(handle)];
+ for (pins = rtnl_dereference(*ins); pins;
+ ins = &pins->next, pins = rtnl_dereference(*ins))
+ if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
break;
- n->next = *ins;
- tcf_tree_lock(tp);
- *ins = n;
- tcf_tree_unlock(tp);
+ RCU_INIT_POINTER(n->next, pins);
+ rcu_assign_pointer(*ins, n);
*arg = (unsigned long)n;
return 0;
}
+
+#ifdef CONFIG_CLS_U32_MARK
+ free_percpu(n->pcpu_success);
+errout:
+#endif
+
#ifdef CONFIG_CLS_U32_PERF
- kfree(n->pf);
+ free_percpu(n->pf);
#endif
kfree(n);
return err;
@@ -697,7 +867,9 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (arg->stop)
return;
- for (ht = tp_c->hlist; ht; ht = ht->next) {
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next)) {
if (ht->prio != tp->prio)
continue;
if (arg->count >= arg->skip) {
@@ -708,7 +880,9 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
arg->count++;
for (h = 0; h <= ht->divisor; h++) {
- for (n = ht->ht[h]; n; n = n->next) {
+ for (n = rtnl_dereference(ht->ht[h]);
+ n;
+ n = rtnl_dereference(n->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
@@ -727,6 +901,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct tc_u_knode *n = (struct tc_u_knode *)fh;
+ struct tc_u_hnode *ht_up, *ht_down;
struct nlattr *nest;
if (n == NULL)
@@ -745,11 +920,18 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
goto nla_put_failure;
} else {
+#ifdef CONFIG_CLS_U32_PERF
+ struct tc_u32_pcnt *gpf;
+ int cpu;
+#endif
+
if (nla_put(skb, TCA_U32_SEL,
sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
&n->sel))
goto nla_put_failure;
- if (n->ht_up) {
+
+ ht_up = rtnl_dereference(n->ht_up);
+ if (ht_up) {
u32 htid = n->handle & 0xFFFFF000;
if (nla_put_u32(skb, TCA_U32_HASH, htid))
goto nla_put_failure;
@@ -757,14 +939,28 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (n->res.classid &&
nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
goto nla_put_failure;
- if (n->ht_down &&
- nla_put_u32(skb, TCA_U32_LINK, n->ht_down->handle))
+
+ ht_down = rtnl_dereference(n->ht_down);
+ if (ht_down &&
+ nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
goto nla_put_failure;
#ifdef CONFIG_CLS_U32_MARK
- if ((n->mark.val || n->mark.mask) &&
- nla_put(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark))
- goto nla_put_failure;
+ if ((n->val || n->mask)) {
+ struct tc_u32_mark mark = {.val = n->val,
+ .mask = n->mask,
+ .success = 0};
+ int cpum;
+
+ for_each_possible_cpu(cpum) {
+ __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
+
+ mark.success += cnt;
+ }
+
+ if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
+ goto nla_put_failure;
+ }
#endif
if (tcf_exts_dump(skb, &n->exts) < 0)
@@ -779,10 +975,29 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
}
#endif
#ifdef CONFIG_CLS_U32_PERF
+ gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
+ n->sel.nkeys * sizeof(u64),
+ GFP_KERNEL);
+ if (!gpf)
+ goto nla_put_failure;
+
+ for_each_possible_cpu(cpu) {
+ int i;
+ struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);
+
+ gpf->rcnt += pf->rcnt;
+ gpf->rhit += pf->rhit;
+ for (i = 0; i < n->sel.nkeys; i++)
+ gpf->kcnts[i] += pf->kcnts[i];
+ }
+
if (nla_put(skb, TCA_U32_PCNT,
sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
- n->pf))
+ gpf)) {
+ kfree(gpf);
goto nla_put_failure;
+ }
+ kfree(gpf);
#endif
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 58bed7599db7..ca6248345937 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1781,7 +1781,7 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
__be16 protocol = skb->protocol;
int err;
- for (; tp; tp = tp->next) {
+ for (; tp; tp = rcu_dereference_bh(tp->next)) {
if (tp->protocol != protocol &&
tp->protocol != htons(ETH_P_ALL))
continue;
@@ -1833,15 +1833,15 @@ void tcf_destroy(struct tcf_proto *tp)
{
tp->ops->destroy(tp);
module_put(tp->ops->owner);
- kfree(tp);
+ kfree_rcu(tp, rcu);
}
-void tcf_destroy_chain(struct tcf_proto **fl)
+void tcf_destroy_chain(struct tcf_proto __rcu **fl)
{
struct tcf_proto *tp;
- while ((tp = *fl) != NULL) {
- *fl = tp->next;
+ while ((tp = rtnl_dereference(*fl)) != NULL) {
+ RCU_INIT_POINTER(*fl, tp->next);
tcf_destroy(tp);
}
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 8449b337f9e3..c398f9c3dbdd 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -41,7 +41,7 @@
struct atm_flow_data {
struct Qdisc *q; /* FIFO, TBF, etc. */
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
void (*old_pop)(struct atm_vcc *vcc,
struct sk_buff *skb); /* chaining */
@@ -273,7 +273,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
error = -ENOBUFS;
goto err_out;
}
- flow->filter_list = NULL;
+ RCU_INIT_POINTER(flow->filter_list, NULL);
flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
if (!flow->q)
flow->q = &noop_qdisc;
@@ -311,7 +311,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
if (list_empty(&flow->list))
return -EINVAL;
- if (flow->filter_list || flow == &p->link)
+ if (rcu_access_pointer(flow->filter_list) || flow == &p->link)
return -EBUSY;
/*
* Reference count must be 2: one for "keepalive" (set at class
@@ -345,7 +345,8 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)cl;
@@ -369,11 +370,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
flow = NULL;
if (TC_H_MAJ(skb->priority) != sch->handle ||
!(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
+ struct tcf_proto *fl;
+
list_for_each_entry(flow, &p->flows, list) {
- if (flow->filter_list) {
- result = tc_classify_compat(skb,
- flow->filter_list,
- &res);
+ fl = rcu_dereference_bh(flow->filter_list);
+ if (fl) {
+ result = tc_classify_compat(skb, fl, &res);
if (result < 0)
continue;
flow = (struct atm_flow_data *)res.class;
@@ -544,7 +546,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
if (!p->link.q)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- p->link.filter_list = NULL;
+ RCU_INIT_POINTER(p->link.filter_list, NULL);
p->link.vcc = NULL;
p->link.sock = NULL;
p->link.classid = sch->handle;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 762a04bb8f6d..a3244a800501 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -133,7 +133,7 @@ struct cbq_class {
struct gnet_stats_rate_est64 rate_est;
struct tc_cbq_xstats xstats;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
int refcnt;
int filters;
@@ -221,6 +221,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
struct cbq_class **defmap;
struct cbq_class *cl = NULL;
u32 prio = skb->priority;
+ struct tcf_proto *fl;
struct tcf_result res;
/*
@@ -235,11 +236,12 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
int result = 0;
defmap = head->defaults;
+ fl = rcu_dereference_bh(head->filter_list);
/*
* Step 2+n. Apply classifier.
*/
- if (!head->filter_list ||
- (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
+ result = tc_classify_compat(skb, fl, &res);
+ if (!fl || result < 0)
goto fallback;
cl = (void *)res.class;
@@ -1954,7 +1956,8 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
return 0;
}
-static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
+static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
+ unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index fb666d1e4de3..8abc2625c3a1 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -57,7 +57,7 @@ struct choke_sched_data {
/* Variables */
struct red_vars vars;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct {
u32 prob_drop; /* Early probability drops */
u32 prob_mark; /* Early probability marks */
@@ -203,9 +203,11 @@ static bool choke_classify(struct sk_buff *skb,
{
struct choke_sched_data *q = qdisc_priv(sch);
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
- result = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -259,7 +261,7 @@ static bool choke_match_random(const struct choke_sched_data *q,
return false;
oskb = choke_peek_random(q, pidx);
- if (q->filter_list)
+ if (rcu_access_pointer(q->filter_list))
return choke_get_classid(nskb) == choke_get_classid(oskb);
return choke_match_flow(oskb, nskb);
@@ -267,11 +269,11 @@ static bool choke_match_random(const struct choke_sched_data *q,
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
+ int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
struct choke_sched_data *q = qdisc_priv(sch);
const struct red_parms *p = &q->parms;
- int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- if (q->filter_list) {
+ if (rcu_access_pointer(q->filter_list)) {
/* If using external classifiers, get result and record it. */
if (!choke_classify(skb, sch, &ret))
goto other_drop; /* Packet was eaten by filter */
@@ -564,7 +566,8 @@ static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
return 0;
}
-static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **choke_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct choke_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 7bbbfe112192..d8b5ccfd248a 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -35,7 +35,7 @@ struct drr_class {
struct drr_sched {
struct list_head active;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct Qdisc_class_hash clhash;
};
@@ -184,7 +184,8 @@ static void drr_put_class(struct Qdisc *sch, unsigned long arg)
drr_destroy_class(sch, cl);
}
-static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **drr_tcf_chain(struct Qdisc *sch,
+ unsigned long cl)
{
struct drr_sched *q = qdisc_priv(sch);
@@ -319,6 +320,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl;
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
@@ -328,7 +330,8 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 49d6ef338b55..485e456c8139 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -37,7 +37,7 @@
struct dsmark_qdisc_data {
struct Qdisc *q;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
u8 *mask; /* "owns" the array */
u8 *value;
u16 indices;
@@ -186,8 +186,8 @@ ignore:
}
}
-static inline struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
return &p->filter_list;
@@ -229,7 +229,8 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skb->tc_index = TC_H_MIN(skb->priority);
else {
struct tcf_result res;
- int result = tc_classify(skb, p->filter_list, &res);
+ struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
+ int result = tc_classify(skb, fl, &res);
pr_debug("result %d class 0x%04x\n", result, res.classid);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index ba32c2b005d0..e12f997e1b4c 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -416,7 +416,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
static struct sk_buff *fq_dequeue(struct Qdisc *sch)
{
struct fq_sched_data *q = qdisc_priv(sch);
- u64 now = ktime_to_ns(ktime_get());
+ u64 now = ktime_get_ns();
struct fq_flow_head *head;
struct sk_buff *skb;
struct fq_flow *f;
@@ -787,7 +787,7 @@ nla_put_failure:
static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct fq_sched_data *q = qdisc_priv(sch);
- u64 now = ktime_to_ns(ktime_get());
+ u64 now = ktime_get_ns();
struct tc_fq_qd_stats st = {
.gc_flows = q->stat_gc_flows,
.highprio_packets = q->stat_internal_packets,
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 063b726bf1f8..105cf5557630 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -52,7 +52,7 @@ struct fq_codel_flow {
}; /* please try to keep this structure <= 64 bytes */
struct fq_codel_sched_data {
- struct tcf_proto *filter_list; /* optional external classifier */
+ struct tcf_proto __rcu *filter_list; /* optional external classifier */
struct fq_codel_flow *flows; /* Flows table [flows_cnt] */
u32 *backlogs; /* backlog table [flows_cnt] */
u32 flows_cnt; /* number of flows */
@@ -77,13 +77,15 @@ static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
hash = jhash_3words((__force u32)keys.dst,
(__force u32)keys.src ^ keys.ip_proto,
(__force u32)keys.ports, q->perturbation);
- return ((u64)hash * q->flows_cnt) >> 32;
+
+ return reciprocal_scale(hash, q->flows_cnt);
}
static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
+ struct tcf_proto *filter;
struct tcf_result res;
int result;
@@ -92,11 +94,12 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
TC_H_MIN(skb->priority) <= q->flows_cnt)
return TC_H_MIN(skb->priority);
- if (!q->filter_list)
+ filter = rcu_dereference(q->filter_list);
+ if (!filter)
return fq_codel_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ result = tc_classify(skb, filter, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -495,7 +498,8 @@ static void fq_codel_put(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_proto **fq_codel_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **fq_codel_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index fc04fe93c2da..11b28f651ad1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -63,15 +63,18 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
if (unlikely(skb)) {
/* check the reason of requeuing without tx lock first */
- txq = netdev_get_tx_queue(txq->dev, skb_get_queue_mapping(skb));
+ txq = skb_get_tx_queue(txq->dev, skb);
if (!netif_xmit_frozen_or_stopped(txq)) {
q->gso_skb = NULL;
q->q.qlen--;
} else
skb = NULL;
} else {
- if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq))
+ if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) {
skb = q->dequeue(q);
+ if (skb)
+ skb = validate_xmit_skb(skb, qdisc_dev(q));
+ }
}
return skb;
@@ -90,7 +93,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
* detect it by checking xmit owner and drop the packet when
* deadloop is detected. Return OK to try the next skb.
*/
- kfree_skb(skb);
+ kfree_skb_list(skb);
net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n",
dev_queue->dev->name);
ret = qdisc_qlen(q);
@@ -107,9 +110,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
}
/*
- * Transmit one skb, and handle the return status as required. Holding the
- * __QDISC___STATE_RUNNING bit guarantees that only one CPU can execute this
- * function.
+ * Transmit possibly several skbs, and handle the return status as
+ * required. Holding the __QDISC___STATE_RUNNING bit guarantees that
+ * only one CPU can execute this function.
*
* Returns to the caller:
* 0 - queue is empty or throttled.
@@ -126,7 +129,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_stopped(txq))
- ret = dev_hard_start_xmit(skb, dev, txq);
+ skb = dev_hard_start_xmit(skb, dev, txq, &ret);
HARD_TX_UNLOCK(dev, txq);
@@ -183,10 +186,12 @@ static inline int qdisc_restart(struct Qdisc *q)
skb = dequeue_skb(q);
if (unlikely(!skb))
return 0;
+
WARN_ON_ONCE(skb_dst_is_noref(skb));
+
root_lock = qdisc_lock(q);
dev = qdisc_dev(q);
- txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+ txq = skb_get_tx_queue(dev, skb);
return sch_direct_xmit(skb, q, dev, txq, root_lock);
}
@@ -518,7 +523,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- skb_queue_head_init(band2list(priv, prio));
+ __skb_queue_head_init(band2list(priv, prio));
/* Can by-pass the queue discipline */
qdisc->flags |= TCQ_F_CAN_BYPASS;
@@ -616,7 +621,7 @@ void qdisc_reset(struct Qdisc *qdisc)
ops->reset(qdisc);
if (qdisc->gso_skb) {
- kfree_skb(qdisc->gso_skb);
+ kfree_skb_list(qdisc->gso_skb);
qdisc->gso_skb = NULL;
qdisc->q.qlen = 0;
}
@@ -652,7 +657,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
module_put(ops->owner);
dev_put(qdisc_dev(qdisc));
- kfree_skb(qdisc->gso_skb);
+ kfree_skb_list(qdisc->gso_skb);
/*
* gen_estimator est_timer() might access qdisc->q.lock,
* wait a RCU grace period before freeing qdisc.
@@ -778,7 +783,7 @@ static void dev_deactivate_queue(struct net_device *dev,
struct Qdisc *qdisc_default = _qdisc_default;
struct Qdisc *qdisc;
- qdisc = dev_queue->qdisc;
+ qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
spin_lock_bh(qdisc_lock(qdisc));
@@ -871,7 +876,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
{
struct Qdisc *qdisc = _qdisc;
- dev_queue->qdisc = qdisc;
+ rcu_assign_pointer(dev_queue->qdisc, qdisc);
dev_queue->qdisc_sleeping = qdisc;
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index ec8aeaac1dd7..04b0de4c68b5 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -116,7 +116,7 @@ struct hfsc_class {
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est64 rate_est;
unsigned int level; /* class level in hierarchy */
- struct tcf_proto *filter_list; /* filter list */
+ struct tcf_proto __rcu *filter_list; /* filter list */
unsigned int filter_cnt; /* filter count */
struct hfsc_sched *sched; /* scheduler data */
@@ -1161,7 +1161,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
head = &q->root;
- tcf = q->root.filter_list;
+ tcf = rcu_dereference_bh(q->root.filter_list);
while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -1185,7 +1185,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
return cl; /* hit leaf class */
/* apply inner filter chain */
- tcf = cl->filter_list;
+ tcf = rcu_dereference_bh(cl->filter_list);
head = cl;
}
@@ -1285,7 +1285,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
cl->filter_cnt--;
}
-static struct tcf_proto **
+static struct tcf_proto __rcu **
hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
{
struct hfsc_sched *q = qdisc_priv(sch);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 9f949abcacef..14408f262143 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -103,7 +103,7 @@ struct htb_class {
u32 prio; /* these two are used only by leaves... */
int quantum; /* but stored for parent-to-leaf return */
- struct tcf_proto *filter_list; /* class attached filters */
+ struct tcf_proto __rcu *filter_list; /* class attached filters */
int filter_cnt;
int refcnt; /* usage count of this class */
@@ -153,7 +153,7 @@ struct htb_sched {
int rate2quantum; /* quant = rate / rate2quantum */
/* filters for qdisc itself */
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
#define HTB_WARN_TOOMANYEVENTS 0x1
unsigned int warned; /* only one warning */
@@ -223,9 +223,9 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
if (cl->level == 0)
return cl;
/* Start with inner filter chain if a non-leaf class is selected */
- tcf = cl->filter_list;
+ tcf = rcu_dereference_bh(cl->filter_list);
} else {
- tcf = q->filter_list;
+ tcf = rcu_dereference_bh(q->filter_list);
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -251,7 +251,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
return cl; /* we hit leaf; return it */
/* we have got inner class; apply inner filter chain */
- tcf = cl->filter_list;
+ tcf = rcu_dereference_bh(cl->filter_list);
}
/* classification failed; try to use default class */
cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
@@ -895,7 +895,7 @@ ok:
if (!sch->q.qlen)
goto fin;
- q->now = ktime_to_ns(ktime_get());
+ q->now = ktime_get_ns();
start_at = jiffies;
next_event = q->now + 5LLU * NSEC_PER_SEC;
@@ -1044,7 +1044,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch);
INIT_WORK(&q->work, htb_work_func);
- skb_queue_head_init(&q->direct_queue);
+ __skb_queue_head_init(&q->direct_queue);
if (tb[TCA_HTB_DIRECT_QLEN])
q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
@@ -1225,7 +1225,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
parent->tokens = parent->buffer;
parent->ctokens = parent->cbuffer;
- parent->t_c = ktime_to_ns(ktime_get());
+ parent->t_c = ktime_get_ns();
parent->cmode = HTB_CAN_SEND;
}
@@ -1455,7 +1455,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */
- cl->t_c = ktime_to_ns(ktime_get());
+ cl->t_c = ktime_get_ns();
cl->cmode = HTB_CAN_SEND;
/* attach to the hash list and parent's family */
@@ -1519,11 +1519,12 @@ failure:
return err;
}
-static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
+static struct tcf_proto __rcu **htb_find_tcf(struct Qdisc *sch,
+ unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
- struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
+ struct tcf_proto __rcu **fl = cl ? &cl->filter_list : &q->filter_list;
return fl;
}
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 62871c14e1f9..b351125f3849 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -17,7 +17,7 @@
struct ingress_qdisc_data {
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
};
/* ------------------------- Class/flow operations ------------------------- */
@@ -46,7 +46,8 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
}
-static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct ingress_qdisc_data *p = qdisc_priv(sch);
@@ -59,9 +60,10 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct ingress_qdisc_data *p = qdisc_priv(sch);
struct tcf_result res;
+ struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
int result;
- result = tc_classify(skb, p->filter_list, &res);
+ result = tc_classify(skb, fl, &res);
qdisc_bstats_update(sch, skb);
switch (result) {
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 6749e2f540d0..37e7d25d21f1 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -231,7 +231,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
memset(&sch->qstats, 0, sizeof(sch->qstats));
for (i = 0; i < dev->num_tx_queues; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
spin_lock_bh(qdisc_lock(qdisc));
sch->q.qlen += qdisc->q.qlen;
sch->bstats.bytes += qdisc->bstats.bytes;
@@ -340,7 +340,9 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
spin_unlock_bh(d->lock);
for (i = tc.offset; i < tc.offset + tc.count; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+ struct netdev_queue *q = netdev_get_tx_queue(dev, i);
+
+ qdisc = rtnl_dereference(q->qdisc);
spin_lock_bh(qdisc_lock(qdisc));
bstats.bytes += qdisc->bstats.bytes;
bstats.packets += qdisc->bstats.packets;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index afb050a735fa..c0466c1840f3 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -31,7 +31,7 @@ struct multiq_sched_data {
u16 bands;
u16 max_bands;
u16 curband;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct Qdisc **queues;
};
@@ -42,10 +42,11 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
struct multiq_sched_data *q = qdisc_priv(sch);
u32 band;
struct tcf_result res;
+ struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- err = tc_classify(skb, q->filter_list, &res);
+ err = tc_classify(skb, fl, &res);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
@@ -388,7 +389,8 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **multiq_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct multiq_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 79359b69ad8d..03ef99e52a5c 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -24,7 +24,7 @@
struct prio_sched_data {
int bands;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
u8 prio2band[TC_PRIO_MAX+1];
struct Qdisc *queues[TCQ_PRIO_BANDS];
};
@@ -36,11 +36,13 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
struct prio_sched_data *q = qdisc_priv(sch);
u32 band = skb->priority;
struct tcf_result res;
+ struct tcf_proto *fl;
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
- err = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ err = tc_classify(skb, fl, &res);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
@@ -50,7 +52,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
return NULL;
}
#endif
- if (!q->filter_list || err < 0) {
+ if (!fl || err < 0) {
if (TC_H_MAJ(band))
band = 0;
return q->queues[q->prio2band[band & TC_PRIO_MAX]];
@@ -351,7 +353,8 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **prio_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct prio_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 8056fb4e618a..602ea01a4ddd 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -181,7 +181,7 @@ struct qfq_group {
};
struct qfq_sched {
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct Qdisc_class_hash clhash;
u64 oldV, V; /* Precise virtual times. */
@@ -576,7 +576,8 @@ static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
qfq_destroy_class(sch, cl);
}
-static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch,
+ unsigned long cl)
{
struct qfq_sched *q = qdisc_priv(sch);
@@ -704,6 +705,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl;
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
@@ -714,7 +716,8 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 9b0f7093d970..1562fb2b3f46 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -55,7 +55,7 @@ struct sfb_bins {
struct sfb_sched_data {
struct Qdisc *qdisc;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
unsigned long rehash_interval;
unsigned long warmup_time; /* double buffering warmup time in jiffies */
u32 max;
@@ -253,13 +253,13 @@ static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q)
return false;
}
-static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q,
+static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
int *qerr, u32 *salt)
{
struct tcf_result res;
int result;
- result = tc_classify(skb, q->filter_list, &res);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -281,6 +281,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct sfb_sched_data *q = qdisc_priv(sch);
struct Qdisc *child = q->qdisc;
+ struct tcf_proto *fl;
int i;
u32 p_min = ~0;
u32 minqlen = ~0;
@@ -306,9 +307,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}
- if (q->filter_list) {
+ fl = rcu_dereference_bh(q->filter_list);
+ if (fl) {
/* If using external classifiers, get result and record it. */
- if (!sfb_classify(skb, q, &ret, &salt))
+ if (!sfb_classify(skb, fl, &ret, &salt))
goto other_drop;
keys.src = salt;
keys.dst = 0;
@@ -660,7 +662,8 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **sfb_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct sfb_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 1af2f73906d0..80c36bd54abc 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -125,7 +125,7 @@ struct sfq_sched_data {
u8 cur_depth; /* depth of longest slot */
u8 flags;
unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
sfq_index *ht; /* Hash table ('divisor' slots) */
struct sfq_slot *slots; /* Flows table ('maxflows' entries) */
@@ -187,6 +187,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
{
struct sfq_sched_data *q = qdisc_priv(sch);
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
if (TC_H_MAJ(skb->priority) == sch->handle &&
@@ -194,13 +195,14 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
TC_H_MIN(skb->priority) <= q->divisor)
return TC_H_MIN(skb->priority);
- if (!q->filter_list) {
+ fl = rcu_dereference_bh(q->filter_list);
+ if (!fl) {
skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
return sfq_hash(q, skb) + 1;
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -310,11 +312,6 @@ static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
slot->skblist_prev = skb;
}
-#define slot_queue_walk(slot, skb) \
- for (skb = slot->skblist_next; \
- skb != (struct sk_buff *)slot; \
- skb = skb->next)
-
static unsigned int sfq_drop(struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
@@ -841,7 +838,8 @@ static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct sfq_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 18ff63433709..0c39b754083b 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -239,7 +239,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
s64 ptoks = 0;
unsigned int len = qdisc_pkt_len(skb);
- now = ktime_to_ns(ktime_get());
+ now = ktime_get_ns();
toks = min_t(s64, now - q->t_c, q->buffer);
if (tbf_peak_present(q)) {
@@ -292,7 +292,7 @@ static void tbf_reset(struct Qdisc *sch)
qdisc_reset(q->qdisc);
sch->q.qlen = 0;
- q->t_c = ktime_to_ns(ktime_get());
+ q->t_c = ktime_get_ns();
q->tokens = q->buffer;
q->ptokens = q->mtu;
qdisc_watchdog_cancel(&q->watchdog);
@@ -431,7 +431,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- q->t_c = ktime_to_ns(ktime_get());
+ q->t_c = ktime_get_ns();
qdisc_watchdog_init(&q->watchdog, sch);
q->qdisc = &noop_qdisc;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index bd33793b527e..5cd291bd00e4 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -96,11 +96,14 @@ teql_dequeue(struct Qdisc *sch)
struct teql_sched_data *dat = qdisc_priv(sch);
struct netdev_queue *dat_queue;
struct sk_buff *skb;
+ struct Qdisc *q;
skb = __skb_dequeue(&dat->q);
dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
+ q = rcu_dereference_bh(dat_queue->qdisc);
+
if (skb == NULL) {
- struct net_device *m = qdisc_dev(dat_queue->qdisc);
+ struct net_device *m = qdisc_dev(q);
if (m) {
dat->m->slaves = sch;
netif_wake_queue(m);
@@ -108,7 +111,7 @@ teql_dequeue(struct Qdisc *sch)
} else {
qdisc_bstats_update(sch, skb);
}
- sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
+ sch->q.qlen = dat->q.qlen + q->q.qlen;
return skb;
}
@@ -157,9 +160,9 @@ teql_destroy(struct Qdisc *sch)
txq = netdev_get_tx_queue(master->dev, 0);
master->slaves = NULL;
- root_lock = qdisc_root_sleeping_lock(txq->qdisc);
+ root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
spin_lock_bh(root_lock);
- qdisc_reset(txq->qdisc);
+ qdisc_reset(rtnl_dereference(txq->qdisc));
spin_unlock_bh(root_lock);
}
}
@@ -266,7 +269,7 @@ static inline int teql_resolve(struct sk_buff *skb,
struct dst_entry *dst = skb_dst(skb);
int res;
- if (txq->qdisc == &noop_qdisc)
+ if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
return -ENODEV;
if (!dev->header_ops || !dst)
@@ -301,7 +304,6 @@ restart:
do {
struct net_device *slave = qdisc_dev(q);
struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
- const struct net_device_ops *slave_ops = slave->netdev_ops;
if (slave_txq->qdisc_sleeping != q)
continue;
@@ -317,8 +319,8 @@ restart:
unsigned int length = qdisc_pkt_len(skb);
if (!netif_xmit_frozen_or_stopped(slave_txq) &&
- slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
- txq_trans_update(slave_txq);
+ netdev_start_xmit(skb, slave, slave_txq, false) ==
+ NETDEV_TX_OK) {
__netif_tx_unlock(slave_txq);
master->slaves = NEXT_SLAVE(q);
netif_wake_queue(dev);