summaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/cls_api.c1
-rw-r--r--net/sched/cls_flower.c587
-rw-r--r--net/sched/sch_api.c15
-rw-r--r--net/sched/sch_cbs.c98
-rw-r--r--net/sched/sch_generic.c70
-rw-r--r--net/sched/sch_taprio.c160
6 files changed, 670 insertions, 261 deletions
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 99ae30c177c7..9115f053883f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3229,7 +3229,6 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->tunnel = tcf_tunnel_info(act);
} else if (is_tcf_tunnel_release(act)) {
entry->id = FLOW_ACTION_TUNNEL_DECAP;
- entry->tunnel = tcf_tunnel_info(act);
} else if (is_tcf_pedit(act)) {
for (k = 0; k < tcf_pedit_nkeys(act); k++) {
switch (tcf_pedit_cmd(act, k)) {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c04247b403ed..0d8968803e98 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/rhashtable.h>
#include <linux/workqueue.h>
+#include <linux/refcount.h>
#include <linux/if_ether.h>
#include <linux/in6.h>
@@ -75,6 +76,7 @@ struct fl_flow_mask {
struct list_head filters;
struct rcu_work rwork;
struct list_head list;
+ refcount_t refcnt;
};
struct fl_flow_tmplt {
@@ -86,7 +88,9 @@ struct fl_flow_tmplt {
struct cls_fl_head {
struct rhashtable ht;
+ spinlock_t masks_lock; /* Protect masks list */
struct list_head masks;
+ struct list_head hw_filters;
struct rcu_work rwork;
struct idr handle_idr;
};
@@ -99,11 +103,18 @@ struct cls_fl_filter {
struct tcf_result res;
struct fl_flow_key key;
struct list_head list;
+ struct list_head hw_list;
u32 handle;
u32 flags;
u32 in_hw_count;
struct rcu_work rwork;
struct net_device *hw_dev;
+ /* Flower classifier is unlocked, which means that its reference counter
+ * can be changed concurrently without any kind of external
+ * synchronization. Use atomic reference counter to be concurrency-safe.
+ */
+ refcount_t refcnt;
+ bool deleted;
};
static const struct rhashtable_params mask_ht_params = {
@@ -304,7 +315,9 @@ static int fl_init(struct tcf_proto *tp)
if (!head)
return -ENOBUFS;
+ spin_lock_init(&head->masks_lock);
INIT_LIST_HEAD_RCU(&head->masks);
+ INIT_LIST_HEAD(&head->hw_filters);
rcu_assign_pointer(tp->root, head);
idr_init(&head->handle_idr);
@@ -313,6 +326,7 @@ static int fl_init(struct tcf_proto *tp)
static void fl_mask_free(struct fl_flow_mask *mask)
{
+ WARN_ON(!list_empty(&mask->filters));
rhashtable_destroy(&mask->ht);
kfree(mask);
}
@@ -325,22 +339,32 @@ static void fl_mask_free_work(struct work_struct *work)
fl_mask_free(mask);
}
-static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
- bool async)
+static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask)
{
- if (!list_empty(&mask->filters))
+ if (!refcount_dec_and_test(&mask->refcnt))
return false;
rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params);
+
+ spin_lock(&head->masks_lock);
list_del_rcu(&mask->list);
- if (async)
- tcf_queue_work(&mask->rwork, fl_mask_free_work);
- else
- fl_mask_free(mask);
+ spin_unlock(&head->masks_lock);
+
+ tcf_queue_work(&mask->rwork, fl_mask_free_work);
return true;
}
+static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
+{
+ /* Flower classifier only changes root pointer during init and destroy.
+ * Users must obtain reference to tcf_proto instance before calling its
+ * API, so tp->root pointer is protected from concurrent call to
+ * fl_destroy() by reference counting.
+ */
+ return rcu_dereference_raw(tp->root);
+}
+
static void __fl_destroy_filter(struct cls_fl_filter *f)
{
tcf_exts_destroy(&f->exts);
@@ -353,37 +377,50 @@ static void fl_destroy_filter_work(struct work_struct *work)
struct cls_fl_filter *f = container_of(to_rcu_work(work),
struct cls_fl_filter, rwork);
- rtnl_lock();
__fl_destroy_filter(f);
- rtnl_unlock();
}
static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
+ if (!rtnl_held)
+ rtnl_lock();
+
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long) f;
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+ spin_lock(&tp->lock);
+ list_del_init(&f->hw_list);
tcf_block_offload_dec(block, &f->flags);
+ spin_unlock(&tp->lock);
+
+ if (!rtnl_held)
+ rtnl_unlock();
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
- struct cls_fl_filter *f,
+ struct cls_fl_filter *f, bool rtnl_held,
struct netlink_ext_ack *extack)
{
+ struct cls_fl_head *head = fl_head_dereference(tp);
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
bool skip_sw = tc_skip_sw(f->flags);
- int err;
+ int err = 0;
+
+ if (!rtnl_held)
+ rtnl_lock();
cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
- if (!cls_flower.rule)
- return -ENOMEM;
+ if (!cls_flower.rule) {
+ err = -ENOMEM;
+ goto errout;
+ }
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_REPLACE;
@@ -396,35 +433,51 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
if (err) {
kfree(cls_flower.rule);
- if (skip_sw) {
+ if (skip_sw)
NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- return err;
- }
- return 0;
+ else
+ err = 0;
+ goto errout;
}
err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
kfree(cls_flower.rule);
if (err < 0) {
- fl_hw_destroy_filter(tp, f, NULL);
- return err;
+ fl_hw_destroy_filter(tp, f, true, NULL);
+ goto errout;
} else if (err > 0) {
f->in_hw_count = err;
+ err = 0;
+ spin_lock(&tp->lock);
tcf_block_offload_inc(block, &f->flags);
+ spin_unlock(&tp->lock);
}
- if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
- return -EINVAL;
+ if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) {
+ err = -EINVAL;
+ goto errout;
+ }
- return 0;
+ spin_lock(&tp->lock);
+ list_add(&f->hw_list, &head->hw_filters);
+ spin_unlock(&tp->lock);
+errout:
+ if (!rtnl_held)
+ rtnl_unlock();
+
+ return err;
}
-static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
+ bool rtnl_held)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
+ if (!rtnl_held)
+ rtnl_lock();
+
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
cls_flower.command = TC_CLSFLOWER_STATS;
cls_flower.cookie = (unsigned long) f;
@@ -435,27 +488,81 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
cls_flower.stats.pkts,
cls_flower.stats.lastused);
+
+ if (!rtnl_held)
+ rtnl_unlock();
}
-static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
- struct netlink_ext_ack *extack)
+static void __fl_put(struct cls_fl_filter *f)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
- bool async = tcf_exts_get_net(&f->exts);
- bool last;
+ if (!refcount_dec_and_test(&f->refcnt))
+ return;
+
+ if (tcf_exts_get_net(&f->exts))
+ tcf_queue_work(&f->rwork, fl_destroy_filter_work);
+ else
+ __fl_destroy_filter(f);
+}
+
+static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle)
+{
+ struct cls_fl_filter *f;
+ rcu_read_lock();
+ f = idr_find(&head->handle_idr, handle);
+ if (f && !refcount_inc_not_zero(&f->refcnt))
+ f = NULL;
+ rcu_read_unlock();
+
+ return f;
+}
+
+static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
+ unsigned long *handle)
+{
+ struct cls_fl_head *head = fl_head_dereference(tp);
+ struct cls_fl_filter *f;
+
+ rcu_read_lock();
+ while ((f = idr_get_next_ul(&head->handle_idr, handle))) {
+ /* don't return filters that are being deleted */
+ if (refcount_inc_not_zero(&f->refcnt))
+ break;
+ ++(*handle);
+ }
+ rcu_read_unlock();
+
+ return f;
+}
+
+static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
+ bool *last, bool rtnl_held,
+ struct netlink_ext_ack *extack)
+{
+ struct cls_fl_head *head = fl_head_dereference(tp);
+
+ *last = false;
+
+ spin_lock(&tp->lock);
+ if (f->deleted) {
+ spin_unlock(&tp->lock);
+ return -ENOENT;
+ }
+
+ f->deleted = true;
+ rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
+ f->mask->filter_ht_params);
idr_remove(&head->handle_idr, f->handle);
list_del_rcu(&f->list);
- last = fl_mask_put(head, f->mask, async);
+ spin_unlock(&tp->lock);
+
+ *last = fl_mask_put(head, f->mask);
if (!tc_skip_hw(f->flags))
- fl_hw_destroy_filter(tp, f, extack);
+ fl_hw_destroy_filter(tp, f, rtnl_held, extack);
tcf_unbind_filter(tp, &f->res);
- if (async)
- tcf_queue_work(&f->rwork, fl_destroy_filter_work);
- else
- __fl_destroy_filter(f);
+ __fl_put(f);
- return last;
+ return 0;
}
static void fl_destroy_sleepable(struct work_struct *work)
@@ -472,13 +579,15 @@ static void fl_destroy_sleepable(struct work_struct *work)
static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_head *head = fl_head_dereference(tp);
struct fl_flow_mask *mask, *next_mask;
struct cls_fl_filter *f, *next;
+ bool last;
list_for_each_entry_safe(mask, next_mask, &head->masks, list) {
list_for_each_entry_safe(f, next, &mask->filters, list) {
- if (__fl_delete(tp, f, extack))
+ __fl_delete(tp, f, &last, rtnl_held, extack);
+ if (last)
break;
}
}
@@ -488,11 +597,18 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
tcf_queue_work(&head->rwork, fl_destroy_sleepable);
}
+static void fl_put(struct tcf_proto *tp, void *arg)
+{
+ struct cls_fl_filter *f = arg;
+
+ __fl_put(f);
+}
+
static void *fl_get(struct tcf_proto *tp, u32 handle)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_head *head = fl_head_dereference(tp);
- return idr_find(&head->handle_idr, handle);
+ return __fl_get(head, handle);
}
static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
@@ -1227,12 +1343,18 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
INIT_LIST_HEAD_RCU(&newmask->filters);
- err = rhashtable_insert_fast(&head->ht, &newmask->ht_node,
- mask_ht_params);
+ refcount_set(&newmask->refcnt, 1);
+ err = rhashtable_replace_fast(&head->ht, &mask->ht_node,
+ &newmask->ht_node, mask_ht_params);
if (err)
goto errout_destroy;
+ /* Wait until any potential concurrent users of mask are finished */
+ synchronize_rcu();
+
+ spin_lock(&head->masks_lock);
list_add_tail_rcu(&newmask->list, &head->masks);
+ spin_unlock(&head->masks_lock);
return newmask;
@@ -1250,41 +1372,77 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
struct fl_flow_mask *mask)
{
struct fl_flow_mask *newmask;
+ int ret = 0;
+
+ rcu_read_lock();
- fnew->mask = rhashtable_lookup_fast(&head->ht, mask, mask_ht_params);
+ /* Insert mask as temporary node to prevent concurrent creation of mask
+ * with same key. Any concurrent lookups with same key will return
+ * -EAGAIN because mask's refcnt is zero. It is safe to insert
+ * stack-allocated 'mask' to masks hash table because we call
+ * synchronize_rcu() before returning from this function (either in case
+ * of error or after replacing it with heap-allocated mask in
+ * fl_create_new_mask()).
+ */
+ fnew->mask = rhashtable_lookup_get_insert_fast(&head->ht,
+ &mask->ht_node,
+ mask_ht_params);
if (!fnew->mask) {
- if (fold)
- return -EINVAL;
+ rcu_read_unlock();
+
+ if (fold) {
+ ret = -EINVAL;
+ goto errout_cleanup;
+ }
newmask = fl_create_new_mask(head, mask);
- if (IS_ERR(newmask))
- return PTR_ERR(newmask);
+ if (IS_ERR(newmask)) {
+ ret = PTR_ERR(newmask);
+ goto errout_cleanup;
+ }
fnew->mask = newmask;
+ return 0;
+ } else if (IS_ERR(fnew->mask)) {
+ ret = PTR_ERR(fnew->mask);
} else if (fold && fold->mask != fnew->mask) {
- return -EINVAL;
+ ret = -EINVAL;
+ } else if (!refcount_inc_not_zero(&fnew->mask->refcnt)) {
+ /* Mask was deleted concurrently, try again */
+ ret = -EAGAIN;
}
+ rcu_read_unlock();
+ return ret;
- return 0;
+errout_cleanup:
+ rhashtable_remove_fast(&head->ht, &mask->ht_node,
+ mask_ht_params);
+ /* Wait until any potential concurrent users of mask are finished */
+ synchronize_rcu();
+ return ret;
}
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_fl_filter *f, struct fl_flow_mask *mask,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr,
- struct fl_flow_tmplt *tmplt,
+ struct fl_flow_tmplt *tmplt, bool rtnl_held,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true,
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held,
extack);
if (err < 0)
return err;
if (tb[TCA_FLOWER_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+ if (!rtnl_held)
+ rtnl_lock();
tcf_bind_filter(tp, &f->res, base);
+ if (!rtnl_held)
+ rtnl_unlock();
}
err = fl_set_key(net, tb, &f->key, &mask->key, extack);
@@ -1302,25 +1460,52 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
return 0;
}
+static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
+ struct cls_fl_filter *fold,
+ bool *in_ht)
+{
+ struct fl_flow_mask *mask = fnew->mask;
+ int err;
+
+ err = rhashtable_lookup_insert_fast(&mask->ht,
+ &fnew->ht_node,
+ mask->filter_ht_params);
+ if (err) {
+ *in_ht = false;
+ /* It is okay if filter with same key exists when
+ * overwriting.
+ */
+ return fold && err == -EEXIST ? 0 : err;
+ }
+
+ *in_ht = true;
+ return 0;
+}
+
static int fl_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
void **arg, bool ovr, bool rtnl_held,
struct netlink_ext_ack *extack)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_head *head = fl_head_dereference(tp);
struct cls_fl_filter *fold = *arg;
struct cls_fl_filter *fnew;
struct fl_flow_mask *mask;
struct nlattr **tb;
+ bool in_ht;
int err;
- if (!tca[TCA_OPTIONS])
- return -EINVAL;
+ if (!tca[TCA_OPTIONS]) {
+ err = -EINVAL;
+ goto errout_fold;
+ }
mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL);
- if (!mask)
- return -ENOBUFS;
+ if (!mask) {
+ err = -ENOBUFS;
+ goto errout_fold;
+ }
tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
if (!tb) {
@@ -1343,6 +1528,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
err = -ENOBUFS;
goto errout_tb;
}
+ INIT_LIST_HEAD(&fnew->hw_list);
+ refcount_set(&fnew->refcnt, 1);
err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
if (err < 0)
@@ -1358,7 +1545,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
- tp->chain->tmplt_priv, extack);
+ tp->chain->tmplt_priv, rtnl_held, extack);
if (err)
goto errout;
@@ -1366,169 +1553,247 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto errout;
- if (!handle) {
- handle = 1;
- err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
- INT_MAX, GFP_KERNEL);
- } else if (!fold) {
- /* user specifies a handle and it doesn't exist */
- err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
- handle, GFP_KERNEL);
- }
+ err = fl_ht_insert_unique(fnew, fold, &in_ht);
if (err)
goto errout_mask;
- fnew->handle = handle;
-
- if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) {
- err = -EEXIST;
- goto errout_idr;
- }
-
- err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
- fnew->mask->filter_ht_params);
- if (err)
- goto errout_idr;
if (!tc_skip_hw(fnew->flags)) {
- err = fl_hw_replace_filter(tp, fnew, extack);
+ err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
if (err)
- goto errout_mask_ht;
+ goto errout_ht;
}
if (!tc_in_hw(fnew->flags))
fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+ spin_lock(&tp->lock);
+
+ /* tp was deleted concurrently. -EAGAIN will cause caller to lookup
+ * proto again or create new one, if necessary.
+ */
+ if (tp->deleting) {
+ err = -EAGAIN;
+ goto errout_hw;
+ }
+
if (fold) {
+ /* Fold filter was deleted concurrently. Retry lookup. */
+ if (fold->deleted) {
+ err = -EAGAIN;
+ goto errout_hw;
+ }
+
+ fnew->handle = handle;
+
+ if (!in_ht) {
+ struct rhashtable_params params =
+ fnew->mask->filter_ht_params;
+
+ err = rhashtable_insert_fast(&fnew->mask->ht,
+ &fnew->ht_node,
+ params);
+ if (err)
+ goto errout_hw;
+ in_ht = true;
+ }
+
+ refcount_inc(&fnew->refcnt);
rhashtable_remove_fast(&fold->mask->ht,
&fold->ht_node,
fold->mask->filter_ht_params);
- if (!tc_skip_hw(fold->flags))
- fl_hw_destroy_filter(tp, fold, NULL);
- }
-
- *arg = fnew;
-
- if (fold) {
idr_replace(&head->handle_idr, fnew, fnew->handle);
list_replace_rcu(&fold->list, &fnew->list);
+ fold->deleted = true;
+
+ spin_unlock(&tp->lock);
+
+ fl_mask_put(head, fold->mask);
+ if (!tc_skip_hw(fold->flags))
+ fl_hw_destroy_filter(tp, fold, rtnl_held, NULL);
tcf_unbind_filter(tp, &fold->res);
- tcf_exts_get_net(&fold->exts);
- tcf_queue_work(&fold->rwork, fl_destroy_filter_work);
+ /* Caller holds reference to fold, so refcnt is always > 0
+ * after this.
+ */
+ refcount_dec(&fold->refcnt);
+ __fl_put(fold);
} else {
+ if (handle) {
+ /* user specifies a handle and it doesn't exist */
+ err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+ handle, GFP_ATOMIC);
+
+ /* Filter with specified handle was concurrently
+ * inserted after initial check in cls_api. This is not
+ * necessarily an error if NLM_F_EXCL is not set in
+ * message flags. Returning EAGAIN will cause cls_api to
+ * try to update concurrently inserted rule.
+ */
+ if (err == -ENOSPC)
+ err = -EAGAIN;
+ } else {
+ handle = 1;
+ err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+ INT_MAX, GFP_ATOMIC);
+ }
+ if (err)
+ goto errout_hw;
+
+ refcount_inc(&fnew->refcnt);
+ fnew->handle = handle;
list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
+ spin_unlock(&tp->lock);
}
+ *arg = fnew;
+
kfree(tb);
kfree(mask);
return 0;
-errout_mask_ht:
- rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
- fnew->mask->filter_ht_params);
-
-errout_idr:
- if (!fold)
- idr_remove(&head->handle_idr, fnew->handle);
-
+errout_ht:
+ spin_lock(&tp->lock);
+errout_hw:
+ fnew->deleted = true;
+ spin_unlock(&tp->lock);
+ if (!tc_skip_hw(fnew->flags))
+ fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL);
+ if (in_ht)
+ rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
+ fnew->mask->filter_ht_params);
errout_mask:
- fl_mask_put(head, fnew->mask, false);
-
+ fl_mask_put(head, fnew->mask);
errout:
- tcf_exts_destroy(&fnew->exts);
- kfree(fnew);
+ __fl_put(fnew);
errout_tb:
kfree(tb);
errout_mask_alloc:
kfree(mask);
+errout_fold:
+ if (fold)
+ __fl_put(fold);
return err;
}
static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
bool rtnl_held, struct netlink_ext_ack *extack)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_head *head = fl_head_dereference(tp);
struct cls_fl_filter *f = arg;
+ bool last_on_mask;
+ int err = 0;
- rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
- f->mask->filter_ht_params);
- __fl_delete(tp, f, extack);
+ err = __fl_delete(tp, f, &last_on_mask, rtnl_held, extack);
*last = list_empty(&head->masks);
- return 0;
+ __fl_put(f);
+
+ return err;
}
static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
bool rtnl_held)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f;
arg->count = arg->skip;
- while ((f = idr_get_next_ul(&head->handle_idr,
- &arg->cookie)) != NULL) {
+ while ((f = fl_get_next_filter(tp, &arg->cookie)) != NULL) {
if (arg->fn(tp, f, arg) < 0) {
+ __fl_put(f);
arg->stop = 1;
break;
}
- arg->cookie = f->handle + 1;
+ __fl_put(f);
+ arg->cookie++;
arg->count++;
}
}
+static struct cls_fl_filter *
+fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
+{
+ struct cls_fl_head *head = fl_head_dereference(tp);
+
+ spin_lock(&tp->lock);
+ if (list_empty(&head->hw_filters)) {
+ spin_unlock(&tp->lock);
+ return NULL;
+ }
+
+ if (!f)
+ f = list_entry(&head->hw_filters, struct cls_fl_filter,
+ hw_list);
+ list_for_each_entry_continue(f, &head->hw_filters, hw_list) {
+ if (!(add && f->deleted) && refcount_inc_not_zero(&f->refcnt)) {
+ spin_unlock(&tp->lock);
+ return f;
+ }
+ }
+
+ spin_unlock(&tp->lock);
+ return NULL;
+}
+
static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
- struct fl_flow_mask *mask;
- struct cls_fl_filter *f;
+ struct cls_fl_filter *f = NULL;
int err;
- list_for_each_entry(mask, &head->masks, list) {
- list_for_each_entry(f, &mask->filters, list) {
- if (tc_skip_hw(f->flags))
- continue;
-
- cls_flower.rule =
- flow_rule_alloc(tcf_exts_num_actions(&f->exts));
- if (!cls_flower.rule)
- return -ENOMEM;
-
- tc_cls_common_offload_init(&cls_flower.common, tp,
- f->flags, extack);
- cls_flower.command = add ?
- TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
- cls_flower.cookie = (unsigned long)f;
- cls_flower.rule->match.dissector = &mask->dissector;
- cls_flower.rule->match.mask = &mask->key;
- cls_flower.rule->match.key = &f->mkey;
-
- err = tc_setup_flow_action(&cls_flower.rule->action,
- &f->exts);
- if (err) {
- kfree(cls_flower.rule);
- if (tc_skip_sw(f->flags)) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- return err;
- }
- continue;
+ /* hw_filters list can only be changed by hw offload functions after
+ * obtaining rtnl lock. Make sure it is not changed while reoffload is
+ * iterating it.
+ */
+ ASSERT_RTNL();
+
+ while ((f = fl_get_next_hw_filter(tp, f, add))) {
+ cls_flower.rule =
+ flow_rule_alloc(tcf_exts_num_actions(&f->exts));
+ if (!cls_flower.rule) {
+ __fl_put(f);
+ return -ENOMEM;
+ }
+
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags,
+ extack);
+ cls_flower.command = add ?
+ TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
+ cls_flower.cookie = (unsigned long)f;
+ cls_flower.rule->match.dissector = &f->mask->dissector;
+ cls_flower.rule->match.mask = &f->mask->key;
+ cls_flower.rule->match.key = &f->mkey;
+
+ err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
+ if (err) {
+ kfree(cls_flower.rule);
+ if (tc_skip_sw(f->flags)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+ __fl_put(f);
+ return err;
}
+ goto next_flow;
+ }
- cls_flower.classid = f->res.classid;
+ cls_flower.classid = f->res.classid;
- err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
- kfree(cls_flower.rule);
+ err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+ kfree(cls_flower.rule);
- if (err) {
- if (add && tc_skip_sw(f->flags))
- return err;
- continue;
+ if (err) {
+ if (add && tc_skip_sw(f->flags)) {
+ __fl_put(f);
+ return err;
}
-
- tc_cls_offload_cnt_update(block, &f->in_hw_count,
- &f->flags, add);
+ goto next_flow;
}
+
+ spin_lock(&tp->lock);
+ tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags,
+ add);
+ spin_unlock(&tp->lock);
+next_flow:
+ __fl_put(f);
}
return 0;
@@ -2061,6 +2326,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct cls_fl_filter *f = fh;
struct nlattr *nest;
struct fl_flow_key *key, *mask;
+ bool skip_hw;
if (!f)
return skb->len;
@@ -2071,21 +2337,26 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
if (!nest)
goto nla_put_failure;
+ spin_lock(&tp->lock);
+
if (f->res.classid &&
nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
- goto nla_put_failure;
+ goto nla_put_failure_locked;
key = &f->key;
mask = &f->mask->key;
+ skip_hw = tc_skip_hw(f->flags);
if (fl_dump_key(skb, net, key, mask))
- goto nla_put_failure;
-
- if (!tc_skip_hw(f->flags))
- fl_hw_update_stats(tp, f);
+ goto nla_put_failure_locked;
if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
- goto nla_put_failure;
+ goto nla_put_failure_locked;
+
+ spin_unlock(&tp->lock);
+
+ if (!skip_hw)
+ fl_hw_update_stats(tp, f, rtnl_held);
if (nla_put_u32(skb, TCA_FLOWER_IN_HW_COUNT, f->in_hw_count))
goto nla_put_failure;
@@ -2100,6 +2371,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
return skb->len;
+nla_put_failure_locked:
+ spin_unlock(&tp->lock);
nla_put_failure:
nla_nest_cancel(skb, nest);
return -1;
@@ -2144,6 +2417,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.init = fl_init,
.destroy = fl_destroy,
.get = fl_get,
+ .put = fl_put,
.change = fl_change,
.delete = fl_delete,
.walk = fl_walk,
@@ -2154,6 +2428,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.tmplt_destroy = fl_tmplt_destroy,
.tmplt_dump = fl_tmplt_dump,
.owner = THIS_MODULE,
+ .flags = TCF_PROTO_OPS_DOIT_UNLOCKED,
};
static int __init cls_fl_init(void)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fb8f138b9776..c126b9f78d6e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -998,6 +998,19 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb,
qdisc_put(old);
}
+static void qdisc_clear_nolock(struct Qdisc *sch)
+{
+ sch->flags &= ~TCQ_F_NOLOCK;
+ if (!(sch->flags & TCQ_F_CPUSTATS))
+ return;
+
+ free_percpu(sch->cpu_bstats);
+ free_percpu(sch->cpu_qstats);
+ sch->cpu_bstats = NULL;
+ sch->cpu_qstats = NULL;
+ sch->flags &= ~TCQ_F_CPUSTATS;
+}
+
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
* to device "dev".
*
@@ -1076,7 +1089,7 @@ skip:
/* Only support running class lockless if parent is lockless */
if (new && (new->flags & TCQ_F_NOLOCK) &&
parent && !(parent->flags & TCQ_F_NOLOCK))
- new->flags &= ~TCQ_F_NOLOCK;
+ qdisc_clear_nolock(new);
if (!cops || !cops->graft)
return -EOPNOTSUPP;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index c6a502933fe7..f68fd7a0e038 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -61,16 +61,20 @@
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
+#include <net/netevent.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
+static LIST_HEAD(cbs_list);
+static DEFINE_SPINLOCK(cbs_list_lock);
+
#define BYTES_PER_KBIT (1000LL / 8)
struct cbs_sched_data {
bool offload;
int queue;
- s64 port_rate; /* in bytes/s */
+ atomic64_t port_rate; /* in bytes/s */
s64 last; /* timestamp in ns */
s64 credits; /* in bytes */
s32 locredit; /* in bytes */
@@ -82,6 +86,7 @@ struct cbs_sched_data {
struct sk_buff **to_free);
struct sk_buff *(*dequeue)(struct Qdisc *sch);
struct Qdisc *qdisc;
+ struct list_head cbs_list;
};
static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -181,6 +186,11 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
s64 credits;
int len;
+ if (atomic64_read(&q->port_rate) == -1) {
+ WARN_ONCE(1, "cbs: dequeue() called with unknown port rate.");
+ return NULL;
+ }
+
if (q->credits < 0) {
credits = timediff_to_credits(now - q->last, q->idleslope);
@@ -207,7 +217,8 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
/* As sendslope is a negative number, this will decrease the
* amount of q->credits.
*/
- credits = credits_from_len(len, q->sendslope, q->port_rate);
+ credits = credits_from_len(len, q->sendslope,
+ atomic64_read(&q->port_rate));
credits += q->credits;
q->credits = max_t(s64, credits, q->locredit);
@@ -294,6 +305,50 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
return 0;
}
+static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
+{
+ struct ethtool_link_ksettings ecmd;
+ int port_rate = -1;
+
+ if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
+ ecmd.base.speed != SPEED_UNKNOWN)
+ port_rate = ecmd.base.speed * 1000 * BYTES_PER_KBIT;
+
+ atomic64_set(&q->port_rate, port_rate);
+ netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n",
+ dev->name, (long long)atomic64_read(&q->port_rate),
+ ecmd.base.speed);
+}
+
+static int cbs_dev_notifier(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct cbs_sched_data *q;
+ struct net_device *qdev;
+ bool found = false;
+
+ ASSERT_RTNL();
+
+ if (event != NETDEV_UP && event != NETDEV_CHANGE)
+ return NOTIFY_DONE;
+
+ spin_lock(&cbs_list_lock);
+ list_for_each_entry(q, &cbs_list, cbs_list) {
+ qdev = qdisc_dev(q->qdisc);
+ if (qdev == dev) {
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&cbs_list_lock);
+
+ if (found)
+ cbs_set_port_rate(dev, q);
+
+ return NOTIFY_DONE;
+}
+
static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -315,16 +370,7 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
qopt = nla_data(tb[TCA_CBS_PARMS]);
if (!qopt->offload) {
- struct ethtool_link_ksettings ecmd;
- s64 link_speed;
-
- if (!__ethtool_get_link_ksettings(dev, &ecmd))
- link_speed = ecmd.base.speed;
- else
- link_speed = SPEED_1000;
-
- q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
-
+ cbs_set_port_rate(dev, q);
cbs_disable_offload(dev, q);
} else {
err = cbs_enable_offload(dev, q, qopt, extack);
@@ -347,6 +393,7 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
{
struct cbs_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ int err;
if (!opt) {
NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory");
@@ -367,7 +414,17 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
qdisc_watchdog_init(&q->watchdog, sch);
- return cbs_change(sch, opt, extack);
+ err = cbs_change(sch, opt, extack);
+ if (err)
+ return err;
+
+ if (!q->offload) {
+ spin_lock(&cbs_list_lock);
+ list_add(&q->cbs_list, &cbs_list);
+ spin_unlock(&cbs_list_lock);
+ }
+
+ return 0;
}
static void cbs_destroy(struct Qdisc *sch)
@@ -375,8 +432,11 @@ static void cbs_destroy(struct Qdisc *sch)
struct cbs_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- qdisc_watchdog_cancel(&q->watchdog);
+ spin_lock(&cbs_list_lock);
+ list_del(&q->cbs_list);
+ spin_unlock(&cbs_list_lock);
+ qdisc_watchdog_cancel(&q->watchdog);
cbs_disable_offload(dev, q);
if (q->qdisc)
@@ -487,14 +547,24 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
+static struct notifier_block cbs_device_notifier = {
+ .notifier_call = cbs_dev_notifier,
+};
+
static int __init cbs_module_init(void)
{
+ int err = register_netdevice_notifier(&cbs_device_notifier);
+
+ if (err)
+ return err;
+
return register_qdisc(&cbs_qdisc_ops);
}
static void __exit cbs_module_exit(void)
{
unregister_qdisc(&cbs_qdisc_ops);
+ unregister_netdevice_notifier(&cbs_device_notifier);
}
module_init(cbs_module_init)
module_exit(cbs_module_exit)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a117d9260558..848aab3693bd 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -68,7 +68,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
skb = __skb_dequeue(&q->skb_bad_txq);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_atomic_qlen_dec(q);
+ qdisc_qstats_cpu_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
@@ -108,7 +108,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_atomic_qlen_inc(q);
+ qdisc_qstats_cpu_qlen_inc(q);
} else {
qdisc_qstats_backlog_inc(q, skb);
q->q.qlen++;
@@ -118,52 +118,36 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
spin_unlock(lock);
}
-static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
- while (skb) {
- struct sk_buff *next = skb->next;
-
- __skb_queue_tail(&q->gso_skb, skb);
- q->qstats.requeues++;
- qdisc_qstats_backlog_inc(q, skb);
- q->q.qlen++; /* it's still part of the queue */
+ spinlock_t *lock = NULL;
- skb = next;
+ if (q->flags & TCQ_F_NOLOCK) {
+ lock = qdisc_lock(q);
+ spin_lock(lock);
}
- __netif_schedule(q);
- return 0;
-}
-
-static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
-{
- spinlock_t *lock = qdisc_lock(q);
-
- spin_lock(lock);
while (skb) {
struct sk_buff *next = skb->next;
__skb_queue_tail(&q->gso_skb, skb);
- qdisc_qstats_cpu_requeues_inc(q);
- qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_atomic_qlen_inc(q);
+ /* it's still part of the queue */
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_requeues_inc(q);
+ qdisc_qstats_cpu_backlog_inc(q, skb);
+ qdisc_qstats_cpu_qlen_inc(q);
+ } else {
+ q->qstats.requeues++;
+ qdisc_qstats_backlog_inc(q, skb);
+ q->q.qlen++;
+ }
skb = next;
}
- spin_unlock(lock);
-
+ if (lock)
+ spin_unlock(lock);
__netif_schedule(q);
-
- return 0;
-}
-
-static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
-{
- if (q->flags & TCQ_F_NOLOCK)
- return dev_requeue_skb_locked(skb, q);
- else
- return __dev_requeue_skb(skb, q);
}
static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -252,7 +236,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
skb = __skb_dequeue(&q->gso_skb);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_atomic_qlen_dec(q);
+ qdisc_qstats_cpu_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
@@ -645,11 +629,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
if (unlikely(err))
return qdisc_drop_cpu(skb, qdisc, to_free);
- qdisc_qstats_atomic_qlen_inc(qdisc);
- /* Note: skb can not be used after skb_array_produce(),
- * so we better not use qdisc_qstats_cpu_backlog_inc()
- */
- this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
+ qdisc_update_stats_at_enqueue(qdisc, pkt_len);
return NET_XMIT_SUCCESS;
}
@@ -668,9 +648,9 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
skb = __skb_array_consume(q);
}
if (likely(skb)) {
- qdisc_qstats_cpu_backlog_dec(qdisc, skb);
- qdisc_bstats_cpu_update(qdisc, skb);
- qdisc_qstats_atomic_qlen_dec(qdisc);
+ qdisc_update_stats_at_dequeue(qdisc, skb);
+ } else {
+ qdisc->empty = true;
}
return skb;
@@ -714,6 +694,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
q->backlog = 0;
+ q->qlen = 0;
}
}
@@ -880,6 +861,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
+ sch->empty = true;
dev_hold(dev);
refcount_set(&sch->refcnt, 1);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c7041999eb5d..df848a36b222 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -13,6 +13,7 @@
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
+#include <linux/math64.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <net/netlink.h>
@@ -20,6 +21,9 @@
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
+static LIST_HEAD(taprio_list);
+static DEFINE_SPINLOCK(taprio_list_lock);
+
#define TAPRIO_ALL_GATES_OPEN -1
struct sched_entry {
@@ -42,9 +46,9 @@ struct taprio_sched {
struct Qdisc *root;
s64 base_time;
int clockid;
- int picos_per_byte; /* Using picoseconds because for 10Gbps+
- * speeds it's sub-nanoseconds per byte
- */
+ atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
+ * speeds it's sub-nanoseconds per byte
+ */
size_t num_entries;
/* Protects the update side of the RCU protected current_entry */
@@ -53,6 +57,7 @@ struct taprio_sched {
struct list_head entries;
ktime_t (*get_time)(void);
struct hrtimer advance_timer;
+ struct list_head taprio_list;
};
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -85,7 +90,7 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch)
rcu_read_lock();
entry = rcu_dereference(q->current_entry);
- gate_mask = entry ? entry->gate_mask : -1;
+ gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
rcu_read_unlock();
if (!gate_mask)
@@ -107,7 +112,7 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch)
tc = netdev_get_prio_tc_map(dev, prio);
if (!(gate_mask & BIT(tc)))
- return NULL;
+ continue;
return skb;
}
@@ -117,7 +122,14 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch)
static inline int length_to_duration(struct taprio_sched *q, int len)
{
- return (len * q->picos_per_byte) / 1000;
+ return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
+}
+
+static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
+{
+ atomic_set(&entry->budget,
+ div64_u64((u64)entry->interval * 1000,
+ atomic64_read(&q->picos_per_byte)));
}
static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
@@ -129,6 +141,11 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
u32 gate_mask;
int i;
+ if (atomic64_read(&q->picos_per_byte) == -1) {
+ WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte.");
+ return NULL;
+ }
+
rcu_read_lock();
entry = rcu_dereference(q->current_entry);
/* if there's no entry, it means that the schedule didn't
@@ -171,12 +188,12 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
*/
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
ktime_after(guard, entry->close_time))
- return NULL;
+ continue;
/* ... and no budget. */
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
atomic_sub_return(len, &entry->budget) < 0)
- return NULL;
+ continue;
skb = child->ops->dequeue(child);
if (unlikely(!skb))
@@ -192,14 +209,6 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
return NULL;
}
-static bool should_restart_cycle(const struct taprio_sched *q,
- const struct sched_entry *entry)
-{
- WARN_ON(!entry);
-
- return list_is_last(&entry->list, &q->entries);
-}
-
static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
struct taprio_sched *q = container_of(timer, struct taprio_sched,
@@ -223,7 +232,7 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
goto first_run;
}
- if (should_restart_cycle(q, entry))
+ if (list_is_last(&entry->list, &q->entries))
next = list_first_entry(&q->entries, struct sched_entry,
list);
else
@@ -232,8 +241,7 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
close_time = ktime_add_ns(entry->close_time, next->interval);
next->close_time = close_time;
- atomic_set(&next->budget,
- (next->interval * 1000) / q->picos_per_byte);
+ taprio_set_budget(q, next);
first_run:
rcu_assign_pointer(q->current_entry, next);
@@ -523,7 +531,7 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
return 0;
}
-static ktime_t taprio_get_start_time(struct Qdisc *sch)
+static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start)
{
struct taprio_sched *q = qdisc_priv(sch);
struct sched_entry *entry;
@@ -531,27 +539,33 @@ static ktime_t taprio_get_start_time(struct Qdisc *sch)
s64 n;
base = ns_to_ktime(q->base_time);
- cycle = 0;
+ now = q->get_time();
+
+ if (ktime_after(base, now)) {
+ *start = base;
+ return 0;
+ }
/* Calculate the cycle_time, by summing all the intervals.
*/
+ cycle = 0;
list_for_each_entry(entry, &q->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
- if (!cycle)
- return base;
-
- now = q->get_time();
-
- if (ktime_after(base, now))
- return base;
+ /* The qdisc is expected to have at least one sched_entry. Moreover,
+ * any entry must have 'interval' > 0. Thus if the cycle time is zero,
+ * something went really wrong. In that case, we should warn about this
+ * inconsistent state and return error.
+ */
+ if (WARN_ON(!cycle))
+ return -EFAULT;
/* Schedule the start time for the beginning of the next
* cycle.
*/
n = div64_s64(ktime_sub_ns(now, base), cycle);
-
- return ktime_add_ns(base, (n + 1) * cycle);
+ *start = ktime_add_ns(base, (n + 1) * cycle);
+ return 0;
}
static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
@@ -566,8 +580,7 @@ static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
list);
first->close_time = ktime_add_ns(start, first->interval);
- atomic_set(&first->budget,
- (first->interval * 1000) / q->picos_per_byte);
+ taprio_set_budget(q, first);
rcu_assign_pointer(q->current_entry, NULL);
spin_unlock_irqrestore(&q->current_entry_lock, flags);
@@ -575,6 +588,52 @@ static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
}
+static void taprio_set_picos_per_byte(struct net_device *dev,
+ struct taprio_sched *q)
+{
+ struct ethtool_link_ksettings ecmd;
+ int picos_per_byte = -1;
+
+ if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
+ ecmd.base.speed != SPEED_UNKNOWN)
+ picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
+ ecmd.base.speed * 1000 * 1000);
+
+ atomic64_set(&q->picos_per_byte, picos_per_byte);
+ netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
+ dev->name, (long long)atomic64_read(&q->picos_per_byte),
+ ecmd.base.speed);
+}
+
+static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net_device *qdev;
+ struct taprio_sched *q;
+ bool found = false;
+
+ ASSERT_RTNL();
+
+ if (event != NETDEV_UP && event != NETDEV_CHANGE)
+ return NOTIFY_DONE;
+
+ spin_lock(&taprio_list_lock);
+ list_for_each_entry(q, &taprio_list, taprio_list) {
+ qdev = qdisc_dev(q->root);
+ if (qdev == dev) {
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&taprio_list_lock);
+
+ if (found)
+ taprio_set_picos_per_byte(dev, q);
+
+ return NOTIFY_DONE;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -582,9 +641,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
- struct ethtool_link_ksettings ecmd;
int i, err, size;
- s64 link_speed;
ktime_t start;
err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt,
@@ -592,7 +649,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
return err;
- err = -EINVAL;
if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
@@ -657,17 +713,13 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
mqprio->prio_tc_map[i]);
}
- if (!__ethtool_get_link_ksettings(dev, &ecmd))
- link_speed = ecmd.base.speed;
- else
- link_speed = SPEED_1000;
-
- q->picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
- link_speed * 1000 * 1000);
+ taprio_set_picos_per_byte(dev, q);
- start = taprio_get_start_time(sch);
- if (!start)
- return 0;
+ err = taprio_get_start_time(sch, &start);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
+ return err;
+ }
taprio_start_sched(sch, start);
@@ -681,6 +733,10 @@ static void taprio_destroy(struct Qdisc *sch)
struct sched_entry *entry, *n;
unsigned int i;
+ spin_lock(&taprio_list_lock);
+ list_del(&q->taprio_list);
+ spin_unlock(&taprio_list_lock);
+
hrtimer_cancel(&q->advance_timer);
if (q->qdiscs) {
@@ -735,6 +791,10 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
if (!opt)
return -EINVAL;
+ spin_lock(&taprio_list_lock);
+ list_add(&q->taprio_list, &taprio_list);
+ spin_unlock(&taprio_list_lock);
+
return taprio_change(sch, opt, extack);
}
@@ -947,14 +1007,24 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
+static struct notifier_block taprio_device_notifier = {
+ .notifier_call = taprio_dev_notifier,
+};
+
static int __init taprio_module_init(void)
{
+ int err = register_netdevice_notifier(&taprio_device_notifier);
+
+ if (err)
+ return err;
+
return register_qdisc(&taprio_qdisc_ops);
}
static void __exit taprio_module_exit(void)
{
unregister_qdisc(&taprio_qdisc_ops);
+ unregister_netdevice_notifier(&taprio_device_notifier);
}
module_init(taprio_module_init);