summaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig17
-rw-r--r--net/sched/Makefile3
-rw-r--r--net/sched/act_api.c18
-rw-r--r--net/sched/act_bpf.c4
-rw-r--r--net/sched/act_connmark.c1
-rw-r--r--net/sched/act_csum.c18
-rw-r--r--net/sched/act_ife.c5
-rw-r--r--net/sched/act_mirred.c57
-rw-r--r--net/sched/act_pedit.c3
-rw-r--r--net/sched/act_sample.c4
-rw-r--r--net/sched/act_skbedit.c10
-rw-r--r--net/sched/act_skbmod.c21
-rw-r--r--net/sched/act_tunnel_key.c32
-rw-r--r--net/sched/act_vlan.c19
-rw-r--r--net/sched/cls_api.c614
-rw-r--r--net/sched/cls_basic.c1
-rw-r--r--net/sched/cls_bpf.c4
-rw-r--r--net/sched/cls_flower.c317
-rw-r--r--net/sched/sch_cake.c15
-rw-r--r--net/sched/sch_cbs.c134
-rw-r--r--net/sched/sch_fq_codel.c25
-rw-r--r--net/sched/sch_skbprio.c320
22 files changed, 1362 insertions, 280 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 7af246764a35..e95741388311 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -1,6 +1,6 @@
#
# Traffic control configuration.
-#
+#
menuconfig NET_SCHED
bool "QoS and/or fair queueing"
@@ -251,6 +251,19 @@ config NET_SCH_MQPRIO
If unsure, say N.
+config NET_SCH_SKBPRIO
+ tristate "SKB priority queue scheduler (SKBPRIO)"
+ help
+ Say Y here if you want to use the SKB priority queue
+ scheduler. This schedules packets according to skb->priority,
+ which is useful for request packets in DoS mitigation systems such
+ as Gatekeeper.
+
+ To compile this driver as a module, choose M here: the module will
+ be called sch_skbprio.
+
+ If unsure, say N.
+
config NET_SCH_CHOKE
tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
help
@@ -706,7 +719,7 @@ config NET_CLS_ACT
config NET_ACT_POLICE
tristate "Traffic Policing"
- depends on NET_CLS_ACT
+ depends on NET_CLS_ACT
---help---
Say Y here if you want to do traffic policing, i.e. strict
bandwidth limiting. This action replaces the existing policing
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 673ee7d26ff2..f0403f49edcb 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -33,7 +33,7 @@ obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
obj-$(CONFIG_NET_SCH_RED) += sch_red.o
obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
-obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
+obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
+obj-$(CONFIG_NET_SCH_SKBPRIO) += sch_skbprio.o
obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 148a89ab789b..229d63c99be2 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -36,7 +36,7 @@ static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
if (!tp)
return -EINVAL;
- a->goto_chain = tcf_chain_get(tp->chain->block, chain_index, true);
+ a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index);
if (!a->goto_chain)
return -ENOMEM;
return 0;
@@ -44,7 +44,7 @@ static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
static void tcf_action_goto_chain_fini(struct tc_action *a)
{
- tcf_chain_put(a->goto_chain);
+ tcf_chain_put_by_act(a->goto_chain);
}
static void tcf_action_goto_chain_exec(const struct tc_action *a,
@@ -786,6 +786,15 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
return c;
}
+static bool tcf_action_valid(int action)
+{
+ int opcode = TC_ACT_EXT_OPCODE(action);
+
+ if (!opcode)
+ return action <= TC_ACT_VALUE_MAX;
+ return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC;
+}
+
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
@@ -895,6 +904,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
}
}
+ if (!tcf_action_valid(a->tcfa_action)) {
+ NL_SET_ERR_MSG(extack, "invalid action value, using TC_ACT_UNSPEC instead");
+ a->tcfa_action = TC_ACT_UNSPEC;
+ }
+
return a;
err_mod:
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 06f743d8ed41..6203eb075c9a 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -196,12 +196,10 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS]))
return -EINVAL;
- bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
+ bpf_ops = kmemdup(nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size, GFP_KERNEL);
if (bpf_ops == NULL)
return -ENOMEM;
- memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);
-
fprog_tmp.len = bpf_num_ops;
fprog_tmp.filter = bpf_ops;
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 1e31f0e448e2..2f9bc833d046 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -252,4 +252,3 @@ module_exit(connmark_cleanup_module);
MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>");
MODULE_DESCRIPTION("Connection tracking mark restoring");
MODULE_LICENSE("GPL");
-
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index bd232d3bd022..648a3a35b720 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -97,7 +97,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
}
params_old = rtnl_dereference(p->params);
- params_new->action = parm->action;
+ p->tcf_action = parm->action;
params_new->update_flags = parm->update_flags;
rcu_assign_pointer(p->params, params_new);
if (params_old)
@@ -561,15 +561,14 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
u32 update_flags;
int action;
- rcu_read_lock();
- params = rcu_dereference(p->params);
+ params = rcu_dereference_bh(p->params);
tcf_lastuse_update(&p->tcf_tm);
bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
- action = params->action;
+ action = READ_ONCE(p->tcf_action);
if (unlikely(action == TC_ACT_SHOT))
- goto drop_stats;
+ goto drop;
update_flags = params->update_flags;
switch (tc_skb_protocol(skb)) {
@@ -583,16 +582,11 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
break;
}
-unlock:
- rcu_read_unlock();
return action;
drop:
- action = TC_ACT_SHOT;
-
-drop_stats:
qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
- goto unlock;
+ return TC_ACT_SHOT;
}
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -605,11 +599,11 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
.index = p->tcf_index,
.refcnt = refcount_read(&p->tcf_refcnt) - ref,
.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
+ .action = p->tcf_action,
};
struct tcf_t t;
params = rtnl_dereference(p->params);
- opt.action = params->action;
opt.update_flags = params->update_flags;
if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3d6e265758c0..df4060e32d43 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -820,14 +820,11 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_ife_params *p;
int ret;
- rcu_read_lock();
- p = rcu_dereference(ife->params);
+ p = rcu_dereference_bh(ife->params);
if (p->flags & IFE_ENCODE) {
ret = tcf_ife_encode(skb, a, res, p);
- rcu_read_unlock();
return ret;
}
- rcu_read_unlock();
return tcf_ife_decode(skb, a, res);
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6afd89a36c69..b26d060da08e 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -25,6 +25,7 @@
#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_mirred.h>
#include <net/tc_act/tc_mirred.h>
@@ -49,6 +50,18 @@ static bool tcf_mirred_act_wants_ingress(int action)
}
}
+static bool tcf_mirred_can_reinsert(int action)
+{
+ switch (action) {
+ case TC_ACT_SHOT:
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ return true;
+ }
+ return false;
+}
+
static void tcf_mirred_release(struct tc_action *a)
{
struct tcf_mirred *m = to_mirred(a);
@@ -171,21 +184,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_mirred *m = to_mirred(a);
+ struct sk_buff *skb2 = skb;
bool m_mac_header_xmit;
struct net_device *dev;
- struct sk_buff *skb2;
int retval, err = 0;
+ bool use_reinsert;
+ bool want_ingress;
+ bool is_redirect;
int m_eaction;
int mac_len;
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
- rcu_read_lock();
m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
m_eaction = READ_ONCE(m->tcfm_eaction);
retval = READ_ONCE(m->tcf_action);
- dev = rcu_dereference(m->tcfm_dev);
+ dev = rcu_dereference_bh(m->tcfm_dev);
if (unlikely(!dev)) {
pr_notice_once("tc mirred: target device is gone\n");
goto out;
@@ -197,16 +212,25 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
- goto out;
+ /* we could easily avoid the clone only if called by ingress and clsact;
+ * since we can't easily detect the clsact caller, skip clone only for
+ * ingress - that covers the TC S/W datapath.
+ */
+ is_redirect = tcf_mirred_is_act_redirect(m_eaction);
+ use_reinsert = skb_at_tc_ingress(skb) && is_redirect &&
+ tcf_mirred_can_reinsert(retval);
+ if (!use_reinsert) {
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2)
+ goto out;
+ }
/* If action's target direction differs than filter's direction,
* and devices expect a mac header on xmit, then mac push/pull is
* needed.
*/
- if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) &&
- m_mac_header_xmit) {
+ want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
+ if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
if (!skb_at_tc_ingress(skb)) {
/* caught at egress, act ingress: pull mac */
mac_len = skb_network_header(skb) - skb_mac_header(skb);
@@ -217,15 +241,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
}
}
+ skb2->skb_iif = skb->dev->ifindex;
+ skb2->dev = dev;
+
/* mirror is always swallowed */
- if (tcf_mirred_is_act_redirect(m_eaction)) {
+ if (is_redirect) {
skb2->tc_redirected = 1;
skb2->tc_from_ingress = skb2->tc_at_ingress;
+
+ /* let's the caller reinsert the packet, if possible */
+ if (use_reinsert) {
+ res->ingress = want_ingress;
+ res->qstats = this_cpu_ptr(m->common.cpu_qstats);
+ return TC_ACT_REINSERT;
+ }
}
- skb2->skb_iif = skb->dev->ifindex;
- skb2->dev = dev;
- if (!tcf_mirred_act_wants_ingress(m_eaction))
+ if (!want_ingress)
err = dev_queue_xmit(skb2);
else
err = netif_receive_skb(skb2);
@@ -236,7 +268,6 @@ out:
if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
- rcu_read_unlock();
return retval;
}
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index cc8ffcd1ddb5..43ba999b2d23 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -286,7 +286,7 @@ static int pedit_skb_hdr_offset(struct sk_buff *skb,
default:
ret = -EINVAL;
break;
- };
+ }
return ret;
}
@@ -516,4 +516,3 @@ static void __exit pedit_cleanup_module(void)
module_init(pedit_init_module);
module_exit(pedit_cleanup_module);
-
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 3079e7be5bde..2608ccc83e5e 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -140,8 +140,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
retval = READ_ONCE(s->tcf_action);
- rcu_read_lock();
- psample_group = rcu_dereference(s->psample_group);
+ psample_group = rcu_dereference_bh(s->psample_group);
/* randomly sample packets according to rate */
if (psample_group && (prandom_u32() % s->rate == 0)) {
@@ -165,7 +164,6 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
skb_pull(skb, skb->mac_len);
}
- rcu_read_unlock();
return retval;
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index da56e6938c9e..a6db47ebec11 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -43,8 +43,7 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&d->tcf_tm);
bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
- rcu_read_lock();
- params = rcu_dereference(d->params);
+ params = rcu_dereference_bh(d->params);
action = READ_ONCE(d->tcf_action);
if (params->flags & SKBEDIT_F_PRIORITY)
@@ -77,14 +76,11 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
}
if (params->flags & SKBEDIT_F_PTYPE)
skb->pkt_type = params->ptype;
-
-unlock:
- rcu_read_unlock();
return action;
+
err:
qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
- action = TC_ACT_SHOT;
- goto unlock;
+ return TC_ACT_SHOT;
}
static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index cdc6bacfb190..c437c6d51a71 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -41,20 +41,14 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
* then MAX_EDIT_LEN needs to change appropriately
*/
err = skb_ensure_writable(skb, MAX_EDIT_LEN);
- if (unlikely(err)) { /* best policy is to drop on the floor */
- qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
- return TC_ACT_SHOT;
- }
+ if (unlikely(err)) /* best policy is to drop on the floor */
+ goto drop;
- rcu_read_lock();
action = READ_ONCE(d->tcf_action);
- if (unlikely(action == TC_ACT_SHOT)) {
- qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
- rcu_read_unlock();
- return action;
- }
+ if (unlikely(action == TC_ACT_SHOT))
+ goto drop;
- p = rcu_dereference(d->skbmod_p);
+ p = rcu_dereference_bh(d->skbmod_p);
flags = p->flags;
if (flags & SKBMOD_F_DMAC)
ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
@@ -62,7 +56,6 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
if (flags & SKBMOD_F_ETYPE)
eth_hdr(skb)->h_proto = p->eth_type;
- rcu_read_unlock();
if (flags & SKBMOD_F_SWAPMAC) {
u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
@@ -73,6 +66,10 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
}
return action;
+
+drop:
+ qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+ return TC_ACT_SHOT;
}
static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 3ec585d58762..d42d9e112789 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -31,13 +31,11 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_tunnel_key_params *params;
int action;
- rcu_read_lock();
-
- params = rcu_dereference(t->params);
+ params = rcu_dereference_bh(t->params);
tcf_lastuse_update(&t->tcf_tm);
bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
- action = params->action;
+ action = READ_ONCE(t->tcf_action);
switch (params->tcft_action) {
case TCA_TUNNEL_KEY_ACT_RELEASE:
@@ -53,8 +51,6 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
break;
}
- rcu_read_unlock();
-
return action;
}
@@ -197,6 +193,8 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
[TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16},
[TCA_TUNNEL_KEY_NO_CSUM] = { .type = NLA_U8 },
[TCA_TUNNEL_KEY_ENC_OPTS] = { .type = NLA_NESTED },
+ [TCA_TUNNEL_KEY_ENC_TOS] = { .type = NLA_U8 },
+ [TCA_TUNNEL_KEY_ENC_TTL] = { .type = NLA_U8 },
};
static int tunnel_key_init(struct net *net, struct nlattr *nla,
@@ -216,6 +214,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
int opts_len = 0;
__be64 key_id;
__be16 flags;
+ u8 tos, ttl;
int ret = 0;
int err;
@@ -273,6 +272,13 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
}
}
+ tos = 0;
+ if (tb[TCA_TUNNEL_KEY_ENC_TOS])
+ tos = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TOS]);
+ ttl = 0;
+ if (tb[TCA_TUNNEL_KEY_ENC_TTL])
+ ttl = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TTL]);
+
if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
__be32 saddr;
@@ -281,7 +287,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
- metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
+ metadata = __ip_tun_set_dst(saddr, daddr, tos, ttl,
dst_port, flags,
key_id, opts_len);
} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
@@ -292,7 +298,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
- metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, dst_port,
+ metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port,
0, flags,
key_id, 0);
} else {
@@ -350,7 +356,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
params_old = rtnl_dereference(t->params);
- params_new->action = parm->action;
+ t->tcf_action = parm->action;
params_new->tcft_action = parm->t_action;
params_new->tcft_enc_metadata = metadata;
@@ -479,13 +485,13 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
.index = t->tcf_index,
.refcnt = refcount_read(&t->tcf_refcnt) - ref,
.bindcnt = atomic_read(&t->tcf_bindcnt) - bind,
+ .action = t->tcf_action,
};
struct tcf_t tm;
params = rtnl_dereference(t->params);
opt.t_action = params->tcft_action;
- opt.action = params->action;
if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -504,6 +510,12 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
!(key->tun_flags & TUNNEL_CSUM)) ||
tunnel_key_opts_dump(skb, info))
goto nla_put_failure;
+
+ if (key->tos && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TOS, key->tos))
+ goto nla_put_failure;
+
+ if (key->ttl && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TTL, key->ttl))
+ goto nla_put_failure;
}
tcf_tm_dump(&tm, &t->tcf_tm);
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index ad37f308175a..15a0ee214c9c 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -40,11 +40,9 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
if (skb_at_tc_ingress(skb))
skb_push_rcsum(skb, skb->mac_len);
- rcu_read_lock();
-
action = READ_ONCE(v->tcf_action);
- p = rcu_dereference(v->vlan_p);
+ p = rcu_dereference_bh(v->vlan_p);
switch (p->tcfv_action) {
case TCA_VLAN_ACT_POP:
@@ -61,7 +59,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
case TCA_VLAN_ACT_MODIFY:
/* No-op if no vlan tag (either hw-accel or in-payload) */
if (!skb_vlan_tagged(skb))
- goto unlock;
+ goto out;
/* extract existing tag (and guarantee no hw-accel tag) */
if (skb_vlan_tag_present(skb)) {
tci = skb_vlan_tag_get(skb);
@@ -86,18 +84,15 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
BUG();
}
- goto unlock;
-
-drop:
- action = TC_ACT_SHOT;
- qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
-
-unlock:
- rcu_read_unlock();
+out:
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, skb->mac_len);
return action;
+
+drop:
+ qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+ return TC_ACT_SHOT;
}
static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 73d9967c3739..194c2e0b2737 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -39,7 +39,7 @@ static DEFINE_RWLOCK(cls_mod_lock);
/* Find classifier type by string name */
-static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
+static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
{
const struct tcf_proto_ops *t, *res = NULL;
@@ -57,6 +57,33 @@ static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
return res;
}
+static const struct tcf_proto_ops *
+tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
+{
+ const struct tcf_proto_ops *ops;
+
+ ops = __tcf_proto_lookup_ops(kind);
+ if (ops)
+ return ops;
+#ifdef CONFIG_MODULES
+ rtnl_unlock();
+ request_module("cls_%s", kind);
+ rtnl_lock();
+ ops = __tcf_proto_lookup_ops(kind);
+ /* We dropped the RTNL semaphore in order to perform
+ * the module load. So, even if we succeeded in loading
+ * the module we have to replay the request. We indicate
+ * this using -EAGAIN.
+ */
+ if (ops) {
+ module_put(ops->owner);
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ NL_SET_ERR_MSG(extack, "TC classifier not found");
+ return ERR_PTR(-ENOENT);
+}
+
/* Register(unregister) new classifier type */
int register_tcf_proto_ops(struct tcf_proto_ops *ops)
@@ -133,27 +160,9 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
if (!tp)
return ERR_PTR(-ENOBUFS);
- err = -ENOENT;
- tp->ops = tcf_proto_lookup_ops(kind);
- if (!tp->ops) {
-#ifdef CONFIG_MODULES
- rtnl_unlock();
- request_module("cls_%s", kind);
- rtnl_lock();
- tp->ops = tcf_proto_lookup_ops(kind);
- /* We dropped the RTNL semaphore in order to perform
- * the module load. So, even if we succeeded in loading
- * the module we have to replay the request. We indicate
- * this using -EAGAIN.
- */
- if (tp->ops) {
- module_put(tp->ops->owner);
- err = -EAGAIN;
- } else {
- NL_SET_ERR_MSG(extack, "TC classifier not found");
- err = -ENOENT;
- }
-#endif
+ tp->ops = tcf_proto_lookup_ops(kind, extack);
+ if (IS_ERR(tp->ops)) {
+ err = PTR_ERR(tp->ops);
goto errout;
}
tp->classify = tp->ops->classify;
@@ -195,11 +204,12 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (!chain)
return NULL;
- INIT_LIST_HEAD(&chain->filter_chain_list);
list_add_tail(&chain->list, &block->chain_list);
chain->block = block;
chain->index = chain_index;
chain->refcnt = 1;
+ if (!chain->index)
+ block->chain0.chain = chain;
return chain;
}
@@ -209,35 +219,28 @@ static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
if (item->chain_head_change)
item->chain_head_change(tp_head, item->chain_head_change_priv);
}
-static void tcf_chain_head_change(struct tcf_chain *chain,
- struct tcf_proto *tp_head)
+
+static void tcf_chain0_head_change(struct tcf_chain *chain,
+ struct tcf_proto *tp_head)
{
struct tcf_filter_chain_list_item *item;
+ struct tcf_block *block = chain->block;
- list_for_each_entry(item, &chain->filter_chain_list, list)
+ if (chain->index)
+ return;
+ list_for_each_entry(item, &block->chain0.filter_chain_list, list)
tcf_chain_head_change_item(item, tp_head);
}
-static void tcf_chain_flush(struct tcf_chain *chain)
-{
- struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
-
- tcf_chain_head_change(chain, NULL);
- while (tp) {
- RCU_INIT_POINTER(chain->filter_chain, tp->next);
- tcf_proto_destroy(tp, NULL);
- tp = rtnl_dereference(chain->filter_chain);
- tcf_chain_put(chain);
- }
-}
-
static void tcf_chain_destroy(struct tcf_chain *chain)
{
struct tcf_block *block = chain->block;
list_del(&chain->list);
+ if (!chain->index)
+ block->chain0.chain = NULL;
kfree(chain);
- if (list_empty(&block->chain_list))
+ if (list_empty(&block->chain_list) && block->refcnt == 0)
kfree(block);
}
@@ -246,28 +249,119 @@ static void tcf_chain_hold(struct tcf_chain *chain)
++chain->refcnt;
}
-struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
- bool create)
+static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
+{
+ /* In case all the references are action references, this
+ * chain should not be shown to the user.
+ */
+ return chain->refcnt == chain->action_refcnt;
+}
+
+static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
+ u32 chain_index)
{
struct tcf_chain *chain;
list_for_each_entry(chain, &block->chain_list, list) {
- if (chain->index == chain_index) {
- tcf_chain_hold(chain);
+ if (chain->index == chain_index)
return chain;
- }
}
+ return NULL;
+}
- return create ? tcf_chain_create(block, chain_index) : NULL;
+static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
+ u32 seq, u16 flags, int event, bool unicast);
+
+static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
+ u32 chain_index, bool create,
+ bool by_act)
+{
+ struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+
+ if (chain) {
+ tcf_chain_hold(chain);
+ } else {
+ if (!create)
+ return NULL;
+ chain = tcf_chain_create(block, chain_index);
+ if (!chain)
+ return NULL;
+ }
+
+ if (by_act)
+ ++chain->action_refcnt;
+
+ /* Send notification only in case we got the first
+ * non-action reference. Until then, the chain acts only as
+ * a placeholder for actions pointing to it and user ought
+ * not know about them.
+ */
+ if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
+ tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
+ RTM_NEWCHAIN, false);
+
+ return chain;
+}
+
+static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
+ bool create)
+{
+ return __tcf_chain_get(block, chain_index, create, false);
}
-EXPORT_SYMBOL(tcf_chain_get);
-void tcf_chain_put(struct tcf_chain *chain)
+struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
+{
+ return __tcf_chain_get(block, chain_index, true, true);
+}
+EXPORT_SYMBOL(tcf_chain_get_by_act);
+
+static void tc_chain_tmplt_del(struct tcf_chain *chain);
+
+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
{
- if (--chain->refcnt == 0)
+ if (by_act)
+ chain->action_refcnt--;
+ chain->refcnt--;
+
+ /* The last dropped non-action reference will trigger notification. */
+ if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
+ tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
+
+ if (chain->refcnt == 0) {
+ tc_chain_tmplt_del(chain);
tcf_chain_destroy(chain);
+ }
+}
+
+static void tcf_chain_put(struct tcf_chain *chain)
+{
+ __tcf_chain_put(chain, false);
+}
+
+void tcf_chain_put_by_act(struct tcf_chain *chain)
+{
+ __tcf_chain_put(chain, true);
+}
+EXPORT_SYMBOL(tcf_chain_put_by_act);
+
+static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
+{
+ if (chain->explicitly_created)
+ tcf_chain_put(chain);
+}
+
+static void tcf_chain_flush(struct tcf_chain *chain)
+{
+ struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
+
+ tcf_chain0_head_change(chain, NULL);
+ while (tp) {
+ RCU_INIT_POINTER(chain->filter_chain, tp->next);
+ tcf_proto_destroy(tp, NULL);
+ tp = rtnl_dereference(chain->filter_chain);
+ tcf_chain_put(chain);
+ }
}
-EXPORT_SYMBOL(tcf_chain_put);
static bool tcf_block_offload_in_use(struct tcf_block *block)
{
@@ -337,10 +431,11 @@ no_offload_dev_dec:
}
static int
-tcf_chain_head_change_cb_add(struct tcf_chain *chain,
- struct tcf_block_ext_info *ei,
- struct netlink_ext_ack *extack)
+tcf_chain0_head_change_cb_add(struct tcf_block *block,
+ struct tcf_block_ext_info *ei,
+ struct netlink_ext_ack *extack)
{
+ struct tcf_chain *chain0 = block->chain0.chain;
struct tcf_filter_chain_list_item *item;
item = kmalloc(sizeof(*item), GFP_KERNEL);
@@ -350,23 +445,25 @@ tcf_chain_head_change_cb_add(struct tcf_chain *chain,
}
item->chain_head_change = ei->chain_head_change;
item->chain_head_change_priv = ei->chain_head_change_priv;
- if (chain->filter_chain)
- tcf_chain_head_change_item(item, chain->filter_chain);
- list_add(&item->list, &chain->filter_chain_list);
+ if (chain0 && chain0->filter_chain)
+ tcf_chain_head_change_item(item, chain0->filter_chain);
+ list_add(&item->list, &block->chain0.filter_chain_list);
return 0;
}
static void
-tcf_chain_head_change_cb_del(struct tcf_chain *chain,
- struct tcf_block_ext_info *ei)
+tcf_chain0_head_change_cb_del(struct tcf_block *block,
+ struct tcf_block_ext_info *ei)
{
+ struct tcf_chain *chain0 = block->chain0.chain;
struct tcf_filter_chain_list_item *item;
- list_for_each_entry(item, &chain->filter_chain_list, list) {
+ list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
(item->chain_head_change == ei->chain_head_change &&
item->chain_head_change_priv == ei->chain_head_change_priv)) {
- tcf_chain_head_change_item(item, NULL);
+ if (chain0)
+ tcf_chain_head_change_item(item, NULL);
list_del(&item->list);
kfree(item);
return;
@@ -402,8 +499,6 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
struct netlink_ext_ack *extack)
{
struct tcf_block *block;
- struct tcf_chain *chain;
- int err;
block = kzalloc(sizeof(*block), GFP_KERNEL);
if (!block) {
@@ -413,14 +508,8 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
INIT_LIST_HEAD(&block->chain_list);
INIT_LIST_HEAD(&block->cb_list);
INIT_LIST_HEAD(&block->owner_list);
+ INIT_LIST_HEAD(&block->chain0.filter_chain_list);
- /* Create chain 0 by default, it has to be always present. */
- chain = tcf_chain_create(block, 0);
- if (!chain) {
- NL_SET_ERR_MSG(extack, "Failed to create new tcf chain");
- err = -ENOMEM;
- goto err_chain_create;
- }
block->refcnt = 1;
block->net = net;
block->index = block_index;
@@ -429,10 +518,6 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
if (!tcf_block_shared(block))
block->q = q;
return block;
-
-err_chain_create:
- kfree(block);
- return ERR_PTR(err);
}
static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
@@ -514,11 +599,6 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
return block;
}
-static struct tcf_chain *tcf_block_chain_zero(struct tcf_block *block)
-{
- return list_first_entry(&block->chain_list, struct tcf_chain, list);
-}
-
struct tcf_block_owner_item {
struct list_head list;
struct Qdisc *q;
@@ -612,10 +692,9 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);
- err = tcf_chain_head_change_cb_add(tcf_block_chain_zero(block),
- ei, extack);
+ err = tcf_chain0_head_change_cb_add(block, ei, extack);
if (err)
- goto err_chain_head_change_cb_add;
+ goto err_chain0_head_change_cb_add;
err = tcf_block_offload_bind(block, q, ei, extack);
if (err)
@@ -625,15 +704,14 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
return 0;
err_block_offload_bind:
- tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
-err_chain_head_change_cb_add:
+ tcf_chain0_head_change_cb_del(block, ei);
+err_chain0_head_change_cb_add:
tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
if (created) {
if (tcf_block_shared(block))
tcf_block_remove(block, net);
err_block_insert:
- kfree(tcf_block_chain_zero(block));
kfree(block);
} else {
block->refcnt--;
@@ -673,10 +751,10 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
if (!block)
return;
- tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
+ tcf_chain0_head_change_cb_del(block, ei);
tcf_block_owner_del(block, q, ei->binder_type);
- if (--block->refcnt == 0) {
+ if (block->refcnt == 1) {
if (tcf_block_shared(block))
tcf_block_remove(block, block->net);
@@ -692,13 +770,16 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
tcf_block_offload_unbind(block, q, ei);
- if (block->refcnt == 0) {
+ if (block->refcnt == 1) {
/* At this point, all the chains should have refcnt >= 1. */
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
+ tcf_chain_put_explicitly_created(chain);
tcf_chain_put(chain);
+ }
- /* Finally, put chain 0 and allow block to be freed. */
- tcf_chain_put(tcf_block_chain_zero(block));
+ block->refcnt--;
+ if (list_empty(&block->chain_list))
+ kfree(block);
}
}
EXPORT_SYMBOL(tcf_block_put_ext);
@@ -818,7 +899,7 @@ int tcf_block_cb_register(struct tcf_block *block,
block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
extack);
- return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
+ return PTR_ERR_OR_ZERO(block_cb);
}
EXPORT_SYMBOL(tcf_block_cb_register);
@@ -938,7 +1019,7 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
struct tcf_proto *tp)
{
if (*chain_info->pprev == chain->filter_chain)
- tcf_chain_head_change(chain, tp);
+ tcf_chain0_head_change(chain, tp);
RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
rcu_assign_pointer(*chain_info->pprev, tp);
tcf_chain_hold(chain);
@@ -951,7 +1032,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
struct tcf_proto *next = rtnl_dereference(chain_info->next);
if (tp == chain->filter_chain)
- tcf_chain_head_change(chain, next);
+ tcf_chain0_head_change(chain, next);
RCU_INIT_POINTER(*chain_info->pprev, next);
tcf_chain_put(chain);
}
@@ -1098,7 +1179,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
for (tp = rtnl_dereference(chain->filter_chain);
tp; tp = rtnl_dereference(tp->next))
tfilter_notify(net, oskb, n, tp, block,
- q, parent, 0, event, false);
+ q, parent, NULL, event, false);
}
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1227,6 +1308,12 @@ replay:
goto errout;
}
+ if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
+ NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
+ err = -EINVAL;
+ goto errout;
+ }
+
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
extack);
@@ -1302,6 +1389,13 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
}
chain = tcf_chain_get(block, chain_index, false);
if (!chain) {
+ /* User requested flush on non-existent chain. Nothing to do,
+ * so just return success.
+ */
+ if (prio == 0) {
+ err = 0;
+ goto errout;
+ }
NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
err = -EINVAL;
goto errout;
@@ -1489,7 +1583,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, block, q, parent, 0,
+ if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
@@ -1508,7 +1602,9 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
arg.w.stop = 0;
arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
+ arg.w.cookie = cb->args[2];
tp->ops->walk(tp, &arg.w);
+ cb->args[2] = arg.w.cookie;
cb->args[1] = arg.w.count + 1;
if (arg.w.stop)
return false;
@@ -1606,6 +1702,330 @@ out:
return skb->len;
}
+static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
+ struct sk_buff *skb, struct tcf_block *block,
+ u32 portid, u32 seq, u16 flags, int event)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ const struct tcf_proto_ops *ops;
+ struct nlmsghdr *nlh;
+ struct tcmsg *tcm;
+ void *priv;
+
+ ops = chain->tmplt_ops;
+ priv = chain->tmplt_priv;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+ if (!nlh)
+ goto out_nlmsg_trim;
+ tcm = nlmsg_data(nlh);
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm__pad1 = 0;
+ tcm->tcm__pad2 = 0;
+ tcm->tcm_handle = 0;
+ if (block->q) {
+ tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
+ tcm->tcm_parent = block->q->handle;
+ } else {
+ tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
+ tcm->tcm_block_index = block->index;
+ }
+
+ if (nla_put_u32(skb, TCA_CHAIN, chain->index))
+ goto nla_put_failure;
+
+ if (ops) {
+ if (nla_put_string(skb, TCA_KIND, ops->kind))
+ goto nla_put_failure;
+ if (ops->tmplt_dump(skb, net, priv) < 0)
+ goto nla_put_failure;
+ }
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -EMSGSIZE;
+}
+
+static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
+ u32 seq, u16 flags, int event, bool unicast)
+{
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ struct tcf_block *block = chain->block;
+ struct net *net = block->net;
+ struct sk_buff *skb;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tc_chain_fill_node(chain, net, skb, block, portid,
+ seq, flags, event) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (unicast)
+ return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
+}
+
+static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
+ struct nlattr **tca,
+ struct netlink_ext_ack *extack)
+{
+ const struct tcf_proto_ops *ops;
+ void *tmplt_priv;
+
+ /* If kind is not set, user did not specify template. */
+ if (!tca[TCA_KIND])
+ return 0;
+
+ ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
+ NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
+ return -EOPNOTSUPP;
+ }
+
+ tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
+ if (IS_ERR(tmplt_priv)) {
+ module_put(ops->owner);
+ return PTR_ERR(tmplt_priv);
+ }
+ chain->tmplt_ops = ops;
+ chain->tmplt_priv = tmplt_priv;
+ return 0;
+}
+
+static void tc_chain_tmplt_del(struct tcf_chain *chain)
+{
+ const struct tcf_proto_ops *ops = chain->tmplt_ops;
+
+ /* If template ops are set, no work to do for us. */
+ if (!ops)
+ return;
+
+ ops->tmplt_destroy(chain->tmplt_priv);
+ module_put(ops->owner);
+}
+
+/* Add/delete/get a chain */
+
+static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tca[TCA_MAX + 1];
+ struct tcmsg *t;
+ u32 parent;
+ u32 chain_index;
+ struct Qdisc *q = NULL;
+ struct tcf_chain *chain = NULL;
+ struct tcf_block *block;
+ unsigned long cl;
+ int err;
+
+ if (n->nlmsg_type != RTM_GETCHAIN &&
+ !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+replay:
+ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+ if (err < 0)
+ return err;
+
+ t = nlmsg_data(n);
+ parent = t->tcm_parent;
+ cl = 0;
+
+ block = tcf_block_find(net, &q, &parent, &cl,
+ t->tcm_ifindex, t->tcm_block_index, extack);
+ if (IS_ERR(block))
+ return PTR_ERR(block);
+
+ chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ if (chain_index > TC_ACT_EXT_VAL_MASK) {
+ NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
+ return -EINVAL;
+ }
+ chain = tcf_chain_lookup(block, chain_index);
+ if (n->nlmsg_type == RTM_NEWCHAIN) {
+ if (chain) {
+ if (tcf_chain_held_by_acts_only(chain)) {
+ /* The chain exists only because there is
+ * some action referencing it.
+ */
+ tcf_chain_hold(chain);
+ } else {
+ NL_SET_ERR_MSG(extack, "Filter chain already exists");
+ return -EEXIST;
+ }
+ } else {
+ if (!(n->nlmsg_flags & NLM_F_CREATE)) {
+ NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
+ return -ENOENT;
+ }
+ chain = tcf_chain_create(block, chain_index);
+ if (!chain) {
+ NL_SET_ERR_MSG(extack, "Failed to create filter chain");
+ return -ENOMEM;
+ }
+ }
+ } else {
+ if (!chain || tcf_chain_held_by_acts_only(chain)) {
+ NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
+ return -EINVAL;
+ }
+ tcf_chain_hold(chain);
+ }
+
+ switch (n->nlmsg_type) {
+ case RTM_NEWCHAIN:
+ err = tc_chain_tmplt_add(chain, net, tca, extack);
+ if (err)
+ goto errout;
+ /* In case the chain was successfully added, take a reference
+ * to the chain. This ensures that an empty chain
+ * does not disappear at the end of this function.
+ */
+ tcf_chain_hold(chain);
+ chain->explicitly_created = true;
+ tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
+ RTM_NEWCHAIN, false);
+ break;
+ case RTM_DELCHAIN:
+ /* Flush the chain first as the user requested chain removal. */
+ tcf_chain_flush(chain);
+ /* In case the chain was successfully deleted, put a reference
+ * to the chain previously taken during addition.
+ */
+ tcf_chain_put_explicitly_created(chain);
+ chain->explicitly_created = false;
+ break;
+ case RTM_GETCHAIN:
+ err = tc_chain_notify(chain, skb, n->nlmsg_seq,
+ n->nlmsg_seq, n->nlmsg_type, true);
+ if (err < 0)
+ NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ NL_SET_ERR_MSG(extack, "Unsupported message type");
+ goto errout;
+ }
+
+errout:
+ tcf_chain_put(chain);
+ if (err == -EAGAIN)
+ /* Replay the request. */
+ goto replay;
+ return err;
+}
+
+/* called with RTNL */
+static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tca[TCA_MAX + 1];
+ struct Qdisc *q = NULL;
+ struct tcf_block *block;
+ struct tcf_chain *chain;
+ struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ long index_start;
+ long index;
+ u32 parent;
+ int err;
+
+ if (nlmsg_len(cb->nlh) < sizeof(*tcm))
+ return skb->len;
+
+ err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+ if (err)
+ return err;
+
+ if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+ block = tcf_block_lookup(net, tcm->tcm_block_index);
+ if (!block)
+ goto out;
+ /* If we work with block index, q is NULL and parent value
+ * will never be used in the following code. The check
+ * in tcf_fill_node prevents it. However, compiler does not
+ * see that far, so set parent to zero to silence the warning
+ * about parent being uninitialized.
+ */
+ parent = 0;
+ } else {
+ const struct Qdisc_class_ops *cops;
+ struct net_device *dev;
+ unsigned long cl = 0;
+
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
+ return skb->len;
+
+ parent = tcm->tcm_parent;
+ if (!parent) {
+ q = dev->qdisc;
+ parent = q->handle;
+ } else {
+ q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+ }
+ if (!q)
+ goto out;
+ cops = q->ops->cl_ops;
+ if (!cops)
+ goto out;
+ if (!cops->tcf_block)
+ goto out;
+ if (TC_H_MIN(tcm->tcm_parent)) {
+ cl = cops->find(q, tcm->tcm_parent);
+ if (cl == 0)
+ goto out;
+ }
+ block = cops->tcf_block(q, cl, NULL);
+ if (!block)
+ goto out;
+ if (tcf_block_shared(block))
+ q = NULL;
+ }
+
+ index_start = cb->args[0];
+ index = 0;
+
+ list_for_each_entry(chain, &block->chain_list, list) {
+ if ((tca[TCA_CHAIN] &&
+ nla_get_u32(tca[TCA_CHAIN]) != chain->index))
+ continue;
+ if (index < index_start) {
+ index++;
+ continue;
+ }
+ if (tcf_chain_held_by_acts_only(chain))
+ continue;
+ err = tc_chain_fill_node(chain, net, skb, block,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWCHAIN);
+ if (err <= 0)
+ break;
+ index++;
+ }
+
+ cb->args[0] = index;
+
+out:
+ /* If we did no progress, the error (EMSGSIZE) is real */
+ if (skb->len == 0 && err)
+ return err;
+ return skb->len;
+}
+
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
@@ -1822,6 +2242,10 @@ static int __init tc_filter_init(void)
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
tc_dump_tfilter, 0);
+ rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
+ tc_dump_chain, 0);
return 0;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 95367f37098d..6a5dce8baf19 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -324,4 +324,3 @@ static void __exit exit_basic(void)
module_init(init_basic)
module_exit(exit_basic)
MODULE_LICENSE("GPL");
-
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 66e0ac9811f9..fa6fe2fe0f32 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -349,12 +349,10 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
return -EINVAL;
- bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
+ bpf_ops = kmemdup(nla_data(tb[TCA_BPF_OPS]), bpf_size, GFP_KERNEL);
if (bpf_ops == NULL)
return -ENOMEM;
- memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
-
fprog_tmp.len = bpf_num_ops;
fprog_tmp.filter = bpf_ops;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 8b2474293db1..a3b69bb6f4b0 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -52,6 +52,7 @@ struct fl_flow_key {
struct flow_dissector_key_mpls mpls;
struct flow_dissector_key_tcp tcp;
struct flow_dissector_key_ip ip;
+ struct flow_dissector_key_ip enc_ip;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -71,6 +72,13 @@ struct fl_flow_mask {
struct list_head list;
};
+struct fl_flow_tmplt {
+ struct fl_flow_key dummy_key;
+ struct fl_flow_key mask;
+ struct flow_dissector dissector;
+ struct tcf_chain *chain;
+};
+
struct cls_fl_head {
struct rhashtable ht;
struct list_head masks;
@@ -146,6 +154,23 @@ static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
*lmkey++ = *lkey++ & *lmask++;
}
+static bool fl_mask_fits_tmplt(struct fl_flow_tmplt *tmplt,
+ struct fl_flow_mask *mask)
+{
+ const long *lmask = fl_key_get_start(&mask->key, mask);
+ const long *ltmplt;
+ int i;
+
+ if (!tmplt)
+ return true;
+ ltmplt = fl_key_get_start(&tmplt->mask, mask);
+ for (i = 0; i < fl_mask_range(mask); i += sizeof(long)) {
+ if (~*ltmplt++ & *lmask++)
+ return false;
+ }
+ return true;
+}
+
static void fl_clear_masked_range(struct fl_flow_key *key,
struct fl_flow_mask *mask)
{
@@ -453,6 +478,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_CVLAN_ID] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_CVLAN_PRIO] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_CVLAN_ETH_TYPE] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_ENC_IP_TOS] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_IP_TTL] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -561,17 +590,17 @@ static int fl_set_key_flags(struct nlattr **tb,
return 0;
}
-static void fl_set_key_ip(struct nlattr **tb,
+static void fl_set_key_ip(struct nlattr **tb, bool encap,
struct flow_dissector_key_ip *key,
struct flow_dissector_key_ip *mask)
{
- fl_set_key_val(tb, &key->tos, TCA_FLOWER_KEY_IP_TOS,
- &mask->tos, TCA_FLOWER_KEY_IP_TOS_MASK,
- sizeof(key->tos));
+ int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
+ int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
+ int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
+ int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;
- fl_set_key_val(tb, &key->ttl, TCA_FLOWER_KEY_IP_TTL,
- &mask->ttl, TCA_FLOWER_KEY_IP_TTL_MASK,
- sizeof(key->ttl));
+ fl_set_key_val(tb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos));
+ fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
}
static int fl_set_key(struct net *net, struct nlattr **tb,
@@ -633,7 +662,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
&mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.ip_proto));
- fl_set_key_ip(tb, &key->ip, &mask->ip);
+ fl_set_key_ip(tb, false, &key->ip, &mask->ip);
}
if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
@@ -768,6 +797,8 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
&mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
sizeof(key->enc_tp.dst));
+ fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);
+
if (tb[TCA_FLOWER_KEY_FLAGS])
ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
@@ -819,49 +850,52 @@ static int fl_init_mask_hashtable(struct fl_flow_mask *mask)
FL_KEY_SET(keys, cnt, id, member); \
} while(0);
-static void fl_init_dissector(struct fl_flow_mask *mask)
+static void fl_init_dissector(struct flow_dissector *dissector,
+ struct fl_flow_key *mask)
{
struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
size_t cnt = 0;
FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_PORTS, tp);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_IP, ip);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_TCP, tcp);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_ICMP, icmp);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_ARP, arp);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_MPLS, mpls);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_VLAN, vlan);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_CVLAN, cvlan);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
- if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) ||
- FL_KEY_IS_MASKED(&mask->key, enc_ipv6))
+ if (FL_KEY_IS_MASKED(mask, enc_ipv4) ||
+ FL_KEY_IS_MASKED(mask, enc_ipv6))
FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
enc_control);
- FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_ENC_IP, enc_ip);
- skb_flow_dissector_init(&mask->dissector, keys, cnt);
+ skb_flow_dissector_init(dissector, keys, cnt);
}
static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
@@ -880,7 +914,7 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
if (err)
goto errout_free;
- fl_init_dissector(newmask);
+ fl_init_dissector(&newmask->dissector, &newmask->key);
INIT_LIST_HEAD_RCU(&newmask->filters);
@@ -929,6 +963,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_fl_filter *f, struct fl_flow_mask *mask,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr,
+ struct fl_flow_tmplt *tmplt,
struct netlink_ext_ack *extack)
{
int err;
@@ -949,6 +984,11 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
fl_mask_update_range(mask);
fl_set_masked_key(&f->mkey, &f->key, mask);
+ if (!fl_mask_fits_tmplt(tmplt, mask)) {
+ NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -1014,7 +1054,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr,
- extack);
+ tp->chain->tmplt_priv, extack);
if (err)
goto errout_idr;
@@ -1099,19 +1139,17 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f;
- struct fl_flow_mask *mask;
- list_for_each_entry_rcu(mask, &head->masks, list) {
- list_for_each_entry_rcu(f, &mask->filters, list) {
- if (arg->count < arg->skip)
- goto skip;
- if (arg->fn(tp, f, arg) < 0) {
- arg->stop = 1;
- break;
- }
-skip:
- arg->count++;
+ arg->count = arg->skip;
+
+ while ((f = idr_get_next_ul(&head->handle_idr,
+ &arg->cookie)) != NULL) {
+ if (arg->fn(tp, f, arg) < 0) {
+ arg->stop = 1;
+ break;
}
+ arg->cookie = f->handle + 1;
+ arg->count++;
}
}
@@ -1156,6 +1194,93 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
return 0;
}
+static void fl_hw_create_tmplt(struct tcf_chain *chain,
+ struct fl_flow_tmplt *tmplt)
+{
+ struct tc_cls_flower_offload cls_flower = {};
+ struct tcf_block *block = chain->block;
+ struct tcf_exts dummy_exts = { 0, };
+
+ cls_flower.common.chain_index = chain->index;
+ cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE;
+ cls_flower.cookie = (unsigned long) tmplt;
+ cls_flower.dissector = &tmplt->dissector;
+ cls_flower.mask = &tmplt->mask;
+ cls_flower.key = &tmplt->dummy_key;
+ cls_flower.exts = &dummy_exts;
+
+ /* We don't care if driver (any of them) fails to handle this
+ * call. It serves just as a hint for it.
+ */
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
+ &cls_flower, false);
+}
+
+static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
+ struct fl_flow_tmplt *tmplt)
+{
+ struct tc_cls_flower_offload cls_flower = {};
+ struct tcf_block *block = chain->block;
+
+ cls_flower.common.chain_index = chain->index;
+ cls_flower.command = TC_CLSFLOWER_TMPLT_DESTROY;
+ cls_flower.cookie = (unsigned long) tmplt;
+
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
+ &cls_flower, false);
+}
+
+static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
+ struct nlattr **tca,
+ struct netlink_ext_ack *extack)
+{
+ struct fl_flow_tmplt *tmplt;
+ struct nlattr **tb;
+ int err;
+
+ if (!tca[TCA_OPTIONS])
+ return ERR_PTR(-EINVAL);
+
+ tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
+ if (!tb)
+ return ERR_PTR(-ENOBUFS);
+ err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
+ fl_policy, NULL);
+ if (err)
+ goto errout_tb;
+
+ tmplt = kzalloc(sizeof(*tmplt), GFP_KERNEL);
+ if (!tmplt) {
+ err = -ENOMEM;
+ goto errout_tb;
+ }
+ tmplt->chain = chain;
+ err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack);
+ if (err)
+ goto errout_tmplt;
+ kfree(tb);
+
+ fl_init_dissector(&tmplt->dissector, &tmplt->mask);
+
+ fl_hw_create_tmplt(chain, tmplt);
+
+ return tmplt;
+
+errout_tmplt:
+ kfree(tmplt);
+errout_tb:
+ kfree(tb);
+ return ERR_PTR(err);
+}
+
+static void fl_tmplt_destroy(void *tmplt_priv)
+{
+ struct fl_flow_tmplt *tmplt = tmplt_priv;
+
+ fl_hw_destroy_tmplt(tmplt->chain, tmplt);
+ kfree(tmplt);
+}
+
static int fl_dump_key_val(struct sk_buff *skb,
void *val, int val_type,
void *mask, int mask_type, int len)
@@ -1210,14 +1335,17 @@ static int fl_dump_key_mpls(struct sk_buff *skb,
return 0;
}
-static int fl_dump_key_ip(struct sk_buff *skb,
+static int fl_dump_key_ip(struct sk_buff *skb, bool encap,
struct flow_dissector_key_ip *key,
struct flow_dissector_key_ip *mask)
{
- if (fl_dump_key_val(skb, &key->tos, TCA_FLOWER_KEY_IP_TOS, &mask->tos,
- TCA_FLOWER_KEY_IP_TOS_MASK, sizeof(key->tos)) ||
- fl_dump_key_val(skb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, &mask->ttl,
- TCA_FLOWER_KEY_IP_TTL_MASK, sizeof(key->ttl)))
+ int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
+ int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
+ int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
+ int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;
+
+ if (fl_dump_key_val(skb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos)) ||
+ fl_dump_key_val(skb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl)))
return -1;
return 0;
@@ -1286,29 +1414,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
}
-static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+static int fl_dump_key(struct sk_buff *skb, struct net *net,
+ struct fl_flow_key *key, struct fl_flow_key *mask)
{
- struct cls_fl_filter *f = fh;
- struct nlattr *nest;
- struct fl_flow_key *key, *mask;
-
- if (!f)
- return skb->len;
-
- t->tcm_handle = f->handle;
-
- nest = nla_nest_start(skb, TCA_OPTIONS);
- if (!nest)
- goto nla_put_failure;
-
- if (f->res.classid &&
- nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
- goto nla_put_failure;
-
- key = &f->key;
- mask = &f->mask->key;
-
if (mask->indev_ifindex) {
struct net_device *dev;
@@ -1317,9 +1425,6 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
goto nla_put_failure;
}
- if (!tc_skip_hw(f->flags))
- fl_hw_update_stats(tp, f);
-
if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
sizeof(key->eth.dst)) ||
@@ -1342,8 +1447,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
TCA_FLOWER_KEY_CVLAN_PRIO,
&key->cvlan, &mask->cvlan) ||
(mask->cvlan.vlan_tpid &&
- nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
- key->cvlan.vlan_tpid)))
+ nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+ key->cvlan.vlan_tpid)))
goto nla_put_failure;
if (mask->basic.n_proto) {
@@ -1363,7 +1468,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
(fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
&mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.ip_proto)) ||
- fl_dump_key_ip(skb, &key->ip, &mask->ip)))
+ fl_dump_key_ip(skb, false, &key->ip, &mask->ip)))
goto nla_put_failure;
if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
@@ -1488,12 +1593,48 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
&mask->enc_tp.dst,
TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
- sizeof(key->enc_tp.dst)))
+ sizeof(key->enc_tp.dst)) ||
+ fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip))
goto nla_put_failure;
if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct cls_fl_filter *f = fh;
+ struct nlattr *nest;
+ struct fl_flow_key *key, *mask;
+
+ if (!f)
+ return skb->len;
+
+ t->tcm_handle = f->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (f->res.classid &&
+ nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
+ goto nla_put_failure;
+
+ key = &f->key;
+ mask = &f->mask->key;
+
+ if (fl_dump_key(skb, net, key, mask))
+ goto nla_put_failure;
+
+ if (!tc_skip_hw(f->flags))
+ fl_hw_update_stats(tp, f);
+
if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
goto nla_put_failure;
@@ -1512,6 +1653,31 @@ nla_put_failure:
return -1;
}
+static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv)
+{
+ struct fl_flow_tmplt *tmplt = tmplt_priv;
+ struct fl_flow_key *key, *mask;
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ key = &tmplt->dummy_key;
+ mask = &tmplt->mask;
+
+ if (fl_dump_key(skb, net, key, mask))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
{
struct cls_fl_filter *f = fh;
@@ -1532,6 +1698,9 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.reoffload = fl_reoffload,
.dump = fl_dump,
.bind_class = fl_bind_class,
+ .tmplt_create = fl_tmplt_create,
+ .tmplt_destroy = fl_tmplt_destroy,
+ .tmplt_dump = fl_tmplt_dump,
.owner = THIS_MODULE,
};
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 30695691e9ff..35fc7252187c 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -80,7 +80,6 @@
#define CAKE_QUEUES (1024)
#define CAKE_FLOW_MASK 63
#define CAKE_FLOW_NAT_FLAG 64
-#define CAKE_SPLIT_GSO_THRESHOLD (125000000) /* 1Gbps */
/* struct cobalt_params - contains codel and blue parameters
* @interval: codel initial drop rate
@@ -1546,7 +1545,7 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
if (TC_H_MAJ(skb->priority) == sch->handle &&
TC_H_MIN(skb->priority) > 0 &&
TC_H_MIN(skb->priority) <= q->tin_cnt) {
- tin = TC_H_MIN(skb->priority) - 1;
+ tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
if (q->rate_flags & CAKE_FLAG_WASH)
cake_wash_diffserv(skb);
@@ -2569,10 +2568,12 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_CAKE_MEMORY])
q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
- if (q->rate_bps && q->rate_bps <= CAKE_SPLIT_GSO_THRESHOLD)
- q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
- else
- q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
+ if (tb[TCA_CAKE_SPLIT_GSO]) {
+ if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO]))
+ q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
+ else
+ q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
+ }
if (q->tins) {
sch_tree_lock(sch);
@@ -2608,7 +2609,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
q->target = 5000; /* 5ms: codel RFC argues
* for 5 to 10% of interval
*/
-
+ q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
q->cur_tin = 0;
q->cur_flow = 0;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index cdd96b9a27bc..e26a24017faa 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -78,18 +78,42 @@ struct cbs_sched_data {
s64 sendslope; /* in bytes/s */
s64 idleslope; /* in bytes/s */
struct qdisc_watchdog watchdog;
- int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch);
+ int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free);
struct sk_buff *(*dequeue)(struct Qdisc *sch);
+ struct Qdisc *qdisc;
};
-static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch)
+static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct Qdisc *child,
+ struct sk_buff **to_free)
{
- return qdisc_enqueue_tail(skb, sch);
+ int err;
+
+ err = child->ops->enqueue(skb, child, to_free);
+ if (err != NET_XMIT_SUCCESS)
+ return err;
+
+ qdisc_qstats_backlog_inc(sch, skb);
+ sch->q.qlen++;
+
+ return NET_XMIT_SUCCESS;
}
-static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
+static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct cbs_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *qdisc = q->qdisc;
+
+ return cbs_child_enqueue(skb, sch, qdisc, to_free);
+}
+
+static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *qdisc = q->qdisc;
if (sch->q.qlen == 0 && q->credits > 0) {
/* We need to stop accumulating credits when there's
@@ -99,7 +123,7 @@ static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
q->last = ktime_get_ns();
}
- return qdisc_enqueue_tail(skb, sch);
+ return cbs_child_enqueue(skb, sch, qdisc, to_free);
}
static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -107,7 +131,7 @@ static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
struct cbs_sched_data *q = qdisc_priv(sch);
- return q->enqueue(skb, sch);
+ return q->enqueue(skb, sch, to_free);
}
/* timediff is in ns, slope is in bytes/s */
@@ -132,9 +156,25 @@ static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
return div64_s64(len * slope, port_rate);
}
+static struct sk_buff *cbs_child_dequeue(struct Qdisc *sch, struct Qdisc *child)
+{
+ struct sk_buff *skb;
+
+ skb = child->ops->dequeue(child);
+ if (!skb)
+ return NULL;
+
+ qdisc_qstats_backlog_dec(sch, skb);
+ qdisc_bstats_update(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+}
+
static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
{
struct cbs_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *qdisc = q->qdisc;
s64 now = ktime_get_ns();
struct sk_buff *skb;
s64 credits;
@@ -157,8 +197,7 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
return NULL;
}
}
-
- skb = qdisc_dequeue_head(sch);
+ skb = cbs_child_dequeue(sch, qdisc);
if (!skb)
return NULL;
@@ -178,7 +217,10 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch)
{
- return qdisc_dequeue_head(sch);
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *qdisc = q->qdisc;
+
+ return cbs_child_dequeue(sch, qdisc);
}
static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
@@ -310,6 +352,13 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
return -EINVAL;
}
+ q->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ sch->handle, extack);
+ if (!q->qdisc)
+ return -ENOMEM;
+
+ qdisc_hash_add(q->qdisc, false);
+
q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
q->enqueue = cbs_enqueue_soft;
@@ -328,6 +377,9 @@ static void cbs_destroy(struct Qdisc *sch)
qdisc_watchdog_cancel(&q->watchdog);
cbs_disable_offload(dev, q);
+
+ if (q->qdisc)
+ qdisc_destroy(q->qdisc);
}
static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -356,8 +408,72 @@ nla_put_failure:
return -1;
}
+static int cbs_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+
+ if (cl != 1 || !q->qdisc) /* only one class */
+ return -ENOENT;
+
+ tcm->tcm_handle |= TC_H_MIN(1);
+ tcm->tcm_info = q->qdisc->handle;
+
+ return 0;
+}
+
+static int cbs_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+ struct Qdisc **old, struct netlink_ext_ack *extack)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+
+ if (!new) {
+ new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ sch->handle, NULL);
+ if (!new)
+ new = &noop_qdisc;
+ }
+
+ *old = qdisc_replace(sch, new, &q->qdisc);
+ return 0;
+}
+
+static struct Qdisc *cbs_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+
+ return q->qdisc;
+}
+
+static unsigned long cbs_find(struct Qdisc *sch, u32 classid)
+{
+ return 1;
+}
+
+static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+ if (!walker->stop) {
+ if (walker->count >= walker->skip) {
+ if (walker->fn(sch, 1, walker) < 0) {
+ walker->stop = 1;
+ return;
+ }
+ }
+ walker->count++;
+ }
+}
+
+static const struct Qdisc_class_ops cbs_class_ops = {
+ .graft = cbs_graft,
+ .leaf = cbs_leaf,
+ .find = cbs_find,
+ .walk = cbs_walk,
+ .dump = cbs_dump_class,
+};
+
static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
.id = "cbs",
+ .cl_ops = &cbs_class_ops,
.priv_size = sizeof(struct cbs_sched_data),
.enqueue = cbs_enqueue,
.dequeue = cbs_dequeue,
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index cd2e0e342fb6..6c0a9d5dbf94 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -479,24 +479,28 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
q->cparams.mtu = psched_mtu(qdisc_dev(sch));
if (opt) {
- int err = fq_codel_change(sch, opt, extack);
+ err = fq_codel_change(sch, opt, extack);
if (err)
- return err;
+ goto init_failure;
}
err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
- return err;
+ goto init_failure;
if (!q->flows) {
q->flows = kvcalloc(q->flows_cnt,
sizeof(struct fq_codel_flow),
GFP_KERNEL);
- if (!q->flows)
- return -ENOMEM;
+ if (!q->flows) {
+ err = -ENOMEM;
+ goto init_failure;
+ }
q->backlogs = kvcalloc(q->flows_cnt, sizeof(u32), GFP_KERNEL);
- if (!q->backlogs)
- return -ENOMEM;
+ if (!q->backlogs) {
+ err = -ENOMEM;
+ goto alloc_failure;
+ }
for (i = 0; i < q->flows_cnt; i++) {
struct fq_codel_flow *flow = q->flows + i;
@@ -509,6 +513,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
else
sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
+
+alloc_failure:
+ kvfree(q->flows);
+ q->flows = NULL;
+init_failure:
+ q->flows_cnt = 0;
+ return err;
}
static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
new file mode 100644
index 000000000000..52c0b6d8f1d7
--- /dev/null
+++ b/net/sched/sch_skbprio.c
@@ -0,0 +1,320 @@
+/*
+ * net/sched/sch_skbprio.c SKB Priority Queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Nishanth Devarajan, <ndev2021@gmail.com>
+ * Cody Doucette, <doucette@bu.edu>
+ * original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+#include <net/inet_ecn.h>
+
+/* SKB Priority Queue
+ * =================================
+ *
+ * Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes
+ * packets according to their skb->priority field. Under congestion,
+ * Skbprio drops already-enqueued lower priority packets to make space
+ * available for higher priority packets; it was conceived as a solution
+ * for denial-of-service defenses that need to route packets with different
+ * priorities as a mean to overcome DoS attacks.
+ */
+
+struct skbprio_sched_data {
+ /* Queue state. */
+ struct sk_buff_head qdiscs[SKBPRIO_MAX_PRIORITY];
+ struct gnet_stats_queue qstats[SKBPRIO_MAX_PRIORITY];
+ u16 highest_prio;
+ u16 lowest_prio;
+};
+
+static u16 calc_new_high_prio(const struct skbprio_sched_data *q)
+{
+ int prio;
+
+ for (prio = q->highest_prio - 1; prio >= q->lowest_prio; prio--) {
+ if (!skb_queue_empty(&q->qdiscs[prio]))
+ return prio;
+ }
+
+ /* SKB queue is empty, return 0 (default highest priority setting). */
+ return 0;
+}
+
+static u16 calc_new_low_prio(const struct skbprio_sched_data *q)
+{
+ int prio;
+
+ for (prio = q->lowest_prio + 1; prio <= q->highest_prio; prio++) {
+ if (!skb_queue_empty(&q->qdiscs[prio]))
+ return prio;
+ }
+
+ /* SKB queue is empty, return SKBPRIO_MAX_PRIORITY - 1
+ * (default lowest priority setting).
+ */
+ return SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1;
+ struct skbprio_sched_data *q = qdisc_priv(sch);
+ struct sk_buff_head *qdisc;
+ struct sk_buff_head *lp_qdisc;
+ struct sk_buff *to_drop;
+ u16 prio, lp;
+
+ /* Obtain the priority of @skb. */
+ prio = min(skb->priority, max_priority);
+
+ qdisc = &q->qdiscs[prio];
+ if (sch->q.qlen < sch->limit) {
+ __skb_queue_tail(qdisc, skb);
+ qdisc_qstats_backlog_inc(sch, skb);
+ q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+ /* Check to update highest and lowest priorities. */
+ if (prio > q->highest_prio)
+ q->highest_prio = prio;
+
+ if (prio < q->lowest_prio)
+ q->lowest_prio = prio;
+
+ sch->q.qlen++;
+ return NET_XMIT_SUCCESS;
+ }
+
+ /* If this packet has the lowest priority, drop it. */
+ lp = q->lowest_prio;
+ if (prio <= lp) {
+ q->qstats[prio].drops++;
+ q->qstats[prio].overlimits++;
+ return qdisc_drop(skb, sch, to_free);
+ }
+
+ __skb_queue_tail(qdisc, skb);
+ qdisc_qstats_backlog_inc(sch, skb);
+ q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+ /* Drop the packet at the tail of the lowest priority qdisc. */
+ lp_qdisc = &q->qdiscs[lp];
+ to_drop = __skb_dequeue_tail(lp_qdisc);
+ BUG_ON(!to_drop);
+ qdisc_qstats_backlog_dec(sch, to_drop);
+ qdisc_drop(to_drop, sch, to_free);
+
+ q->qstats[lp].backlog -= qdisc_pkt_len(to_drop);
+ q->qstats[lp].drops++;
+ q->qstats[lp].overlimits++;
+
+ /* Check to update highest and lowest priorities. */
+ if (skb_queue_empty(lp_qdisc)) {
+ if (q->lowest_prio == q->highest_prio) {
+ /* The incoming packet is the only packet in queue. */
+ BUG_ON(sch->q.qlen != 1);
+ q->lowest_prio = prio;
+ q->highest_prio = prio;
+ } else {
+ q->lowest_prio = calc_new_low_prio(q);
+ }
+ }
+
+ if (prio > q->highest_prio)
+ q->highest_prio = prio;
+
+ return NET_XMIT_CN;
+}
+
+static struct sk_buff *skbprio_dequeue(struct Qdisc *sch)
+{
+ struct skbprio_sched_data *q = qdisc_priv(sch);
+ struct sk_buff_head *hpq = &q->qdiscs[q->highest_prio];
+ struct sk_buff *skb = __skb_dequeue(hpq);
+
+ if (unlikely(!skb))
+ return NULL;
+
+ sch->q.qlen--;
+ qdisc_qstats_backlog_dec(sch, skb);
+ qdisc_bstats_update(sch, skb);
+
+ q->qstats[q->highest_prio].backlog -= qdisc_pkt_len(skb);
+
+ /* Update highest priority field. */
+ if (skb_queue_empty(hpq)) {
+ if (q->lowest_prio == q->highest_prio) {
+ BUG_ON(sch->q.qlen);
+ q->highest_prio = 0;
+ q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+ } else {
+ q->highest_prio = calc_new_high_prio(q);
+ }
+ }
+ return skb;
+}
+
+static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_skbprio_qopt *ctl = nla_data(opt);
+
+ sch->limit = ctl->limit;
+ return 0;
+}
+
+static int skbprio_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct skbprio_sched_data *q = qdisc_priv(sch);
+ int prio;
+
+ /* Initialise all queues, one for each possible priority. */
+ for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+ __skb_queue_head_init(&q->qdiscs[prio]);
+
+ memset(&q->qstats, 0, sizeof(q->qstats));
+ q->highest_prio = 0;
+ q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+ sch->limit = 64;
+ if (!opt)
+ return 0;
+
+ return skbprio_change(sch, opt, extack);
+}
+
+static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct tc_skbprio_qopt opt;
+
+ opt.limit = sch->limit;
+
+ if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+ return -1;
+
+ return skb->len;
+}
+
+static void skbprio_reset(struct Qdisc *sch)
+{
+ struct skbprio_sched_data *q = qdisc_priv(sch);
+ int prio;
+
+ sch->qstats.backlog = 0;
+ sch->q.qlen = 0;
+
+ for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+ __skb_queue_purge(&q->qdiscs[prio]);
+
+ memset(&q->qstats, 0, sizeof(q->qstats));
+ q->highest_prio = 0;
+ q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static void skbprio_destroy(struct Qdisc *sch)
+{
+ struct skbprio_sched_data *q = qdisc_priv(sch);
+ int prio;
+
+ for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+ __skb_queue_purge(&q->qdiscs[prio]);
+}
+
+static struct Qdisc *skbprio_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ return NULL;
+}
+
+static unsigned long skbprio_find(struct Qdisc *sch, u32 classid)
+{
+ return 0;
+}
+
+static int skbprio_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ tcm->tcm_handle |= TC_H_MIN(cl);
+ return 0;
+}
+
+static int skbprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d)
+{
+ struct skbprio_sched_data *q = qdisc_priv(sch);
+ if (gnet_stats_copy_queue(d, NULL, &q->qstats[cl - 1],
+ q->qstats[cl - 1].qlen) < 0)
+ return -1;
+ return 0;
+}
+
+static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ unsigned int i;
+
+ if (arg->stop)
+ return;
+
+ for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, i + 1, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+static const struct Qdisc_class_ops skbprio_class_ops = {
+ .leaf = skbprio_leaf,
+ .find = skbprio_find,
+ .dump = skbprio_dump_class,
+ .dump_stats = skbprio_dump_class_stats,
+ .walk = skbprio_walk,
+};
+
+static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = {
+ .cl_ops = &skbprio_class_ops,
+ .id = "skbprio",
+ .priv_size = sizeof(struct skbprio_sched_data),
+ .enqueue = skbprio_enqueue,
+ .dequeue = skbprio_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .init = skbprio_init,
+ .reset = skbprio_reset,
+ .change = skbprio_change,
+ .dump = skbprio_dump,
+ .destroy = skbprio_destroy,
+ .owner = THIS_MODULE,
+};
+
+static int __init skbprio_module_init(void)
+{
+ return register_qdisc(&skbprio_qdisc_ops);
+}
+
+static void __exit skbprio_module_exit(void)
+{
+ unregister_qdisc(&skbprio_qdisc_ops);
+}
+
+module_init(skbprio_module_init)
+module_exit(skbprio_module_exit)
+
+MODULE_LICENSE("GPL");